class SimplePoParser::Parser

Fast parser directly using Ruby's powerful StringScanner (strscan).

Important notes about StringScanner.scan:

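- `scan` returns nil when there is no match. Using the regex `*` (zero or more) quantifier will let `scan` return an empty string if there is "no match", as the empty string qualifies as a match of the regex (zero times). We make use of this "trick".
- `.` matches everything until the next newline is hit (unless multi-line mode is explicitly enabled).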

Public Instance Methods

parse(message) click to toggle source

parse a single message of the PO format.

@param message a single PO message in String format without leading or trailing whitespace @return [Hash] parsed PO message information in Hash format

# File lib/simple_po_parser/parser.rb, line 21
# Parses a single PO message and returns everything that was collected.
#
# The message is stripped, wrapped in a StringScanner and handed to `lines`.
# A ParserError raised during parsing is re-raised as a new ParserError whose
# message also contains the partial result, and whose backtrace only keeps
# the frames matching lib/simple_po_parser.
#
# @param message [String] a single PO message
# @return [Hash] the parsed message information
# @raise [ParserError] when the message cannot be parsed
def parse(message)
  @result = {}
  @scanner = StringScanner.new(message.strip)
  begin
    lines
  rescue ParserError => pe
    # keep only the parser's own frames; they are joined into one string
    own_frames = pe.backtrace.select { |frame| frame =~ /lib\/simple_po_parser/ }
    backtrace = own_frames.join("\n\tfrom ")
    error_msg = "SimplePoParser::ParserError" + pe.message
    error_msg += "\nParseing result before error: '#{@result}'"
    error_msg += "\nSimplePoParser filtered backtrace: SimplePoParser::ParserError"
    raise ParserError, error_msg, backtrace
  end
  @result
end

Private Instance Methods

add_result(key, text) click to toggle source

Adds text to the given key in the results. Creates an array if the given key already has a result.

# File lib/simple_po_parser/parser.rb, line 364
# Stores +text+ under +key+ in @result.
#
# The first value for a key is stored as-is. A second value turns the entry
# into a two element array, and further values are appended to that array.
#
# @param key the result key
# @param text the value to record
def add_result(key, text)
  existing = @result[key]
  case existing
  when nil, false
    @result[key] = text
  when Array
    existing.push(text)
  else
    @result[key] = [existing, text]
  end
end
comment() click to toggle source

Matches a comment line. Called on lines starting with '#'. Calls `lines` again once the comment line has been parsed.

# File lib/simple_po_parser/parser.rb, line 61
# Parses one comment line. The scanner is expected to sit right behind the
# leading '#'.
#
# The character following '#' selects the kind of comment:
#   ' '  translator comment      '.'  extracted comment
#   ':'  reference               ','  flag
#   '|'  previous msgctxt / msgid / msgid_plural
#   "\n" empty translator comment
#   '~'  obsolete entry
# Every kind except '~' continues with `lines`; '~' continues with `obsoletes`.
#
# @raise [PoSyntaxError] on an unknown comment kind, or when an obsolete line
#   follows previous comments that were already recorded
def comment
  # Remember where the '#' sits so the error branch can rewind to it exactly,
  # however many bytes `getch` consumes (a multi-byte character, or nothing
  # at all at the end of the input).
  marker_pos = [@scanner.pos - 1, 0].max
  case @scanner.getch
  when ' '
    skip_whitespace
    add_result(:translator_comment, comment_text)
    lines
  when '.'
    skip_whitespace
    add_result(:extracted_comment, comment_text)
    lines
  when ':'
    skip_whitespace
    add_result(:reference, comment_text)
    lines
  when ','
    skip_whitespace
    add_result(:flag, comment_text)
    lines
  when '|'
    skip_whitespace
    previous_comments
    lines
  when "\n"
    add_result(:translator_comment, "") # empty comment line
    lines
  when '~'
    if @result[:previous_msgctxt] || @result[:previous_msgid] || @result[:previous_msgid_plural]
      raise PoSyntaxError, "Previous comment entries need to be marked obsolete too in obsolete message entries. But already got: #{@result}"
    end
    skip_whitespace
    add_result(:obsolete, comment_text)
    obsoletes
  else
    @scanner.pos = marker_pos
    raise PoSyntaxError, "Unknown comment type #{@scanner.peek(10).inspect}"
  end
rescue PoSyntaxError => pe
  raise PoSyntaxError, "Syntax error in comment\n" + pe.message, pe.backtrace
end
comment_text() click to toggle source

returns the text of a comment

@return [String] text

# File lib/simple_po_parser/parser.rb, line 330
# Reads the remainder of the current line as comment text.
#
# Trailing whitespace is removed in place and the scanner is moved past the
# line break via `end_of_line`.
#
# @return [String] the comment text without trailing whitespace
# @raise [PoSyntaxError] if the scanner is neither at a line start nor at eos
#   afterwards
def comment_text
  remainder = @scanner.scan(/.*/) # '.' stops at the newline
  remainder.rstrip!
  unless end_of_line
    raise PoSyntaxError, "Comment text should advance to next line or stop at eos"
  end
  remainder
rescue PoSyntaxError => pe
  raise PoSyntaxError, "Syntax error in commtent_text\n" + pe.message, pe.backtrace
end
end_of_line() click to toggle source

returns true if the scanner is at beginning of next line or end of string

@return [Boolean] true if scanner at beginning of line or eos

# File lib/simple_po_parser/parser.rb, line 357
# Consumes one newline if the scanner is sitting on it, then reports whether
# the scanner is at the end of the input or at the beginning of a line.
#
# @return [Boolean] true at end of string or at beginning of line
def end_of_line
  @scanner.skip(/\n/)
  return true if @scanner.eos?

  @scanner.bol?
end
lines() click to toggle source

Arbitrary line of a PO message. Can be a comment or a message. Message parsing always starts with checking for msgctxt, as content is expected in msgctxt -> msgid -> msgid_plural -> msgstr order.

# File lib/simple_po_parser/parser.rb, line 46
# Dispatches on the start of the current line: a leading '#' is consumed and
# handled by `comment`, anything else is handed to `msgctxt`.
#
# @raise [ParserError] wrapping any PoSyntaxError raised further down
def lines
  @scanner.scan(/#/) ? comment : msgctxt
rescue PoSyntaxError => pe
  # throw a normal ParserError to break the recursion
  raise ParserError, "Syntax error in lines\n" + pe.message, pe.backtrace
end
message_line() click to toggle source

Identifies a message line and returns its text or raises an error.

@return [String] message_text

# File lib/simple_po_parser/parser.rb, line 285
# Parses one quoted message line and returns the text between the quotes.
#
# Expects the scanner on the opening double quote. After the closing quote
# only blanks may follow until the end of the line; the line break itself is
# consumed via `end_of_line`.
#
# The quotes are matched with `skip`, which leaves the scanner untouched when
# it does not match. The error text therefore always shows the input exactly
# from the offending position, even when that position holds a multi-byte
# character (e.g. a typographic quote) or is the end of the input.
#
# @return [String] the message text without the surrounding quotes
# @raise [PoSyntaxError] if the opening or closing quote is missing, or if
#   anything but whitespace follows the closing quote
def message_line
  unless @scanner.skip(/"/)
    err_msg = "A message text needs to start with the double quote character '\"',"
    err_msg += " but this was found: #{@scanner.peek(10).inspect}"
    raise PoSyntaxError, err_msg
  end

  text = message_text
  unless @scanner.skip(/"/)
    err_msg = "The message text '#{text}' must be finished with the double quote character '\"'."
    raise PoSyntaxError, err_msg
  end

  skip_whitespace
  unless end_of_line
    err_msg = "There should be only whitespace until the end of line"
    err_msg += " after the double quote character of a message text."
    raise PoSyntaxError, err_msg
  end
  text
rescue PoSyntaxError => pe
  raise PoSyntaxError, "Syntax error in message_line\n" + pe.message, pe.backtrace
end
message_multiline(key) click to toggle source

parses a multiline message

multiline messages are indicated by an empty content as first line and the next line starting with the double quote character

# File lib/simple_po_parser/parser.rb, line 270
# Collects the continuation lines of a multiline message.
#
# While the current line (after optional blanks) starts with a double quote,
# its text is added to the result under +key+ and the method calls itself for
# the following line.
#
# @param key the result key the continuation lines belong to
# @raise [PoSyntaxError] if a continuation line is malformed
def message_multiline(key)
  skip_whitespace
  return unless @scanner.check(/"/)

  add_result(key, message_line)
  message_multiline(key)
rescue PoSyntaxError => pe
  raise PoSyntaxError, "Syntax error in message_multiline with key '#{key}'\n" + pe.message, pe.backtrace
end
message_text() click to toggle source

returns the text of a message line

@return [String] text

# File lib/simple_po_parser/parser.rb, line 344
# Consumes and returns the text up to (not including) the next unescaped
# double quote, or up to the end of the input if there is none.
#
# The pattern can match the empty string, so the match always starts at the
# current scanner position.
#
# @return [String] the consumed text, possibly empty
def message_text
  # an escaped backslash or escaped quote, or any character that is not a quote
  until_unescaped_quote = /(\\(\\|")|[^"])*/
  @scanner.scan(until_unescaped_quote)
end
msgctxt() click to toggle source

matches the msgctxt line and will continue to check for msgid afterwards

msgctxt is optional

# File lib/simple_po_parser/parser.rb, line 106
# Parses the optional msgctxt entry and then continues with `msgid`.
#
# When the first msgctxt line is empty, the following quoted lines are
# collected as a multiline message.
#
# @raise [PoSyntaxError] if the msgctxt entry or anything after it is malformed
def msgctxt
  if @scanner.scan(/msgctxt/)
    skip_whitespace
    context = message_line
    add_result(:msgctxt, context)
    message_multiline(:msgctxt) if context.empty?
  end
  msgid
rescue PoSyntaxError => pe
  raise PoSyntaxError, "Syntax error in msgctxt\n" + pe.message, pe.backtrace
end
msgid() click to toggle source

matches the msgid line. Will check for optional msgid_plural. Will advance to msgstr or msgstr_plural based on msgid_plural

msgid is required

# File lib/simple_po_parser/parser.rb, line 124
# Parses the required msgid entry.
#
# An empty first line is followed by multiline collection. Afterwards the
# optional msgid_plural is tried: if present, parsing continues with
# `msgstr_plural`, otherwise with `msgstr`.
#
# @raise [PoSyntaxError] if the line does not start with msgid or the entry
#   is malformed
def msgid
  unless @scanner.scan(/msgid/)
    err_msg = "Message without msgid is not allowed."
    err_msg += "The Line started unexpectedly with #{@scanner.peek(10).inspect}."
    raise PoSyntaxError, err_msg
  end

  skip_whitespace
  source_text = message_line
  add_result(:msgid, source_text)
  message_multiline(:msgid) if source_text.empty?
  msgid_plural ? msgstr_plural : msgstr
rescue PoSyntaxError => pe
  raise PoSyntaxError, "Syntax error in msgid\n" + pe.message, pe.backtrace
end
msgid_plural() click to toggle source

matches the msgid_plural line.

msgid_plural is optional

@return [boolean] true if msgid_plural is present, false otherwise

# File lib/simple_po_parser/parser.rb, line 152
# Parses the optional msgid_plural entry.
#
# When the entry is present its text is stored under :msgid_plural; an empty
# first line is followed by multiline collection.
#
# @return [Boolean] true if a msgid_plural entry was parsed, false otherwise
# @raise [PoSyntaxError] if the msgid_plural entry is malformed
def msgid_plural
  return false unless @scanner.scan(/msgid_plural/)

  skip_whitespace
  text = message_line
  add_result(:msgid_plural, text)
  message_multiline(:msgid_plural) if text.empty?
  true
rescue PoSyntaxError => pe
  # name this method in the error trail (it used to be reported as "msgid")
  raise PoSyntaxError, "Syntax error in msgid_plural\n" + pe.message, pe.backtrace
end
msgstr() click to toggle source

parses the msgstr singular line

msgstr is required in singular translations

# File lib/simple_po_parser/parser.rb, line 171
# Parses the msgstr entry of a singular message and checks that nothing but
# blanks follows it.
#
# An empty first line is followed by multiline collection.
#
# @raise [PoSyntaxError] if the line does not start with msgstr, the entry is
#   malformed, or content remains after the entry
def msgstr
  unless @scanner.scan(/msgstr/)
    raise PoSyntaxError, "Singular message without msgstr is not allowed. Line started unexpectedly with #{@scanner.peek(10).inspect}."
  end

  skip_whitespace
  translation = message_line
  add_result(:msgstr, translation)
  message_multiline(:msgstr) if translation.empty?
  skip_whitespace
  return if @scanner.eos?

  raise PoSyntaxError, "Unexpected content after expected message end #{@scanner.peek(10).inspect}"
rescue PoSyntaxError => pe
  raise PoSyntaxError, "Syntax error in msgstr\n" + pe.message, pe.backtrace
end
msgstr_plural(num = 0) click to toggle source

parses the msgstr plural lines

msgstr plural lines are used when there is a msgid_plural. They have the format msgstr[N], where N is an incremental number starting from zero representing the plural number as specified in the header's “Plural-Forms” entry. Most languages, like the English language, only have two plural forms (singular and plural), but there are languages with more plurals.

# File lib/simple_po_parser/parser.rb, line 195
# Parses the msgstr[N] entries of a plural message, one entry per call.
#
# The entries must appear in order starting at zero; +num+ is the index
# expected next. Each entry is stored under its literal key string (for
# example "msgstr[0]"). Once no further entry matches, the input has to be
# exhausted.
#
# @param num [Integer] the plural index expected on the current line
# @raise [PoSyntaxError] if msgstr[0] is missing, an index is out of order,
#   an entry is malformed, or content remains after the last entry
def msgstr_plural(num = 0)
  msgstr_key = @scanner.scan(/msgstr\[\d\]/) # matches 'msgstr[0]' to 'msgstr[9]'
  unless msgstr_key
    if num == 0
      raise PoSyntaxError, "Plural message without msgstr[0] is not allowed. Line started unexpectedly with #{@scanner.peek(10).inspect}."
    end
    return if @scanner.eos?

    raise PoSyntaxError, "End of message was expected, but line started unexpectedly with #{@scanner.peek(10).inspect}"
  end

  # msgstr plurals must come in 0-based index in order
  found_index = msgstr_key[/\d/].to_i
  raise PoSyntaxError, "Bad 'msgstr[index]' index." if found_index != num

  skip_whitespace
  translation = message_line
  add_result(msgstr_key, translation)
  message_multiline(msgstr_key) if translation.empty?
  msgstr_plural(num + 1)
rescue PoSyntaxError => pe
  raise PoSyntaxError, "Syntax error in msgstr_plural\n" + pe.message, pe.backtrace
end
obsoletes() click to toggle source

parses all obsolete lines. An obsolete message may only contain obsolete lines

# File lib/simple_po_parser/parser.rb, line 313
# Parses the remaining lines of an obsolete message.
#
# Each line has to start with '#~'; its text is added under :obsolete and the
# method calls itself for the next line. Parsing ends when the input is
# exhausted.
#
# @raise [PoSyntaxError] if a line that is not marked obsolete is found
def obsoletes
  unless @scanner.scan(/#~/)
    return if @scanner.eos?

    raise PoSyntaxError, "All lines must be obsolete after the first obsolete line, but got #{@scanner.peek(10).inspect}."
  end

  skip_whitespace
  add_result(:obsolete, comment_text)
  obsoletes
end
previous_comments() click to toggle source

parses previous comments, which provide additional information on fuzzy matching

previous comments are: `#| msgctxt`, `#| msgid` and `#| msgid_plural`.

# File lib/simple_po_parser/parser.rb, line 223
# Parses a previous-entry comment; the scanner is expected right behind the
# '#|' marker and any blanks.
#
# The keyword decides the result key: msgctxt -> :previous_msgctxt,
# msgid -> :previous_msgid, msgid_plural -> :previous_msgid_plural. An empty
# first line is followed by collecting the continuation lines through
# `previous_multiline`.
#
# @raise [PoSyntaxError] if the keyword is unknown or the entry is malformed
def previous_comments
  # next part must be msgctxt, msgid or msgid_plural
  unless @scanner.scan(/msg/)
    raise PoSyntaxError, "Previous comments must start with '#| msg'. #{@scanner.peek(10).inspect} unknown."
  end

  key =
    if @scanner.scan(/id/)
      @scanner.scan(/_plural/) ? :previous_msgid_plural : :previous_msgid
    elsif @scanner.scan(/ctxt/)
      :previous_msgctxt
    else
      raise PoSyntaxError, "Previous comment type #{("msg" + @scanner.peek(10)).inspect} unknown."
    end

  skip_whitespace
  previous_text = message_line
  add_result(key, previous_text)
  previous_multiline(key) if previous_text.empty?
rescue PoSyntaxError => pe
  raise PoSyntaxError, "Syntax error in previous_comments\n" + pe.message, pe.backtrace
end
previous_multiline(key) click to toggle source

parses the multiline messages of the previous comment lines

# File lib/simple_po_parser/parser.rb, line 251
# Collects the continuation lines of a multiline previous comment.
#
# A continuation line is '#|', optional blanks and a double quote. The quote
# is part of the pattern so that only continuation lines match and not some
# other line type. While such a line is found, its text is added under +key+
# and the method calls itself for the next line.
#
# @param key the result key the continuation lines belong to
# @raise [PoSyntaxError] if a continuation line is malformed
def previous_multiline(key)
  return unless @scanner.scan(/#\|\p{Blank}*"/)

  # step back onto the quote so `message_line` can parse the line as usual
  @scanner.pos -= 1
  add_result(key, message_line)
  previous_multiline(key)
rescue PoSyntaxError => pe
  raise PoSyntaxError, "Syntax error in previous_multiline\n" + pe.message, pe.backtrace
end
skip_whitespace() click to toggle source

advances the scanner until the next non whitespace position. Does not match newlines. See WHITESPACE_REGEX constant

# File lib/simple_po_parser/parser.rb, line 350
# Moves the scanner past a run of blank characters at the current position.
# Newlines are not blanks and are left in place.
#
# @return [Integer, nil] the result of StringScanner#skip: the length of the
#   skipped run, or nil if there was nothing to skip
def skip_whitespace
  blank_run = /\p{Blank}+/
  @scanner.skip(blank_run)
end