module EBNF::PEG::Rule

Behavior for parsing a PEG rule

Attributes

parser[RW]

Initialized by parser when loading rules. Used for finding rules and invoking elements of the parse process.

@return [EBNF::PEG::Parser] parser

Public Instance Methods

eat_whitespace(input) click to toggle source

Eat whitespace between non-terminal rules

# File lib/ebnf/peg/rule.rb, line 268
# Skip over whitespace ahead of a non-terminal.
#
# The parser's whitespace definition decides how skipping is done: a Regexp
# is skipped directly on the scanner, while a Rule is parsed and its result
# discarded. Any other value leaves the input untouched.
#
# @param [Scanner] input
def eat_whitespace(input)
  ws = parser.whitespace
  case ws
  when Regexp
    # Pattern-based whitespace: advance the scanner past any match
    input.skip(ws)
  when Rule
    # Rule-based whitespace: parse it, ignoring whatever it produced
    ws.parse(input)
  end
end
parse(input) click to toggle source

If there are `start_production` and/or `production` handlers, they are invoked with a `prod_data` stack, the input stream and offset. Otherwise, the results are added as an array value to a hash indexed by the rule name.

If matched, the input position is updated and the results returned in a Hash.

  • ‘alt`: returns the value of the matched production or `:unmatched`.

  • ‘diff`: returns the value matched, or `:unmatched`.

  • ‘hex`: returns a string composed of the matched hex character, or `:unmatched`.

  • ‘opt`: returns the value matched, or `nil` if unmatched.

  • ‘plus`: returns an array of the values matched for the specified production, or `:unmatched`, if none are matched. For Terminals, these are concatenated into a single string.

  • ‘range`: returns a string composed of the values matched, or `:unmatched`, if less than `min` are matched.

  • ‘rept`: returns an array of the values matched for the specified production, or `:unmatched`, if none are matched. For Terminals, these are concatenated into a single string.

  • ‘seq`: returns an array composed of single-entry hashes for each matched production indexed by the production name, or `:unmatched` if any production fails to match. For Terminals, returns a string created by concatenating these values. Via option in a `production` or definition, the result can be a single hash with values for each matched production; note that this is not always possible due to the possibility of repeated productions within the sequence.

  • ‘star`: returns an array of the values matched for the specified production. For Terminals, these are concatenated into a single string.

@param [Scanner] input @return [Hash{Symbol => Object}, :unmatched] A hash with keys for matched component of the expression. Returns :unmatched if the input does not match the production.

# File lib/ebnf/peg/rule.rb, line 35
# Parse `input` against this rule.
#
# Results are memoized in `parser.packrat`, keyed by rule symbol and starting
# position, so re-parsing the same rule at the same position restores the
# recorded end position and returns the recorded result.
#
# Terminals that the parser defines with a regular expression are matched
# directly. Otherwise the rule expression (`expr`) is dispatched on its
# operator (`:alt`, `:diff`, `:hex`, `:not`, `:opt`, `:plus`, `:range`,
# `:istr`, `:rept`, `:seq`, `:star`).
#
# When the rule fails to match, the input position and line number are reset
# to where they were on entry.
#
# @param [Scanner] input
# @return [Object, :unmatched] the (possibly handler-transformed) result,
#   or `:unmatched` if the input does not match the production.
# @raise [RuntimeError] if a referenced rule cannot be found, if `:diff` is
#   used on a non-terminal, or if the rule operator is unknown.
def parse(input)
  # Save position and linenumber for backtracking
  pos, lineno = input.pos, input.lineno

  # Return the memoized result, if this rule was already tried at this position
  parser.packrat[sym] ||= {}
  if parser.packrat[sym][pos]
    parser.debug("#{sym}(:memo)", lineno: lineno) { "#{parser.packrat[sym][pos].inspect}(@#{pos})"}
    input.pos, input.lineno = parser.packrat[sym][pos][:pos], parser.packrat[sym][pos][:lineno]
    return parser.packrat[sym][pos][:result]
  end

  if terminal?
    # If the terminal is defined with a regular expression,
    # use that to match the input,
    # otherwise, fall through to the expression-based matching below.
    if regexp = parser.terminal_regexp(sym)
      term_opts = parser.terminal_options(sym)
      if matched = input.scan(regexp)
        # Optionally map matched
        matched = term_opts.fetch(:map, {}).fetch(matched.downcase, matched)

        # Optionally unescape matched
        matched = unescape(matched) if term_opts[:unescape]
      end

      result = parser.onTerminal(sym, (matched ? matched : :unmatched))

      # Update furthest failure for strings and terminals
      parser.update_furthest_failure(input.pos, input.lineno, sym) if result == :unmatched
      parser.packrat[sym][pos] = {
        pos: input.pos,
        lineno: input.lineno,
        result: result
      }
      return parser.packrat[sym][pos][:result]
    end
  else
    eat_whitespace(input)
  end
  start_options = parser.onStart(sym)
  string_regexp_opts = start_options[:insensitive_strings] ? Regexp::IGNORECASE : 0

  result = case expr.first
  when :alt
    # Return the first expression to match.
    # Result is either :unmatched, or the value of the matching rule
    alt = :unmatched
    expr[1..-1].each do |prod|
      alt = case prod
      when Symbol
        rule = parser.find_rule(prod)
        raise "No rule found for #{prod}" unless rule
        rule.parse(input)
      when String
        s = input.scan(Regexp.new(Regexp.quote(prod), string_regexp_opts))
        case start_options[:insensitive_strings]
        when :lower then s && s.downcase
        when :upper then s && s.upcase
        else s
        end || :unmatched
      end
      if alt == :unmatched
        # Update furthest failure for strings and terminals
        parser.update_furthest_failure(input.pos, input.lineno, prod) if prod.is_a?(String) || rule.terminal?
      else
        break
      end
    end
    alt
  when :diff
    # matches any string that matches A but does not match B.
    # (Note, this is only used for Terminal rules, non-terminals will use :not)
    # The message names this rule's symbol; no `prod` local exists in this branch.
    raise "Diff used on non-terminal #{sym}" unless terminal?
    re1, re2 = Regexp.new(translate_codepoints(expr[1])), Regexp.new(translate_codepoints(expr[2]))
    matched = input.scan(re1)
    if !matched || re2.match?(matched)
      # Update furthest failure for terminals
      parser.update_furthest_failure(input.pos, input.lineno, sym)
      :unmatched
    else
      matched
    end
  when :hex
    # Matches the given hex character if expression matches the character whose number (code point) in ISO/IEC 10646 is N. The number of leading zeros in the #xN form is insignificant.
    input.scan(to_regexp) || begin
      # Update furthest failure for terminals
      parser.update_furthest_failure(input.pos, input.lineno, expr.last)
      :unmatched
    end
  when :not
    # matches any string that does not match B.
    res = case prod = expr[1]
    when Symbol
      rule = parser.find_rule(prod)
      raise "No rule found for #{prod}" unless rule
      rule.parse(input)
    when String
      input.scan(Regexp.new(Regexp.quote(prod), string_regexp_opts)) || :unmatched
    end
    if res != :unmatched
      # Update furthest failure for terminals
      parser.update_furthest_failure(input.pos, input.lineno, sym) if terminal?
      :unmatched
    else
      nil
    end
  when :opt
    # Result is the matched value or nil
    opt = rept(input, 0, 1, expr[1], string_regexp_opts, **start_options)

    # Update furthest failure for strings and terminals
    parser.update_furthest_failure(input.pos, input.lineno, expr[1]) if terminal?
    opt.first
  when :plus
    # Result is an array of all expressions while they match,
    # at least one must match.
    # Start options are forwarded so string case normalization matches :opt.
    plus = rept(input, 1, '*', expr[1], string_regexp_opts, **start_options)

    # Update furthest failure for strings and terminals
    parser.update_furthest_failure(input.pos, input.lineno, expr[1]) if terminal?
    plus.is_a?(Array) && terminal? ? plus.join("") : plus
  when :range, :istr
    # Matches the specified character range
    input.scan(to_regexp) || begin
      # Update furthest failure for strings and terminals
      parser.update_furthest_failure(input.pos, input.lineno, expr[1])
      :unmatched
    end
  when :rept
    # Result is an array of all expressions while they match,
    # or :unmatched if fewer than the minimum (expr[1]) match.
    # Start options are forwarded so string case normalization matches :opt.
    rept = rept(input, expr[1], expr[2], expr[3], string_regexp_opts, **start_options)

    # Update furthest failure for strings and terminals
    parser.update_furthest_failure(input.pos, input.lineno, expr[3]) if terminal?
    rept.is_a?(Array) && terminal? ? rept.join("") : rept
  when :seq
    # Evaluate each expression into an array of hashes where each hash contains a key from the associated production and the value is the parsed value of that production. Returns :unmatched if the input does not match the production. Value ordering is ensured by native Hash ordering.
    seq = expr[1..-1].each_with_object([]) do |prod, accumulator|
      eat_whitespace(input) unless accumulator.empty? || terminal?
      res = case prod
      when Symbol
        rule = parser.find_rule(prod)
        raise "No rule found for #{prod}" unless rule
        rule.parse(input)
      when String
        s = input.scan(Regexp.new(Regexp.quote(prod), string_regexp_opts))
        case start_options[:insensitive_strings]
        when :lower then s && s.downcase
        when :upper then s && s.upcase
        else s
        end || :unmatched
      end
      if res == :unmatched
        # Update furthest failure for strings and terminals
        parser.update_furthest_failure(input.pos, input.lineno, prod)
        # Abandon the sequence; `break` makes :unmatched the value of the whole iteration
        break :unmatched
      end
      accumulator << {prod.to_sym => res}
    end
    if seq == :unmatched
      :unmatched
    elsif terminal?
      seq.map(&:values).compact.join("") # Concat values for terminal production
    elsif start_options[:as_hash]
      seq.inject {|memo, h| memo.merge(h)}
    else
      seq
    end
  when :star
    # Result is an array of all expressions while they match,
    # an empty array if none match.
    # Start options are forwarded so string case normalization matches :opt.
    star = rept(input, 0, '*', expr[1], string_regexp_opts, **start_options)

    # Update furthest failure for strings and terminals
    parser.update_furthest_failure(input.pos, input.lineno, expr[1]) if terminal?
    star.is_a?(Array) && terminal? ? star.join("") : star
  else
    raise "attempt to parse unknown rule type: #{expr.first}"
  end

  # Backtrack: a failed rule must not leave the input partially consumed
  if result == :unmatched
    input.pos, input.lineno = pos, lineno
  end

  # Let the parser post-process the result, then memoize it for this start position
  result = parser.onFinish(result)
  (parser.packrat[sym] ||= {})[pos] = {
    pos: input.pos,
    lineno: input.lineno,
    result: result
  }
  return parser.packrat[sym][pos][:result]
end
rept(input, min, max, prod, string_regexp_opts, **options) click to toggle source

Repetition, 0-1, 0-n, 1-n, …

Note, nil results are removed from the result, but count towards min/max calculations

@param [Scanner] input @param [Integer] min @param [Integer, '*'] max

If `max` is an integer, matching stops after `max` entries; if it is `'*'`, there is no upper bound.

@param [Symbol, String] prod @param [Integer] string_regexp_opts @return [:unmatched, Array]

# File lib/ebnf/peg/rule.rb, line 241
# Repetition helper: match `prod` repeatedly against `input`.
#
# Matching continues until `prod` fails to match or `max` results have been
# collected (`max == '*'` means no upper bound). The limit is checked before
# each attempt so that no input beyond the `max`-th match is consumed.
# Unless this rule is a terminal, whitespace is eaten after every match.
#
# Note, nil results are removed from the returned array, but count towards
# the min/max calculations.
#
# @param [Scanner] input
# @param [Integer] min minimum number of matches required
# @param [Integer, '*'] max maximum number of matches, or '*' for unbounded
# @param [Symbol, String] prod rule name to look up, or literal string to match
# @param [Integer] string_regexp_opts Regexp options used when matching a literal string
# @param [Hash] options
# @option options [:lower, :upper] :insensitive_strings
#   case normalization applied to matched literal strings
# @return [:unmatched, Array] `:unmatched` if fewer than `min` matches were found
# @raise [RuntimeError] if `prod` is a Symbol for which no rule is found
def rept(input, min, max, prod, string_regexp_opts, **options)
  result = []

  case prod
  when Symbol
    rule = parser.find_rule(prod)
    raise "No rule found for #{prod}" unless rule
    while (max == '*' || result.length < max) && (res = rule.parse(input)) != :unmatched
      eat_whitespace(input) unless terminal?
      result << res
    end
  when String
    # The pattern does not change between iterations, so build it once
    literal = Regexp.new(Regexp.quote(prod), string_regexp_opts)
    # Check the limit BEFORE scanning; scanning first would consume (and then
    # discard) one occurrence too many once `max` has been reached.
    while (max == '*' || result.length < max) && (res = input.scan(literal))
      eat_whitespace(input) unless terminal?
      result << case options[:insensitive_strings]
      when :lower then res.downcase
      when :upper then res.upcase
      else res
      end
    end
  end

  result.length < min ? :unmatched : result.compact
end