module EBNF::PEG::Parser

A generic PEG parser using the parsed rules modified for PEG parsing.

Attributes

packrat[R]

A Hash structure used for memoizing rule results for a given input location.

@example Partial structure for memoizing results for a particular rule

    {
      rule: {
        86: {
              pos: 
              result: [<EBNF::Rule:80 {
                sym: :ebnf,
                  id: "1",
                  kind: :rule,
                  expr: [:star, [:alt, :declaration, :rule]]}>],
             }
        131: [<EBNF::Rule:80 {sym: :ebnf,
            id: "1",
            kind: :rule,
            expr: [:star, [:alt, :declaration, :rule]]}>,
          <EBNF::Rule:100 {
            sym: :declaration,
            id: "2",
            kind: :rule,
            expr: [:alt, "@terminals", :pass]}>]
      },
      POSTFIX: {
        80: "*",
        368: "*",
        399: "+"
      }
    }

@return [Hash{Integer => Hash{Symbol => Object}}]

scanner[R]

@return [Scanner] used for scanning input.

whitespace[R]

@return [Regexp, Rule] how to remove inter-rule whitespace

Public Class Methods

included(base) click to toggle source
# File lib/ebnf/peg/parser.rb, line 47
# Mix-in hook: extends the including `base` with `ClassMethods`.
#
# @param [Module] base the class or module doing the `include`
# @return [Object] whatever `base.extend` returns
def self.included(base)
  base.extend ClassMethods
end

Public Instance Methods

clear_packrat() click to toggle source

Clear out packrat memoizer. This is appropriate when completing a top-level rule when there is no possibility of backtracking.

# File lib/ebnf/peg/parser.rb, line 266
# Empties the `@packrat` memo table in place.
#
# @return [Hash] the (now empty) memo table
def clear_packrat
  @packrat.clear
end
debug(*args) { || ... } click to toggle source

Debug logging.

The call is ignored unless `@options[:logger]` is set.

@overload debug(node, message, **options)

@param [Array<String>] args Relevant location associated with message
@param [Hash] options
@option options [Integer] :depth
  Recursion depth for indenting output
@yieldreturn [String] additional string appended to `message`.
# File lib/ebnf/peg/parser.rb, line 348
# Emits a debug message through the configured logger.
#
# Does nothing unless `@options[:logger]` is set. A trailing Hash in `args`
# is treated as options (`:lineno`, `:level`, `:depth`). Dispatch order:
# 1. `log_debug`, when the receiver responds to it (level is mapped to a symbol);
# 2. `logger.add(level, line)`;
# 3. `logger << line`.
# For the two logger forms the block's value, if a block is given, is
# appended to the message parts.
#
# @param [Array] args message parts, optionally followed by an options Hash
# @yieldreturn [String] additional string appended to the message
def debug(*args, &block)
  logger = @options[:logger]
  return unless logger

  options = args.last.is_a?(Hash) ? args.pop : {}
  lineno = options[:lineno] || (scanner.lineno if scanner)
  level = options.fetch(:level, 0)
  depth = options[:depth] || self.depth

  if respond_to?(:log_debug)
    level_name = %i[debug info warn error fatal][level]
    return log_debug(*args, **options.merge(level: level_name, lineno: lineno, depth: depth), &block)
  end

  # Prefer a Logger-style `add`; fall back to anything appendable.
  sink = %i[add <<].find {|meth| logger.respond_to?(meth)}
  return unless sink

  args << yield if block_given?
  line = "[#{lineno}]#{' ' * depth}#{args.join(' ')}"
  sink == :add ? logger.add(level, line) : logger << line
end
depth() click to toggle source

Depth of parsing, for log output.

# File lib/ebnf/peg/parser.rb, line 260
# Number of productions currently on the `@productions` stack
# (0 when the stack has not been created yet).
#
# @return [Integer]
def depth
  @productions ? @productions.length : 0
end
error(node, message, **options) click to toggle source

Error information, used as level `3` logger messages. Messages may be logged and are saved for reporting at end of parsing.

@param [String] node Relevant location associated with message @param [String] message Error string @param [Hash{Symbol => Object}] options @option options [URI, to_s] :production @option options [Boolean] :raise abort further processing @option options [Array] :backtrace state where error occurred @see debug

# File lib/ebnf/peg/parser.rb, line 279
# Records and logs an error, optionally raising it.
#
# The formatted message is appended to `@error_log` unless the parser is
# already recovering from an earlier error. `@recovering` is then set, and
# the message is forwarded to `debug` at level 3. An `Error` is raised when
# `options[:raise]` or `@options[:validate]` is truthy.
#
# @param [String] node relevant location associated with message
# @param [String] message error string
# @param [Hash{Symbol => Object}] options
#   `:lineno`, `:rest`, `:production`, `:raise` and `:backtrace` are read here;
#   everything is passed on to `debug`
# @raise [Error] when raising was requested
def error(node, message, **options)
  lineno = options[:lineno] || (scanner.lineno if scanner)
  rest, production = options.values_at(:rest, :production)

  text = "ERROR " + (lineno ? "[line: #{lineno}] " : "") + message
  text += " (found #{rest.inspect})" if rest
  text += ", production = #{production.inspect}" if production

  # Only the first error of a recovery episode is kept for the final report.
  @error_log << text unless @recovering
  @recovering = true
  debug(node, text, level: 3, **options)

  return unless options[:raise] || @options[:validate]

  raise Error.new(text,
          lineno: lineno,
          rest: rest,
          production: production,
          backtrace: options[:backtrace])
end
find_rule(sym) click to toggle source

Find a rule for a symbol

@param [Symbol] sym @return [Rule]

# File lib/ebnf/peg/parser.rb, line 467
# Looks up the rule stored in `@rules` under `sym`.
#
# @param [Symbol] sym
# @return [Rule, nil] the entry for `sym`, as returned by `@rules[sym]`
def find_rule(sym)
  rules = @rules
  rules[sym]
end
onFinish(result) click to toggle source

Finish of production

@param [Object] result parse result @return [Object] parse result, or the value returned from the handler

# File lib/ebnf/peg/parser.rb, line 406
# Completes the production currently on top of `@productions`.
#
# Pops the matching `@prod_data` frame (when a production or start handler
# is registered for it), invokes the production handler unless the parser is
# recovering or the result is `:unmatched`, logs the outcome, optionally
# clears the packrat memo, and finally pops the production.
#
# @param [Object] result parse result
# @return [Object] `result`, or the handler's return value when a handler ran
def onFinish(result)
  #puts "prod_data(f): " + @prod_data.inspect
  prod = @productions.last
  # NOTE: the local `clear_packrat` (second element of the handler entry)
  # shadows the `clear_packrat` method for the rest of this method; the
  # method itself is reached below via the explicit `self.` receiver.
  handler, clear_packrat = self.class.production_handlers[prod]
  # A frame is popped under the same conditions that onStart uses to push one.
  data = @prod_data.pop if handler || self.class.start_handlers[prod]
  error("finish",
    "prod_data production mismatch: expected #{prod.inspect}, got #{data[:_production].inspect}",
    production: prod, prod_data: @prod_data) if data && prod != data[:_production]
  if handler && !@recovering && result != :unmatched
    # Pop production data element from stack, potentially allowing handler to use it
    result = begin
      self.class.eval_with_binding(self) {
        handler.call(result, data, @parse_callback)
      }
    rescue ArgumentError, Error => e
      error("finish", "#{e.class}: #{e.message}", production: prod, backtrace: e.backtrace)
      # The rescue clause's last expression becomes the new `result`,
      # so a failing handler yields `false`.
      @recovering = false
    end
  end
  # Unmatched results are logged at level 0, everything else at level 1.
  debug("#{prod}(:finish)", "",
         lineno: (scanner.lineno if scanner),
         level: result == :unmatched ? 0 : 1) do
    "#{result.inspect}@(#{scanner ? scanner.pos : '?'}), rest: #{scanner ? scanner.rest[0..20].inspect : '?'}"
  end
  self.clear_packrat if clear_packrat
  @productions.pop
  result
end
onStart(prod) click to toggle source

Start of production. Adds data available during the processing of the production.

@return [Hash] composed of production options. Currently only `as_hash` is supported. @see ClassMethods#start_production

# File lib/ebnf/peg/parser.rb, line 372
# Begins production `prod`.
#
# Pushes `prod` onto `@productions`, logs the start, and pushes a data frame
# onto `@prod_data` when a start handler or a production handler is
# registered for it. A start handler is called with the fresh frame before
# the frame is pushed; `ArgumentError`/`Error` raised by it are reported via
# `error` and the frame is pushed regardless.
#
# @param [Symbol] prod
# @return [Hash] the start options registered for `prod` (`{}` when none)
def onStart(prod)
  start_handler = self.class.start_handlers[prod]
  @productions.push(prod)

  debug("#{prod}(:start)", "",
    lineno: (scanner.lineno if scanner),
    pos: (scanner.pos if scanner)
  ) do
    "#{prod}, pos: #{scanner ? scanner.pos : '?'}, rest: #{scanner ? scanner.rest[0..20].inspect : '?'}"
  end

  if start_handler
    # Build the frame first so the handler can customize it before it
    # lands on the @prod_data stack.
    frame = {_production: prod}
    begin
      self.class.eval_with_binding(self) { start_handler.call(frame, @parse_callback) }
    rescue ArgumentError, Error => e
      error("start", "#{e.class}: #{e.message}", production: prod, backtrace: e.backtrace)
      @recovering = false
    end
    @prod_data.push(frame)
  elsif self.class.production_handlers[prod]
    # Make sure we push as many as we pop, even if there is no
    # explicit start handler
    @prod_data.push({_production: prod})
  end

  # Any options declared on this production
  self.class.start_options.fetch(prod, {})
end
onTerminal(prod, value) click to toggle source

A terminal with a defined handler

@param [Symbol] prod from the symbol of the associated rule @param [String] value the scanned string @return [String, Object] either the result from the handler, or the token

# File lib/ebnf/peg/parser.rb, line 440
# Handles a scanned terminal.
#
# When a terminal handler is registered for `prod` and `value` is not
# `:unmatched`, the handler is called with the value, the enclosing
# production and the parse callback, and its return value replaces `value`.
# If the handler raises `ArgumentError`/`Error`, the failure is reported via
# `error` and `false` is returned in place of the value.
#
# @param [Symbol] prod from the symbol of the associated rule
# @param [String] value the scanned string
# @return [String, Object] the handler result, or the token itself
def onTerminal(prod, value)
  enclosing = @productions.last
  callback = self.class.terminal_handlers[prod]

  if callback && value != :unmatched
    begin
      value = self.class.eval_with_binding(self) { callback.call(value, enclosing, @parse_callback) }
    rescue ArgumentError, Error => e
      # `value` still holds the scanned token here; it is reported, then replaced.
      error("terminal", "#{e.class}: #{e.message}", value: value, production: prod, backtrace: e.backtrace)
      value = @recovering = false
    end
  end

  progress("#{prod}(:terminal)", "",
           depth: (depth + 1),
           lineno: (scanner.lineno if scanner),
           level: value == :unmatched ? 0 : 1) do
    "#{value.inspect}@(#{scanner ? scanner.pos : '?'})"
  end
  value
end
parse(input = nil, start = nil, rules = nil, **options, &block) click to toggle source

Initializes a new parser instance.

@param [String, to_s] input @param [Symbol, to_s] start

The starting production for the parser. It may be a URI from the grammar, or a symbol representing the local_name portion of the grammar URI.

@param [Array<EBNF::PEG::Rule>] rules

The parsed rules, which control parsing sequence.
Identify the symbol of the starting rule with `start`.

@param [Hash{Symbol => Object}] options @option options :high_water passed to lexer @option options [Logger] :logger for errors/progress/debug. @option options :low_water passed to lexer @option options :seq_hash (false)

If `true`, sets the default for the value sent to a production handler that is for a `seq` to a hash composed of the flattened constituent hashes that are otherwise provided.

@option options [Symbol, Regexp] :whitespace

Symbol of whitespace rule (defaults to `@pass`), or a regular expression
for eating whitespace between non-terminal rules (strongly encouraged).

@yield [context, *data]

Yields to return data to parser

@yieldparam [:statement, :trace] context

Context for block

@yieldparam [Symbol] *data

Data specific to the call

@return [Object] AST resulting from parse @raise [Exception] Raises exceptions for parsing errors

or errors raised during processing callbacks. Internal
errors are raised using {Error}.

@todo FIXME implement seq_hash

# File lib/ebnf/peg/parser.rb, line 204
# Parses `input` starting from the rule named by `start`.
#
# All per-parse state is (re)initialized here, including the furthest-failure
# record and the error-recovery flag, so an instance can be reused for
# several inputs without earlier failures leaking into later parses.
#
# @param [String, to_s] input
# @param [Symbol, to_s] start
#   starting production; anything other than a Symbol is converted with
#   `to_s` and the portion after the last `#` is used
# @param [Array] rules rules to parse with, each indexed by its `sym`
# @param [Hash{Symbol => Object}] options
#   `:start` and `:rules` are fallbacks for the positional arguments;
#   `:whitespace` is a Regexp or rule, or a Symbol naming a rule;
#   the full hash is retained as `@options`
# @yield block stored as the parse callback handed to handlers
# @return [Object] result of the start rule
# @raise [Error] if the start production is not defined, or if any errors
#   were recorded while parsing
def parse(input = nil, start = nil, rules = nil, **options, &block)
  start ||= options[:start]
  rules ||= options[:rules] || []
  # Index rules by symbol in one pass; a later rule with the same symbol wins.
  @rules = rules.each_with_object({}) {|rule, memo| memo[rule.sym] = rule}
  @packrat = {}

  # Add parser reference to each rule
  @rules.each_value {|rule| rule.parser = self}

  # Take whitespace from options (a Symbol names a rule), else the first
  # `pass` rule, else a default pattern
  ws = options[:whitespace]
  ws = @rules[ws] if ws.is_a?(Symbol)
  @whitespace = ws ||
    @rules.values.detect(&:pass?) ||
    /(?:\s|(?:#[^x][^\n\r]*))+/m.freeze

  @options = options.dup
  @productions = []
  @parse_callback = block
  @error_log = []
  @prod_data = []
  # Forget failure state from any previous parse on this instance: a stale
  # @recovering would keep new errors out of @error_log, and a stale
  # @furthest_failure would report positions from another input.
  @furthest_failure = nil
  @recovering = false

  @scanner = EBNF::LL1::Scanner.new(input)
  unless start.is_a?(Symbol)
    # Accept URIs and other `to_s`-able values; nil or empty input falls
    # through to the "not defined" error below instead of a NoMethodError.
    local_name = start.to_s.split('#').last
    start = local_name.to_sym if local_name
  end
  start_rule = @rules[start]
  raise Error, "Starting production #{start.inspect} not defined" unless start_rule

  # Reports the furthest recorded failure, falling back to the scanner's
  # current position when no failure was recorded.
  report_failure = lambda do
    failure = @furthest_failure
    pos = failure ? failure.pos : scanner.pos
    lineno = failure ? failure.lineno : scanner.lineno
    error("--top--", (failure ? failure.to_s : "syntax error"),
      pos: pos,
      lineno: lineno,
      rest: scanner.string[pos, 20])
  end

  result = start_rule.parse(scanner)
  # Start rule wasn't matched, which is about the only error condition
  report_failure.call if result == :unmatched

  # Eat any remaining whitespace; anything left after that is unparsed input
  start_rule.eat_whitespace(scanner)
  report_failure.call unless scanner.eos?

  # When all is said and done, raise the error log
  raise Error, @error_log.join("\n") unless @error_log.empty?

  result
end
prod_data() click to toggle source

Current ProdData element

# File lib/ebnf/peg/parser.rb, line 263
# Data frame on top of the `@prod_data` stack, or a new empty Hash when the
# stack is empty.
#
# @return [Hash]
def prod_data
  current = @prod_data.last
  current || {}
end
progress(node, *args, &block) click to toggle source

Progress logged when parsing. Passed as level `1` logger messages.

The call is ignored unless `@options[:logger]` is set.

@overload progress(node, message, **options, &block)

@param [String] node Relevant location associated with message
@param [String] message ("")
@param [Hash] options
@option options [Integer] :depth
    Recursion depth for indenting output

@see debug

# File lib/ebnf/peg/parser.rb, line 330
# Logs parse progress by delegating to `debug`, defaulting `:level` to 1.
#
# Does nothing unless `@options[:logger]` is set. If the last argument is a
# Hash it is used as the options (and receives the default level);
# otherwise an options Hash is appended.
#
# @param [String] node relevant location associated with message
# @param [Array] args message parts, optionally followed by an options Hash
# @see debug
def progress(node, *args, &block)
  return unless @options[:logger]

  opts = args.last
  unless opts.is_a?(Hash)
    opts = {}
    args.push(opts)
  end
  opts[:level] ||= 1
  debug(node, *args, &block)
end
terminal_options(sym) click to toggle source

Find the options defined for a terminal

@param [Symbol] sym @return [Regexp]

# File lib/ebnf/peg/parser.rb, line 485
# Looks up the class-level `terminal_options` entry for `sym`.
#
# @param [Symbol] sym
# @return [Object, nil] the entry stored for `sym`
def terminal_options(sym)
  registry = self.class.terminal_options
  registry[sym]
end
terminal_regexp(sym) click to toggle source

Find a regular expression defined for a terminal

@param [Symbol] sym @return [Regexp]

# File lib/ebnf/peg/parser.rb, line 476
# Looks up the class-level `terminal_regexps` entry for `sym`.
#
# @param [Symbol] sym
# @return [Regexp, nil] the entry stored for `sym`
def terminal_regexp(sym)
  registry = self.class.terminal_regexps
  registry[sym]
end
update_furthest_failure(pos, lineno, token) click to toggle source

Record furthest failure.

@param [Integer] pos

The position in the input stream where the failure occurred.

@param [Integer] lineno

Line where the failure occurred.

@param [Symbol, String] token

The terminal token or string which attempted to match.

@see arxiv.org/pdf/1405.6646.pdf

# File lib/ebnf/peg/parser.rb, line 499
# Tracks the failure located furthest into the input.
#
# A failure beyond the recorded position replaces the record; a failure at
# the same position adds its token to the record's expected tokens (without
# duplicates); earlier failures are ignored. Symbols beginning with `_`
# (generated productions) are never recorded.
#
# @param [Integer] pos position in the input stream of the failure
# @param [Integer] lineno line of the failure
# @param [Symbol, String] token terminal token or string which attempted to match
def update_furthest_failure(pos, lineno, token)
  # Skip generated productions
  return if token.is_a?(Symbol) && token.to_s.start_with?('_')

  current = @furthest_failure
  if current.nil? || pos > current.pos
    @furthest_failure = Unmatched.new(pos, lineno, [token])
  elsif pos == current.pos
    expected = current[:expecting]
    expected << token unless expected.include?(token)
  end
end
warn(node, message, **options) click to toggle source

Warning information, used as level `2` logger messages. Messages may be logged and are saved for reporting at end of parsing.

@param [String] node Relevant location associated with message @param [String] message Error string @param [Hash] options @option options [URI, to_s] :production @option options [Token] :token @see debug

# File lib/ebnf/peg/parser.rb, line 308
# Formats a warning and forwards it to `debug` at level 2.
#
# The message is prefixed with "WARNING", the line number when one is known,
# and suffixed with the `:rest` and `:production` options when given.
#
# @param [String] node relevant location associated with message
# @param [String] message warning string
# @param [Hash] options passed on to `debug`; `:lineno`, `:rest` and
#   `:production` also shape the message
# @see debug
def warn(node, message, **options)
  lineno = options[:lineno] || (scanner.lineno if scanner)
  prefix = lineno ? "WARNING [line: #{lineno}] " : "WARNING "

  text = prefix + message
  text += " (found #{options[:rest].inspect})" if options[:rest]
  text += ", production = #{options[:production].inspect}" if options[:production]
  debug(node, text, level: 2, **options)
end