module EBNF::Native

Public Instance Methods

alt(s) click to toggle source

Parse alt

>>> alt("a | b | c")
((alt a b c) '')

@param [String] s @return [Array]

# File lib/ebnf/native.rb, line 143
def alt(s)
  debug("alt") {"(#{s.inspect})"}
  args = []
  while !s.to_s.empty?
    e, s = depth {seq(s)}
    debug {"=> seq returned #{[e, s].inspect}"}
    if e.to_s.empty?
      break unless args.empty?
      e = [:seq, []] # empty sequence
    end
    args << e
    unless s.to_s.empty?
      t, ss = depth {terminal(s)}
      break unless t[0] == :alt
      s = ss
    end
  end
  args.length > 1 ? [args.unshift(:alt), s] : [e, s]
end
diff(s) click to toggle source

parse diff

>>> diff("a - b")
((diff a b) '')
# File lib/ebnf/native.rb, line 198
def diff(s)
  debug("diff") {"(#{s.inspect})"}
  e1, s = depth {postfix(s)}
  debug {"=> postfix returned #{[e1, s].inspect}"}
  unless e1.to_s.empty?
    unless s.to_s.empty?
      t, ss = depth {terminal(s)}
      debug {"diff #{[t, ss].inspect}"}
      if t.is_a?(Array) && t.first == :diff
        s = ss
        e2, s = primary(s)
        unless e2.to_s.empty?
          return [[:diff, e1, e2], s]
        else
          error("diff", "Syntax Error")
          raise SyntaxError, "diff missing second operand"
        end
      end
    end
  end
  [e1, s]
end
eachRule(scanner) { |r| ... } click to toggle source

Native parser for EBNF; less accurate, but appropriate when changing EBNF grammar, itself.

Iterate over rule strings. a line that starts with ‘[’ or ‘@’ starts a new rule

@param [StringScanner] scanner @yield rule_string @yieldparam [String] rule_string

# File lib/ebnf/native.rb, line 12
def eachRule(scanner)
  cur_lineno = 1
  r = ''
  until scanner.eos?
    case
    when s = scanner.scan(%r(\s+)m)
      # Eat whitespace
      cur_lineno += s.count("\n")
      #debug("eachRule(ws)") { "[#{cur_lineno}] #{s.inspect}" }
    when s = scanner.scan(%r(/\*([^\*]|\*[^\/])*\*/)m)
      # Eat comments /* .. */
      cur_lineno += s.count("\n")
      debug("eachRule(comment)") { "[#{cur_lineno}] #{s.inspect}" }
    when s = scanner.scan(%r(\(\*([^\*]|\*[^\)])*\*\))m)
      # Eat comments (* .. *)
      cur_lineno += s.count("\n")
      debug("eachRule(comment)") { "[#{cur_lineno}] #{s.inspect}" }
    when s = scanner.scan(%r((#(?!x)|//).*$))
      # Eat comments // & #
      cur_lineno += s.count("\n")
      debug("eachRule(comment)") { "[#{cur_lineno}] #{s.inspect}" }
    when s = scanner.scan(/\A["']/)
      # Found a quote, scan until end of matching quote
      s += scanner.scan_until(/#{scanner.matched}|$/)
      r += s
    when s = scanner.scan(%r(^@terminals))
      #debug("eachRule(@terminals)") { "[#{cur_lineno}] #{s.inspect}" }
      yield(r) unless r.empty?
      @lineno = cur_lineno
      yield(s)
      r = ''
    when s = scanner.scan(/@pass/)
      # Found rule start, if we've already collected a rule, yield it
      #debug("eachRule(@pass)") { "[#{cur_lineno}] #{s.inspect}" }
      yield r unless r.empty?
      @lineno = cur_lineno
      r = s
    when s = scanner.scan(EBNF::Terminals::LHS)
      # Found rule start, if we've already collected a rule, yield it
      yield r unless r.empty?
      #debug("eachRule(rule)") { "[#{cur_lineno}] #{s.inspect}" }
      @lineno = cur_lineno
      r = s
    else
      # Collect until end of line, or start of comment or quote
      s = scanner.scan_until(%r{(?:[/\(]\*)|#(?!x)|//|["']|$})
      if scanner.matched.length > 0
        # Back up scan head before ending match
        scanner.pos = scanner.pos - scanner.matched.length

        # Remove matched from end of string
        s = s[0..-(scanner.matched.length+1)]
      end
      cur_lineno += s.count("\n")
      #debug("eachRule(rest)") { "[#{cur_lineno}] #{s.inspect}" }
      r += s
    end
  end
  yield r unless r.empty?
end
expression(s) click to toggle source

Parse a string into an expression tree and a remaining string

@example

>>> expression("a b c")
((seq a b c) '')

>>> expression("a? b+ c*")
((seq (opt a) (plus b) (star c)) '')

>>> expression(" | x xlist")
((alt (seq) (seq x xlist)) '')

>>> expression("a | (b - c)")
((alt a (diff b c)) '')

>>> expression("a b | c d")
((alt (seq a b) (seq c d)) '')

>>> expression("a | b | c")
((alt a b c) '')

>>> expression("a) b c")
(a ' b c')

>>> expression("BaseDecl? PrefixDecl*")
((seq (opt BaseDecl) (star PrefixDecl)) '')

>>> expression("NCCHAR1 | diff | [0-9] | #x00B7 | [#x0300-#x036F] | \[#x203F-#x2040\]")
((alt NCCHAR1 diff
      (range '0-9')
      (hex '#x00B7')
      (range '#x0300-#x036F')
      (range, '#x203F-#x2040')) '')

@param [String] s @return [Array]

# File lib/ebnf/native.rb, line 125
def expression(s)
  debug("expression") {"(#{s.inspect})"}
  e, s = depth {alt(s)}
  debug {"=> alt returned #{[e, s].inspect}"}
  unless s.to_s.empty?
    t, ss = depth {terminal(s)}
    debug {"=> terminal returned #{[t, ss].inspect}"}
    return [e, ss] if t.is_a?(Array) && t.first == :")"
  end
  [e, s]
end
postfix(s) click to toggle source

parse postfix

>>> postfix("a b c")
(a ' b c')

>>> postfix("a? b c")
((opt, a) ' b c')
# File lib/ebnf/native.rb, line 229
def postfix(s)
  debug("postfix") {"(#{s.inspect})"}
  e, s = depth {primary(s)}
  debug {"=> primary returned #{[e, s].inspect}"}
  return ["", s] if e.to_s.empty?
  if !s.to_s.empty?
    t, ss = depth {terminal(s)}
    debug {"=> #{[t, ss].inspect}"}
    if t.is_a?(Array) && [:opt, :star, :plus].include?(t.first)
      return [[t.first, e], ss]
    end
  end
  [e, s]
end
primary(s) click to toggle source

parse primary

>>> primary("a b c")
(a ' b c')
# File lib/ebnf/native.rb, line 249
def primary(s)
  debug("primary") {"(#{s.inspect})"}
  t, s = depth {terminal(s)}
  debug {"=> terminal returned #{[t, s].inspect}"}
  if t.is_a?(Symbol) || t.is_a?(String)
    [t, s]
  elsif %w(range hex).map(&:to_sym).include?(t.first)
    [t, s]
  elsif t.first == :"("
    e, s = depth {expression(s)}
    debug {"=> expression returned #{[e, s].inspect}"}
    [e, s]
  else
    ["", s]
  end
end
ruleParts(rule) click to toggle source

Parse a rule into an optional rule number, a symbol and an expression

@param [String] rule @return [Rule]

# File lib/ebnf/native.rb, line 78
def ruleParts(rule)
  num_sym, expr = rule.split('::=', 2).map(&:strip)
  num, sym = num_sym.split(']', 2).map(&:strip)
  num, sym = "", num if sym.nil?
  num = num[1..-1]
  r = Rule.new(sym && sym.to_sym, num, expression(expr).first, ebnf: self)
  debug("ruleParts") { r.inspect }
  r
end
seq(s) click to toggle source

parse seq

>>> seq("a b c")
((seq a b c) '')

>>> seq("a b? c")
((seq a (opt b) c) '')
# File lib/ebnf/native.rb, line 171
def seq(s)
  debug("seq") {"(#{s.inspect})"}
  args = []
  while !s.to_s.empty?
    e, ss = depth {diff(s)}
    debug {"=> diff returned #{[e, ss].inspect}"}
    unless e.to_s.empty?
      args << e
      s = ss
    else
      break;
    end
  end
  if args.length > 1
    [args.unshift(:seq), s]
  elsif args.length == 1
    args + [s]
  else
    ["", s]
  end
end
terminal(s) click to toggle source

parse one terminal; return the terminal and the remaining string

A terminal is represented as a tuple whose 1st item gives the type; some types have additional info in the tuple.

@example

>>> terminal("'abc' def")
('abc' ' def')

>>> terminal("[0-9]")
((range '0-9') '')
>>> terminal("#x00B7")
((hex '#x00B7') '')
>>> terminal ("\[#x0300-#x036F\]")
((range '#x0300-#x036F') '')
>>> terminal("\[^<>'{}|^`\]-\[#x00-#x20\]")
((range "^<>'{}|^`") '-\[#x00-#x20\]')
# File lib/ebnf/native.rb, line 284
def terminal(s)
  s = s.strip
  #STDERR.puts s.inspect
  case m = s[0,1]
  when '"', "'" # STRING1 or STRING2
    l, s = s[1..-1].split(m.rstrip, 2)
    [Unescape.unescape_string(l), s]
  when '[' # RANGE, O_RANGE
    # Includes RANGE and O_RANGE which can't include a ']'
    l, s = s[1..-1].split(']', 2)
    [[:range, Unescape.unescape_string(l)], s]
  when '#' # HEX
    s.match(/(#x\h+)(.*)$/)
    l, s = $1, $2
    [[:hex, l], s]
  when /[\w\.]/ # SYMBOL
    s.match(/([\w\.]+)(.*)$/)
    l, s = $1, $2
    [l.to_sym, s]
  when '-'
    [[:diff], s[1..-1]]
  when '?'
    [[:opt], s[1..-1]]
  when '|'
    [[:alt], s[1..-1]]
  when '+'
    [[:plus], s[1..-1]]
  when '*'
    [[:star], s[1..-1]]
  when /[\(\)]/ # '(' or ')'
    [[m.to_sym], s[1..-1]]
  else
    error("terminal", "unrecognized terminal: #{s.inspect}")
    raise SyntaxError, "unrecognized terminal: #{s.inspect}"
  end
end