class Arugula::Parser

Attributes

pattern[R]

Public Class Methods

new(str) click to toggle source
# File lib/arugula/parser.rb, line 6
def initialize(str)
  @pattern = str.dup
  @states = [AndPart.new]
  @captures = []
end

Public Instance Methods

consume() click to toggle source
# File lib/arugula/parser.rb, line 28
def consume
  tok = pattern.slice!(0)
  peek = pattern.chr
  if tok.nil?
    fail 'shouldnt happen'
  elsif tok == '[' && !characterclass_type?
    push_part(:characterclass)
  elsif tok == '-' &&
        characterclass_type? &&
        state.parts.last.class.type == :literal &&
        peek != ']'
    literal = state.parts.pop
    push_part(:range, literal.literal, peek)
    pattern.slice!(0)
  elsif tok == ']' && characterclass_type?
    pop_part
  elsif tok == '^' && characterclass_type? && state.parts.empty?
    characterclass_part = pop_part
    wrap_state(:not)
    @states << characterclass_part
  elsif tok == '$'
    push_part(:eol)
  elsif tok == '^'
    push_part(:sol)
  elsif tok == '\\'
    tok = pattern.slice!(0)
    case tok
    when nil
      fail 'unterminated escape sequence'
    when *MetacharacterPart::MATCHERS.keys.map(&:to_s)
      push_part(:metacharacter, tok)
    else
      push_part(:literal, tok)
    end
  elsif characterclass_type?
    push_part(:literal, tok)
  elsif tok == '('
    push_capture
  elsif tok == ')'
    pop_part until capture_type?
    pop_part
  elsif tok == '|'
    pop_part until state == @states.first || or_type? || capture_type?
    wrap_state(:or) unless or_type?
    push_part(:and)
  elsif tok == '.'
    push_part(:dot)
  elsif tok == '*'
    wrap_state(:star)
  elsif tok == '+'
    wrap_state(:plus)
  elsif tok == '?'
    wrap_state(:question)
  elsif tok == '{'
    before_comma = ''
    after_comma = ''
    until pattern.chr == ',' || pattern.chr == '}'
      before_comma << pattern.slice!(0)
    end
    if pattern.chr == ','
      pattern.slice!(0)
    else
      after_comma = before_comma
    end
    after_comma << pattern.slice!(0) until pattern.chr == '}'
    pattern.slice!(0) if pattern.chr == '}'
    before = before_comma.empty? ? 0 : before_comma.to_i
    after = after_comma.empty? ? Float::INFINITY : after_comma.to_i
    wrap_state(:quantifier, before, after)
  else
    push_part(:literal, tok)
  end
end
parse!() click to toggle source
# File lib/arugula/parser.rb, line 16
def parse!
  consume until pattern.empty?
  [@states.first, @captures]
end
pop_part() click to toggle source
# File lib/arugula/parser.rb, line 117
def pop_part
  @states.pop until @states.last.respond_to?(:parts)
  @states.pop
end
push_capture() click to toggle source
# File lib/arugula/parser.rb, line 122
def push_capture
  push_part(:capture, @captures.size.succ.to_s)
  @captures << state
  push_part(:and)
end
push_part(name, *content) click to toggle source
# File lib/arugula/parser.rb, line 102
def push_part(name, *content)
  part_class = Part.all.find { |p| p.type == name }
  fail "Unknown part type #{name}" unless part_class
  part = part_class.new(*content)
  state.parts << part
  @states << part unless name == :literal
end
state() click to toggle source
# File lib/arugula/parser.rb, line 12
def state
  @states.reverse_each.find { |s| s.respond_to?(:parts) }
end
wrap_state(name, *content) click to toggle source
# File lib/arugula/parser.rb, line 110
def wrap_state(name, *content)
  wrapped = Part.all.find { |p| p.type == name }
                .new(*content, state.parts.pop)
  state.parts << wrapped
  @states << wrapped
end