class Fop::Tokenizer

Constants

ESCAPE
EXP_CLOSE
EXP_OPEN
Escapes
OP_ADD
OP_APPEND
OP_PREPEND
OP_REPLACE
OP_SUB
REGEX_CAPTURE
REGEX_DELIM
Token
WHITESPACE
WILDCARD

Attributes

escape[R]

Controls which “mode” the tokenizer is currently in. This is a necessary result of the syntax lacking explicit string delimiters. That could be worked around by requiring users to escape all reserved chars, but that's ugly af. Instead, the parser continually assesses the current context and flips these flags on or off to auto-escape certain chars for the next token.

Public Class Methods

new(src) click to toggle source
# File lib/fop/tokenizer.rb, line 29
def initialize(src)
  @src = src
  @end = src.size - 1
  @start_i = 0
  @i = 0
  reset_escapes!
end

Public Instance Methods

next() click to toggle source
# File lib/fop/tokenizer.rb, line 53
def next
  return Token.new(@i, Tokens::EOF) if @i > @end
  char = @src[@i]
  case char
  when EXP_OPEN
    @i += 1
    token! Tokens::EXP_OPEN
  when EXP_CLOSE
    @i += 1
    token! Tokens::EXP_CLOSE
  when WILDCARD
    @i += 1
    token! Tokens::WILDCARD, WILDCARD
  when REGEX_DELIM
    if @escape.regex
      get_str!
    else
      @i += 1
      token! Tokens::REG_DELIM
    end
  when REGEX_CAPTURE
    if @escape.regex_capture
      get_str!
    else
      @i += 1
      t = token! Tokens::REG_CAPTURE, @src[@i]
      @i += 1
      @start_i = @i
      t
    end
  when OP_REPLACE, OP_APPEND, OP_PREPEND, OP_ADD, OP_SUB
    if @escape.operators
      get_str!
    else
      @i += 1
      token! Tokens::OPERATOR, char
    end
  when WHITESPACE
    if @escape.whitespace
      get_str!
    elsif !@escape.whitespace_sep
      @i += 1
      token! Tokens::WHITESPACE_SEP
    else
      @i += 1
      @start_i = @i
      self.next
    end
  else
    get_str!
  end
end
regex_mode!() click to toggle source

Auto-escape anything you'd find in a regular expression

# File lib/fop/tokenizer.rb, line 43
def regex_mode!
  @escape.whitespace = true
  @escape.regex = false # look for the final /
  @escape.regex_escape = true # pass \ through to the regex engine UNLESS it's followed by a /
  @escape.wildcards = true
  @escape.operators = true
  @escape.regex_capture = true
  @escape.exp = true
end
reset_escapes!() click to toggle source

Auto-escape operators and regex capture vars. Appropriate for top-level syntax.

# File lib/fop/tokenizer.rb, line 38
def reset_escapes!
  @escape = Escapes.new(true, true, true, true)
end

Private Instance Methods

get_str!() click to toggle source
# File lib/fop/tokenizer.rb, line 114
def get_str!
  str = ""
  escape, found_end = false, false
  until found_end or @i > @end
    char = @src[@i]

    if escape
      @i += 1
      str << char
      escape = false
      next
    end

    case char
    when ESCAPE
      @i += 1
      if @escape.regex_escape and @src[@i] != REGEX_DELIM
        str << char
      else
        escape = true
      end
    when EXP_OPEN
      if @escape.exp
        @i += 1
        str << char
      else
        found_end = true
      end
    when EXP_CLOSE
      if @escape.exp
        @i += 1
        str << char
      else
        found_end = true
      end
    when WILDCARD
      if @escape.wildcards
        @i += 1
        str << char
      else
        found_end = true
      end
    when REGEX_DELIM
      if @escape.regex
        @i += 1
        str << char
      else
        found_end = true
      end
    when REGEX_CAPTURE
      if @escape.regex_capture
        @i += 1
        str << char
      else
        found_end = true
      end
    when OP_REPLACE, OP_APPEND, OP_PREPEND, OP_ADD, OP_SUB
      if @escape.operators
        @i += 1
        str << char
      else
        found_end = true
      end
    when WHITESPACE
      if @escape.whitespace
        @i += 1
        str << char
      else
        found_end = true
      end
    else
      @i += 1
      str << char
    end
  end

  return Token.new(@i - 1, Tokens::TR_ESC) if escape
  token! Tokens::TEXT, str
end
token!(type, val = nil) click to toggle source
# File lib/fop/tokenizer.rb, line 108
def token!(type, val = nil)
  t = Token.new(@start_i, type, val)
  @start_i = @i
  t
end