class Zenlish::Lexer::Lexer

Attributes

line_start[R]

@return [Integer] Offset of the start of the current line within the scanned text (zero-based).

lineno[R]

@return [Integer] Current line number (one-based)

scanner[R]

@return [StringScanner] Low-level scanner object.

zenlish_mode[R]

@return [Boolean] true if the lexer is currently scanning Zenlish text.

Public Class Methods

new(source) click to toggle source

Constructor. Initialize a tokenizer for Zenlish. @param source [String] Zenlish text to tokenize.

# File lib/zenlish/lexer/lexer.rb, line 31
# Build a lexer over the given text.
# The lexer starts in Zenlish mode with an empty scanner; the text itself is
# loaded by #reinitialize, which also resets the line bookkeeping.
# @param source [String] Text to tokenize.
def initialize(source)
  @zenlish_mode = true
  @scanner = StringScanner.new('')
  reinitialize(source)
end

Public Instance Methods

reinitialize(source) click to toggle source

@param source [String] Zenlish text to tokenize.

# File lib/zenlish/lexer/lexer.rb, line 38
# Load a new text into the existing scanner and reset line tracking.
# @param source [String] Text to tokenize.
# @return [Integer] 0 (the value of the last assignment).
def reinitialize(source)
  # The constructor creates @scanner before calling this method.
  # Per the StringScanner docs, #string= also resets the scan pointer.
  @scanner.string = source
  @lineno = 1 # line numbers are one-based
  @line_start = 0 # the first line begins at scanner offset 0
end
tokens() click to toggle source

@return [Array<Token>] The sequence of tokens scanned from the source text.

# File lib/zenlish/lexer/lexer.rb, line 45
# Scan the whole text and collect every token produced.
# _next_token is called repeatedly until the scanner reaches end of input;
# nil results (calls that produced no token) are left out.
# @return [Array] the non-nil tokens, in scan order.
def tokens
  collected = []
  collected << _next_token until @scanner.eos?

  collected.compact
end

Private Instance Methods

_next_token() click to toggle source
# File lib/zenlish/lexer/lexer.rb, line 57
# Produce the next token from the input.
# Inter-token whitespace is skipped first, then scanning is delegated to the
# tokenizer matching the current mode (Zenlish when zenlish_mode is truthy,
# JSON otherwise).
# @return [Object, nil] whatever the selected tokenizer returns.
def _next_token
  skip_intertoken_spaces
  return next_json_token unless zenlish_mode

  next_zenlish_token
end
build_token(aSymbolName, aLiteral, _format = :default) click to toggle source
# File lib/zenlish/lexer/lexer.rb, line 87
# Create a token annotated with its one-based line/column position.
#
# The scan pointer is expected to sit immediately after aLiteral.
# StringScanner#pos and @line_start are byte offsets, so the literal's start
# is located with its byte size, and the column is the number of characters
# between the line start and that point. This keeps columns correct when the
# line contains multibyte characters; for ASCII-only text the result equals
# the plain offset arithmetic.
#
# @param aSymbolName [String] Name of the terminal symbol for the token.
# @param aLiteral [String] Text that was just consumed by the scanner.
# @param _format [Symbol] Unused.
# @return [Rley::Lexical::Token] the new token.
# @raise [StandardError] any error raised while building the token is
#   re-raised after a diagnostic line has been printed.
def build_token(aSymbolName, aLiteral, _format = :default)
  # Byte offset at which the literal begins.
  start_byte = scanner.pos - aLiteral.bytesize
  # Text of the current line that precedes the literal; to_s guards against
  # byteslice returning nil for an out-of-range span.
  preceding = scanner.string.byteslice(@line_start, start_byte - @line_start).to_s
  col = preceding.length + 1
  pos = Rley::Lexical::Position.new(@lineno, col)
  Rley::Lexical::Token.new(aLiteral, aSymbolName, pos)
rescue StandardError
  puts "Failing with '#{aSymbolName}' and '#{aLiteral}'"
  raise
end
next_json_token() click to toggle source
# File lib/zenlish/lexer/lexer.rb, line 100
# Scan one token while the lexer is not in Zenlish mode.
#
# A single character from the set : , . " becomes a punctuation token named
# through @@punct2name; a run of characters containing no whitespace and none
# of : ; , . " becomes a 'WORD' token. Anything else (for instance ';', which
# is excluded from words but is not a recognised delimiter) is an error.
#
# @return [Object, nil] the token built by build_token, or nil when the
#   scanner has no character left to peek.
# @raise [ScanError] when the text at the scan pointer matches no token rule;
#   the message quotes the offending character plus up to 20 following ones.
def next_json_token
  lookahead = scanner.peek(1)
  return nil if lookahead.nil? || lookahead.empty?

  # Delimiters and separators are single-character tokens.
  return build_token(@@punct2name[lookahead], scanner.getch) if ':,."'.include?(lookahead)

  word = scanner.scan(/[^\s:;,."]+/)
  return build_token('WORD', word) if word

  # Unknown token: consume the culprit and some context for the message.
  offending = scanner.scan(/./)
  context = scanner.scan(/.{1,20}/)
  offending += context if context
  raise ScanError, "Unknown token #{offending} on line #{lineno}"
end
next_line() click to toggle source
# File lib/zenlish/lexer/lexer.rb, line 138
# Record that a line break has just been consumed.
# Increments the line counter and remembers the scanner's current offset as
# the start of the new line (build_token subtracts it to compute columns).
# @return [Integer] the new line-start offset.
def next_line
  @lineno += 1
  @line_start = scanner.pos
end
next_zenlish_token() click to toggle source
# File lib/zenlish/lexer/lexer.rb, line 66
# Scan one token while the lexer is in Zenlish mode.
#
# Rules, tried in order:
# 1. one of the characters : , . " => punctuation token whose symbol name is
#    looked up in @@punct2name;
# 2. a run of characters without whitespace and without : ; , . " => 'WORD';
# 3. otherwise the input is rejected.
#
# @return [Object, nil] the token built by build_token, or nil when the
#   scanner has no character left to peek.
# @raise [ScanError] when no rule matches; the message shows the offending
#   character followed by up to 20 more characters from the same line.
def next_zenlish_token
  head = scanner.peek(1)
  return nil if head.nil? || head.empty?

  if ':,."'.include?(head)
    # Single-character delimiter or separator.
    build_token(@@punct2name[head], scanner.getch)
  elsif (word = scanner.scan(/[^\s:;,."]+/))
    build_token('WORD', word)
  else
    # Nothing matched: gather the culprit and a bit of what follows it.
    culprit = scanner.scan(/./)
    trailing = scanner.scan(/.{1,20}/)
    culprit += trailing unless trailing.nil?
    raise ScanError, "Unknown token #{culprit} on line #{lineno}"
  end
end
skip_intertoken_spaces() click to toggle source
# File lib/zenlish/lexer/lexer.rb, line 121
# Advance the scanner past whitespace separating two tokens.
#
# Each pass skips a run of spaces, tabs and form feeds, then at most one line
# break (\r\n, \r or \n). Every line break consumed is reported to next_line
# so line tracking stays current. The loop stops at the first pass that
# consumes nothing.
#
# @return [nil]
def skip_intertoken_spaces
  loop do
    blanks_skipped = scanner.skip(/[ \t\f]+/)
    newline_skipped = scanner.skip(/(?:\r\n)|\r|\n/)
    next_line if newline_skipped
    break unless blanks_skipped || newline_skipped
  end

  nil
end