class Zenlish::Lexer::Lexer
Attributes
line_start[R]
@return [Integer] Offset of the start of the current line within the input (zero-based).
lineno[R]
@return [Integer] Current line number (one-based)
scanner[R]
@return [StringScanner] Low-level scanner object.
zenlish_mode[R]
@return [Boolean] true if the lexer is currently scanning Zenlish text.
Public Class Methods
new(source)
click to toggle source
Constructor. Initialize a tokenizer for Zenlish. @param source [String] Zenlish text to tokenize.
# File lib/zenlish/lexer/lexer.rb, line 31
# Constructor. Creates the low-level scanner and loads the text to tokenize.
# The lexer starts with Zenlish mode switched on.
# @param source [String] text to tokenize.
def initialize(source)
  @zenlish_mode = true
  # Begin with an empty scanner; reinitialize installs the actual text
  # and resets the line bookkeeping.
  @scanner = StringScanner.new('')
  reinitialize(source)
end
Public Instance Methods
reinitialize(source)
click to toggle source
@param source [String] Zenlish text to tokenize.
# File lib/zenlish/lexer/lexer.rb, line 38
# Point the existing scanner at a new text and reset the line tracking.
# @param source [String] text to tokenize.
def reinitialize(source)
  # Hand the new text over to the scanner.
  @scanner.string = source

  # Back to the first line, which begins at offset zero.
  @lineno = 1
  @line_start = 0
end
tokens()
click to toggle source
@return [Array<Token>] The sequence of tokens.
# File lib/zenlish/lexer/lexer.rb, line 45
# Scan the remaining input and collect every token found.
# @return [Array] the tokens, in the order they were scanned.
def tokens
  collected = []
  loop do
    break if @scanner.eos?

    tok = _next_token
    # _next_token may yield nil (e.g. only blanks were left); skip those.
    collected << tok unless tok.nil?
  end

  collected
end
Private Instance Methods
_next_token()
click to toggle source
# File lib/zenlish/lexer/lexer.rb, line 57
# Skip blanks and line breaks, then scan one token with the recognizer
# that matches the current mode.
# @return the value of the mode-specific scanning method (may be nil).
def _next_token
  skip_intertoken_spaces
  return next_zenlish_token if zenlish_mode

  next_json_token
end
build_token(aSymbolName, aLiteral, _format = :default)
click to toggle source
# File lib/zenlish/lexer/lexer.rb, line 87
# Create a token for a literal that the scanner has just consumed.
#
# The token position is (@lineno, column), where column is the one-based
# character column of the first character of the literal.
#
# @param aSymbolName [String] name of the terminal symbol of the token.
# @param aLiteral [String] the text that was just scanned.
# @param _format [Symbol] unused.
# @return [Rley::Lexical::Token]
# @raise [StandardError] any error raised while building the token is
#   re-raised after a diagnostic line has been printed.
def build_token(aSymbolName, aLiteral, _format = :default)
  # StringScanner#pos is a byte offset, so the start of the token must be
  # derived from the literal's byte size, not from its character count.
  token_start = scanner.pos - aLiteral.bytesize
  offset = token_start - @line_start

  # Convert the byte distance from the start of the line into a character
  # count, so that multibyte characters do not shift the column.
  # (@line_start is taken to be a byte offset, like scanner.pos.)
  prefix = offset > 0 ? scanner.string.byteslice(@line_start, offset) : nil
  col = (prefix ? prefix.size : offset) + 1

  pos = Rley::Lexical::Position.new(@lineno, col)
  Rley::Lexical::Token.new(aLiteral, aSymbolName, pos)
rescue StandardError
  puts "Failing with '#{aSymbolName}' and '#{aLiteral}'"
  raise
end
next_json_token()
click to toggle source
# File lib/zenlish/lexer/lexer.rb, line 100
# Scan one token at the current scanner position.
# @return the token built by build_token, or nil when no character is left.
# @raise [ScanError] when the next character starts neither a delimiter
#   nor a word (e.g. ';').
def next_json_token
  lookahead = scanner.peek(1)
  return nil if lookahead.nil? || lookahead.empty?

  # Delimiters, separators => single character token.
  # NOTE(review): @@punct2name is defined outside this method; presumably it
  # maps each punctuation character to its terminal name -- confirm.
  if ':,."'.include?(lookahead)
    return build_token(@@punct2name[lookahead], scanner.getch)
  end

  word = scanner.scan(/[^\s:;,."]+/)
  return build_token('WORD', word) if word

  # Unknown token: report the offending character together with up to
  # twenty characters that follow it.
  erroneous = scanner.scan(/./)
  sequel = scanner.scan(/.{1,20}/)
  erroneous += sequel unless sequel.nil?
  raise ScanError, "Unknown token #{erroneous} on line #{lineno}"
end
next_line()
click to toggle source
# File lib/zenlish/lexer/lexer.rb, line 138
# Record that a new line begins at the current scanner position.
# @return [Integer] the scanner position stored as the new line start.
def next_line
  @lineno = @lineno.succ
  # Remember where this line begins; build_token uses it to compute columns.
  @line_start = scanner.pos
end
next_zenlish_token()
click to toggle source
# File lib/zenlish/lexer/lexer.rb, line 66
# Scan one token at the current scanner position.
# @return the token built by build_token, or nil when no character is left.
# @raise [ScanError] when the next character starts neither a delimiter
#   nor a word (e.g. ';').
def next_zenlish_token
  lookahead = scanner.peek(1)
  return nil if lookahead.nil? || lookahead.empty?

  # Delimiters, separators => single character token.
  # NOTE(review): @@punct2name is defined outside this method; presumably it
  # maps each punctuation character to its terminal name -- confirm.
  if ':,."'.include?(lookahead)
    return build_token(@@punct2name[lookahead], scanner.getch)
  end

  word = scanner.scan(/[^\s:;,."]+/)
  return build_token('WORD', word) if word

  # Unknown token: report the offending character together with up to
  # twenty characters that follow it.
  erroneous = scanner.scan(/./)
  sequel = scanner.scan(/.{1,20}/)
  erroneous += sequel unless sequel.nil?
  raise ScanError, "Unknown token #{erroneous} on line #{lineno}"
end
skip_intertoken_spaces()
click to toggle source
# File lib/zenlish/lexer/lexer.rb, line 121
# Advance the scanner past any run of blanks (space, tab, form feed) and
# line breaks, calling next_line once per line break consumed.
# @return [nil]
def skip_intertoken_spaces
  loop do
    blanks_skipped = !scanner.skip(/[ \t\f]+/).nil?
    newline_skipped = scanner.skip(/(?:\r\n)|\r|\n/)
    next_line if newline_skipped

    # Stop once a full pass consumed neither blanks nor a line break.
    break unless blanks_skipped || newline_skipped
  end

  nil
end