class Lisp::Tokenizer

Attributes

line_number[R]

Public Class Methods

from_file(f) click to toggle source
# File lib/rubylisp/tokenizer.rb, line 11
# Builds a tokenizer that reads characters directly from +f+.
#
# f - the character source, handed unchanged to the constructor. The
#     tokenizer calls eof?, readchar, ungetc and pos on it.
#
# Returns the new tokenizer; whitespace absorption keeps the constructor's
# default.
def self.from_file(f)
  new(f)
end
from_string(str, absorb_space=true) click to toggle source
# File lib/rubylisp/tokenizer.rb, line 7
# Builds a tokenizer over the text in +str+.
#
# str          - the source text; it is wrapped in a read-mode StringIO.
# absorb_space - passed straight to the constructor (defaults to true).
#
# Returns the new tokenizer.
def self.from_string(str, absorb_space=true)
  reader = StringIO.new(str, "r")
  new(reader, absorb_space)
end
new(src, absorb_space=true) click to toggle source
# File lib/rubylisp/tokenizer.rb, line 15
# Sets up the scanner state for +src+ and primes it by calling +init+.
#
# src          - character source; the tokenizer calls eof?, readchar,
#                ungetc and pos on it.
# absorb_space - when true (the default) read_next_token skips whitespace
#                between tokens; when false it reports :WHITESPACE tokens.
def initialize(src, absorb_space=true)
  @absorb_whitespace = absorb_space
  @source = src
  @position = 0
  @lookahead_token = nil
  @lookahead_literal = ''
  @eof = false
  @almost_eof = false
  # Previously misspelled as @curent_ch, which left @current_ch undefined
  # until the first call to advance.
  @current_ch = nil
  @next_ch = nil
  @next_next_ch = nil
  init
end

Public Instance Methods

advance() click to toggle source
# File lib/rubylisp/tokenizer.rb, line 30
# Moves the scanner forward by one character.
#
# Refreshes @current_ch plus the two lookahead characters @next_ch and
# @next_next_ch. Lookahead characters are read and then pushed back with
# ungetc, so the source stays positioned just after @current_ch.
#
# Sets @eof (and clears every character slot) once the source is exhausted,
# and sets @almost_eof when nothing follows the current character.
def advance
  if @source.eof?
    @eof = true
    @almost_eof = true
    @current_ch = nil
    @next_ch = nil
    # Clear the second lookahead too so no stale character survives EOF.
    @next_next_ch = nil
    return
  end

  @current_ch = @source.readchar

  # Peek at up to two following characters.
  lookahead = []
  2.times do
    break if @source.eof?
    lookahead << @source.readchar
  end
  # Push back in reverse so the characters are re-read in their original order.
  lookahead.reverse_each { |ch| @source.ungetc(ch) }

  # Missing lookahead slots become nil.
  @next_ch, @next_next_ch = lookahead
  @almost_eof = true if lookahead.empty?
end
almost_eof?() click to toggle source
# File lib/rubylisp/tokenizer.rb, line 62
# Returns the @almost_eof flag, which advance sets once no character
# remains in the source after the current one (or the source is exhausted).
def almost_eof?
  @almost_eof
end
consume_token() click to toggle source
# File lib/rubylisp/tokenizer.rb, line 313
# Loads the next non-comment token into the lookahead slot.
#
# Before each read, @position is set to the source's current pos; the token
# type and literal from read_next_token are stored in @lookahead_token and
# @lookahead_literal. :COMMENT tokens are discarded.
#
# Written as a loop rather than recursion so that a long run of consecutive
# comments cannot exhaust the call stack.
#
# Returns nil.
def consume_token
  loop do
    @position = @source.pos
    @lookahead_token, @lookahead_literal = read_next_token
    break unless @lookahead_token == :COMMENT
  end
end
digit?(ch) click to toggle source
# File lib/rubylisp/tokenizer.rb, line 74
# Tests whether +ch+ contains a digit character.
#
# Returns the match index (0 for a one-character digit string) or nil,
# including when +ch+ is nil.
def digit?(ch)
  /[[:digit:]]/ =~ ch
end
divider?(ch) click to toggle source
# File lib/rubylisp/tokenizer.rb, line 193
# Tests whether +ch+ ends a character literal: whitespace, or one of
# ( ) { } < > [ ].
#
# Returns the match index or nil (also nil when +ch+ is nil).
def divider?(ch)
  /[[[:space:]]\(\)\{\}<>\[\]]/ =~ ch
end
eof?() click to toggle source
# File lib/rubylisp/tokenizer.rb, line 58
# Returns the @eof flag, which advance sets when it is called with the
# source already exhausted (i.e. there is no current character).
def eof?
  @eof
end
graph?(ch) click to toggle source
# File lib/rubylisp/tokenizer.rb, line 88
# Tests whether +ch+ contains a character in the POSIX [:graph:] class.
#
# Returns the match index or nil (also nil when +ch+ is nil).
def graph?(ch)
  /[[:graph:]]/ =~ ch
end
hex?(ch) click to toggle source
# File lib/rubylisp/tokenizer.rb, line 70
# Tests whether +ch+ contains one of the hexadecimal letters a-f / A-F.
# Decimal digits are not matched here; read_number checks digit? separately.
#
# Returns the match index or nil (also nil when +ch+ is nil).
def hex?(ch)
  /[abcdefABCDEF]/ =~ ch
end
init() click to toggle source
# File lib/rubylisp/tokenizer.rb, line 319
# Primes the tokenizer: resets the line counter, loads the first character
# with its lookahead, and reads the first token into the lookahead slot.
def init
  @line_number = 0 # read_next_token increments this for each newline it skips
  advance          # must run before consume_token so @current_ch is populated
  consume_token
end
letter?(ch) click to toggle source
# File lib/rubylisp/tokenizer.rb, line 66
# Tests whether +ch+ contains an alphabetic character.
#
# Returns the match index or nil (also nil when +ch+ is nil).
def letter?(ch)
  /[[:alpha:]]/ =~ ch
end
next_token() click to toggle source
# File lib/rubylisp/tokenizer.rb, line 54
# Peeks at the current lookahead token without consuming it.
#
# Returns a three-element array: [position, token type, literal].
def next_token
  [@position, @lookahead_token, @lookahead_literal]
end
number?(ch) click to toggle source
# File lib/rubylisp/tokenizer.rb, line 78
# Tests whether +ch+ can start a numeric literal: either a digit, or a '-'
# when the scanner's next character (@next_ch) is a digit.
#
# Returns a truthy match index when it can, otherwise nil or false.
def number?(ch)
  leading_digit = digit?(ch)
  return leading_digit if leading_digit

  ch == '-' && digit?(@next_ch)
end
process_escapes(str) click to toggle source
# File lib/rubylisp/tokenizer.rb, line 151
# Expands backslash escapes in the raw text of a string literal.
#
# Recognised escapes are \n (newline), \t (tab) and \\ (one backslash).
# Any other escaped character is kept together with its backslash, and a
# lone trailing backslash is kept as is.
#
# str - the raw string contents.
#
# Returns a new string with the escapes expanded.
def process_escapes(str)
  translations = { 'n' => "\n", 't' => "\t", '\\' => "\\" }
  output = ""
  escape_pending = false

  str.each_char do |ch|
    if escape_pending
      # Unknown escapes pass through unchanged, backslash included.
      output << translations.fetch(ch) { "\\#{ch}" }
      escape_pending = false
    elsif ch == '\\'
      escape_pending = true
    else
      output << ch
    end
  end

  # A backslash that was the last character has nothing to escape.
  output << "\\" if escape_pending
  output
end
read_character() click to toggle source
# File lib/rubylisp/tokenizer.rb, line 198
# Reads the body of a character literal. read_next_token calls this after
# it has consumed the leading `#\`.
#
# The first character is always taken, even if it is a divider; reading then
# continues until a divider or the end of input.
#
# Returns [:CHARACTER, literal], or [:EOF, ''] when the input ends right
# after `#\`. That case previously raised TypeError by appending nil.
def read_character
  return :EOF, '' if eof?

  lit = ""
  lit << @current_ch
  advance
  until eof? || divider?(@current_ch)
    lit << @current_ch
    advance
  end

  return :CHARACTER, lit
end
read_next_token() click to toggle source
# File lib/rubylisp/tokenizer.rb, line 211
# Scans the next token starting at @current_ch.
#
# With @absorb_whitespace set, leading whitespace is skipped; newlines are
# counted in @line_number and @position is updated to the source's pos.
# Otherwise each whitespace character is reported as its own :WHITESPACE
# token whose literal is always a single blank.
#
# The order of the branches matters: multi-character prefixes such as
# "'#(", "'{" and ",@" must be tested before their one-character forms.
#
# Returns a [type, literal] pair. Unrecognised characters yield
# [:ILLEGAL, char]; the character is consumed so that repeated calls always
# make progress.
def read_next_token
  return :EOF, '' if eof?

  if @absorb_whitespace
    while space?(@current_ch)
      @line_number += 1 if @current_ch == "\n"
      advance
      @position = @source.pos
      return :EOF, '' if eof?
    end
  elsif space?(@current_ch)
    @line_number += 1 if @current_ch == "\n"
    advance
    return :WHITESPACE, " "
  end

  if number?(@current_ch)
    # number? also accepts a '-' that is directly followed by a digit, so
    # negative literals need no separate branch.
    return read_number(false)
  elsif @current_ch == '#' && @next_ch == 'x'
    advance
    advance
    return read_number(true)
  elsif @current_ch == '"'
    advance
    return read_string
  elsif @current_ch == '#' && @next_ch == '\\'
    advance
    advance
    return read_character
  elsif @current_ch == '\'' && @next_ch == '{'
    advance
    advance
    return :QUOTE_LBRACE, "'{"
  elsif @current_ch == '\'' && @next_ch == '#' && @next_next_ch == '('
    advance
    advance
    advance
    return :QUOTE_HASH_LPAREN, "'#("
  elsif @current_ch == '\''
    advance
    return :QUOTE, "'"
  elsif @current_ch == '`'
    advance
    return :BACKQUOTE, "`"
  elsif @current_ch == ',' && @next_ch == '@'
    advance
    advance
    return :COMMAAT, ",@"
  elsif @current_ch == ','
    advance
    return :COMMA, ","
  elsif @current_ch == '('
    advance
    return :LPAREN, "("
  elsif @current_ch == '#' && @next_ch == '('
    advance
    advance
    return :HASH_LPAREN, "#("
  elsif @current_ch == ')'
    advance
    return :RPAREN, ")"
  elsif @current_ch == '{'
    advance
    return :LBRACE, "{"
  elsif @current_ch == '}'
    advance
    return :RBRACE, "}"
  elsif @current_ch == '['
    advance
    return :LBRACKET, "["
  elsif @current_ch == ']'
    advance
    return :RBRACKET, "]"
  elsif @current_ch == '.' && space?(@next_ch)
    advance
    return :PERIOD, "."
  elsif @current_ch == '.' && symbol_character?(@next_ch)
    return read_symbol
  elsif symbol_character?(@current_ch)
    return read_symbol
  elsif @current_ch == '#' && @next_ch == 't'
    advance
    advance
    return :TRUE, "#t"
  elsif @current_ch == '#' && @next_ch == 'f'
    advance
    advance
    return :FALSE, "#f"
  elsif @current_ch == ';'
    # A comment runs to the end of the line. The newline itself is left for
    # the whitespace handling above so that it is counted.
    lit = ""
    until eof? || @current_ch == "\n"
      lit << @current_ch
      advance
    end
    return :COMMENT, lit
  else
    # Step past the offending character; otherwise every later call would
    # report the same :ILLEGAL token again.
    illegal = @current_ch
    advance
    return :ILLEGAL, illegal
  end
end
read_number(hex) click to toggle source
# File lib/rubylisp/tokenizer.rb, line 126
# Reads a numeric literal starting at @current_ch.
#
# hex - when truthy, the letters a-f/A-F are accepted in addition to digits
#       and no decimal point is allowed.
#
# An optional leading '-' is kept. In decimal mode a single '.' is accepted
# and turns the literal into a float; a second '.' ends the literal.
#
# Returns [:HEXNUMBER | :FLOAT | :NUMBER, literal].
def read_number(hex)
  digits = ""
  if @current_ch == '-'
    digits << @current_ch
    advance
  end

  seen_point = false
  until eof?
    if !hex && !seen_point && @current_ch == '.'
      seen_point = true
    elsif !digit?(@current_ch) && !(hex && hex?(@current_ch))
      break
    end
    digits << @current_ch
    advance
  end

  kind = :NUMBER
  kind = :FLOAT if seen_point
  kind = :HEXNUMBER if hex
  return kind, digits
end
read_string() click to toggle source
# File lib/rubylisp/tokenizer.rb, line 180
# Reads the body of a string literal. read_next_token calls this after it
# has consumed the opening double quote.
#
# Characters are collected up to the closing quote, which is consumed.
# Newlines inside the literal are counted in @line_number so that line
# numbers reported after a multi-line string stay accurate.
#
# Returns [:STRING, text] with escapes expanded by process_escapes, or
# [:EOF, ''] if the input ends before the closing quote.
def read_string
  lit = ""
  until eof? || @current_ch == '"'
    @line_number += 1 if @current_ch == "\n"
    lit << @current_ch
    advance
  end

  return :EOF, '' if eof?
  advance
  return :STRING, process_escapes(lit)
end
read_symbol() click to toggle source
# File lib/rubylisp/tokenizer.rb, line 97
# Reads a symbol, classifying the FFI forms as it goes.
#
# * A leading '.' marks an :FFI_SEND_SYMBOL; the dot is dropped from the
#   literal.
# * A '.' after some text, not followed by a symbol character, marks an
#   :FFI_NEW_SYMBOL; the trailing dot is dropped.
# * A '/' after some text marks an :FFI_STATIC_SYMBOL; the literal is kept
#   whole.
# * Anything else is a plain :SYMBOL.
#
# Returns [type, literal].
def read_symbol
  text = ""
  kind = nil
  if @current_ch == '.'
    text << @current_ch
    advance
    kind = :FFI_SEND_SYMBOL
  end

  until eof?
    trailing_dot = @current_ch == '.' && !symbol_character?(@next_ch) && kind.nil?
    static_slash = @current_ch == '/' && symbol_character?(@next_ch)
    break unless trailing_dot || static_slash || symbol_character?(@current_ch)

    # The FFI markers only count once some text precedes them.
    unless text.empty?
      kind ||= :FFI_NEW_SYMBOL if @current_ch == '.'
      kind = :FFI_STATIC_SYMBOL if @current_ch == '/'
    end
    text << @current_ch
    advance
  end

  kind ||= :SYMBOL
  name = case kind
         when :FFI_SEND_SYMBOL
           text[1..-1]
         when :FFI_NEW_SYMBOL
           text[0..-2]
         when :SYMBOL, :FFI_STATIC_SYMBOL
           text
         end
  return kind, name
end
space?(ch) click to toggle source
# File lib/rubylisp/tokenizer.rb, line 83
# Tests whether +ch+ contains a whitespace character.
#
# Returns the match index or nil (also nil when +ch+ is nil).
def space?(ch)
  /[[:space:]]/ =~ ch
end
symbol_character?(ch) click to toggle source
# File lib/rubylisp/tokenizer.rb, line 93
# Tests whether +ch+ may appear inside a symbol: it must be a graphic
# character (per graph?) and not one of the reserved punctuation marks.
#
# Returns true or false for graphic characters, and nil when graph? finds
# no match (including when +ch+ is nil).
def symbol_character?(ch)
  printable = graph?(ch)
  return printable unless printable

  reserved = "();\"'`|[]{}#,."
  !reserved.include?(ch)
end