class Klam::Lexer

Constants

SYMBOL_CHARS

Public Class Methods

new(stream) click to toggle source
# File lib/klam/lexer.rb, line 15
# Builds a lexer that pulls its characters from +stream+.
#
# stream - the character source; the lexer calls +getc+ and +eof?+ on it.
def initialize(stream)
  # Stack of pushed-back characters (see ungetc); starts out empty.
  @buffer = []
  @stream = stream
end

Public Instance Methods

eof?() click to toggle source
# File lib/klam/lexer.rb, line 20
# Reports whether the lexer has run out of input.
#
# Returns true only when there are no pushed-back characters left AND the
# underlying stream reports end of file.
def eof?
  # Pushed-back characters still count as pending input.
  return false unless @buffer.empty?

  @stream.eof?
end
getc() click to toggle source
# File lib/klam/lexer.rb, line 24
# Fetches the next character of input.
#
# Characters previously handed back via ungetc are served first, most
# recently pushed first; otherwise the character comes from the stream.
def getc
  return @buffer.pop unless @buffer.empty?

  @stream.getc
end
next() click to toggle source
# File lib/klam/lexer.rb, line 36
# Reads and returns the next token from the input.
#
# Leading whitespace is skipped first. Returns nil when the input is
# exhausted. Otherwise the first character selects the token kind:
# parentheses yield the OpenParen/CloseParen instances, a double quote
# starts a string, and any character matching SYMBOL_CHARS starts a number
# or symbol.
#
# Raises Klam::SyntaxError for any other character.
def next
  drain_whitespace
  return if eof?

  ch = getc
  case ch
  when '(' then OpenParen.instance
  when ')' then CloseParen.instance
  when '"' then consume_string
  when SYMBOL_CHARS
    # Hand the character back so the number/symbol reader sees the whole token.
    ungetc(ch)
    consume_number_or_symbol
  else
    raise Klam::SyntaxError, "illegal character: #{ch}"
  end
end
ungetc(c) click to toggle source
# File lib/klam/lexer.rb, line 32
# Hands a character back to the lexer so that the next getc returns it.
#
# c - the character to push back. Characters come back out in reverse
#     order of being pushed.
def ungetc(c)
  @buffer << c
end

Private Instance Methods

consume_number() click to toggle source
# File lib/klam/lexer.rb, line 78
# Reads a numeric literal from the input and returns it as an Integer or,
# when it has a decimal point followed by more digits, as a Float.
#
# Mirrors the Shen reader:
# * Any run of leading '+' and '-' signs is accepted. Plusses are dropped
#   and every minus flips the sign, so '--+7' reads as 7 and '-+--7'
#   reads as -7.
# * "7." is the integer 7 followed by the symbol '.', so a trailing decimal
#   point is handed back to the input rather than consumed.
#
# The first character that cannot extend the number is pushed back.
def consume_number
  digits = []
  negate = false
  seen_point = false
  in_sign_prefix = true

  until eof?
    ch = getc

    # Signs are only meaningful before the first digit or decimal point.
    if in_sign_prefix && (ch == '+' || ch == '-')
      negate = !negate if ch == '-'
      next
    end

    if ch =~ /\d/
      digits << ch
    elsif ch == '.' && !seen_point
      seen_point = true
      digits << ch
    else
      ungetc(ch)
      break
    end
    in_sign_prefix = false
  end

  if digits.last == '.'
    # A trailing decimal point belongs to the next token; forget we saw it.
    ungetc(digits.pop)
    seen_point = false
  end

  literal = digits.join
  literal = "-#{literal}" if negate
  seen_point ? literal.to_f : literal.to_i
end
consume_number_or_symbol() click to toggle source
# File lib/klam/lexer.rb, line 141
# Decides whether the upcoming token is a number or a symbol and delegates
# to consume_number or consume_symbol accordingly.
#
# The token is a number when, after any run of leading '+'/'-' signs and
# one optional decimal point, the next character is a digit. In every
# other case (including running out of input) it is a symbol.
#
# Everything read while deciding is pushed back first, so the delegate
# sees the token from its very first character.
def consume_number_or_symbol
  lookahead = []

  # Collect leading signs plus the first non-sign character, if any.
  until eof?
    lookahead << getc
    break unless lookahead.last =~ /[-+]/
  end

  # A decimal point may sit between the signs and the first digit, so look
  # one character further when there is one.
  lookahead << getc if lookahead.last == '.' && !eof?

  deciding_char = lookahead.last
  lookahead.reverse_each { |ch| ungetc(ch) }

  deciding_char =~ /\d/ ? consume_number : consume_symbol
end
consume_string() click to toggle source
# File lib/klam/lexer.rb, line 67
# Reads the body of a string literal whose opening quote has already been
# consumed, and returns it as a String.
#
# Every character up to (but not including) the closing double quote is
# taken verbatim; the closing quote itself is consumed.
#
# Raises Klam::SyntaxError if the input ends before the closing quote.
def consume_string
  contents = []
  loop do
    raise Klam::SyntaxError, "unterminated string" if eof?

    ch = getc
    return contents.join if ch == '"'

    contents << ch
  end
end
consume_symbol() click to toggle source
# File lib/klam/lexer.rb, line 118
# Reads a symbol token: the longest run of characters matching
# SYMBOL_CHARS. The first non-matching character is pushed back.
#
# Returns true or false for the tokens 'true' and 'false', and a Ruby
# Symbol for anything else.
def consume_symbol
  name_chars = []
  until eof?
    ch = getc
    unless ch =~ SYMBOL_CHARS
      ungetc(ch)
      break
    end
    name_chars << ch
  end

  name = name_chars.join
  # The boolean literals are read as Ruby booleans rather than symbols.
  return true if name == 'true'
  return false if name == 'false'

  name.to_sym
end
drain_whitespace() click to toggle source
# File lib/klam/lexer.rb, line 57
# Skips over whitespace in the input.
#
# Characters are read and discarded until a non-whitespace character is
# found, which is pushed back so the next read returns it, or until the
# input runs out.
def drain_whitespace
  loop do
    break if eof?

    ch = getc
    next unless ch =~ /\S/

    ungetc(ch)
    break
  end
end