class Tracksperanto::ShakeGrammar::Lexer
Since Shake uses a C-like language for its scripts, we rig up a very sloppy but concise C-like lexer to cope.
Constants
- AT_FRAME
- FLOAT_ATOM
- INT_ATOM
- MAX_BUFFER_SIZE
- MAX_STACK_DEPTH
- STR_ATOM
Attributes
sentinel[R]
Access to the sentinel object
stack[R]
Parsed stack
Public Class Methods
new(with_io, sentinel = nil, limit_to_one_stmt = false, stack_depth = 0)
click to toggle source
The first argument is the IO handle to the data of the Shake script. The second argument is a “sentinel” that is going to be passed to the downstream lexers instantiated for nested data structures. You can use the sentinel to collect data from child nodes for example.
# File lib/import/shake_grammar/lexer.rb, line 25
# Sets up the lexer state and immediately lexes the input until a stop token
# is thrown, leaving the result in the stack.
#
# with_io::           IO handle to the data of the Shake script
# sentinel::          object handed down to the lexers created for nested structures
# limit_to_one_stmt:: stored flag that makes +parse+ stop at the end of one statement
# stack_depth::       nesting level of this lexer, checked by +parse+
def initialize(with_io, sentinel = nil, limit_to_one_stmt = false, stack_depth = 0)
  # We parse byte by byte, but reading byte by byte is very slow. We therefore
  # use a buffering reader that caches in chunks and serves single characters
  # from that cache. This yields a substantial speedup (4.9 seconds for the
  # test as opposed to 7.9 without this). The check below makes sure that
  # nested lexers only wrap the passed IO once, and only if necessary.
  unless with_io.respond_to?(:read_one_char)
    with_io = Bychar.wrap(with_io)
  end

  @io = with_io
  @stack = []
  @buf = ''
  @sentinel = sentinel
  @limit_to_one_stmt = limit_to_one_stmt
  @stack_depth = stack_depth

  # parse throws the stop token when this lexer has to finish
  catch(STOP_TOKEN) { loop { parse } }

  # Flush whatever is still sitting in the buffer
  if @in_comment
    consume_comment!
  else
    consume_atom!
  end
end
Private Instance Methods
consume_atom!()
click to toggle source
Grab the minimum atomic value
# File lib/import/shake_grammar/lexer.rb, line 117
# Converts the accumulated buffer into the smallest atomic value and pushes it.
# The buffer is always erased; a buffer that is blank after stripping pushes nothing.
def consume_atom!
  token = @buf.strip
  erase_buffer
  return if token.empty?

  parsed =
    case token
    when INT_ATOM
      token.to_i
    when STR_ATOM
      unquote_s(token)
    when FLOAT_ATOM
      token.to_f
    when AT_FRAME
      # The first capture group holds the frame; a dot means it is a float frame.
      # The value this frame belongs to is the atom pushed last.
      frame = Regexp.last_match(1)
      frame_number = frame.include?(".") ? frame.to_f : frame.to_i
      [:value_at, frame_number, @stack.pop]
    else
      [:atom, token]
    end

  push(parsed)
end
consume_comment!()
click to toggle source
# File lib/import/shake_grammar/lexer.rb, line 47
# Pushes the buffered comment text as a comment node, then empties the buffer.
def consume_comment!
  push_comment
  erase_buffer
end
erase_buffer()
click to toggle source
# File lib/import/shake_grammar/lexer.rb, line 164
# Replaces the character buffer with a fresh empty string.
def erase_buffer
  @buf = ''
end
parse()
click to toggle source
# File lib/import/shake_grammar/lexer.rb, line 52
# Reads one character from the IO and advances the lexer state by one step.
# Called in a loop by the constructor until STOP_TOKEN is thrown.
#
# Raises WrongInputError when the atom buffer grows past MAX_BUFFER_SIZE or
# when the nesting level exceeds MAX_STACK_DEPTH.
# Throws STOP_TOKEN when the IO is exhausted or when the current
# subexpression is closed.
def parse
  if @buf.length > MAX_BUFFER_SIZE # Wrong format and the buffer is filled up, bail
    raise WrongInputError, "Atom buffer overflow at #{MAX_BUFFER_SIZE} bytes, this is definitely not a Shake script"
  end

  if @stack_depth > MAX_STACK_DEPTH # Wrong format - parentheses overload
    raise WrongInputError, "Stack overflow at level #{MAX_STACK_DEPTH}, this is probably a LISP program uploaded by accident"
  end

  c = @io.read_one_char

  # IO has run out. The constructor only catches STOP_TOKEN, so throw that
  # constant here just like every other exit below does.
  throw STOP_TOKEN if c.nil?

  if c == '/' && @buf[-1, 1] == '/' # Comment start ([-1, 1] is nil on an empty buffer)
    # If some other data from this line has been accumulated we first consume that
    @buf = @buf[0..-2] # everything except the opening slash of the comment
    consume_atom! # this also erases the buffer
    @in_comment = true
  elsif @in_comment && c == "\n" # Comment end
    consume_comment!
    @in_comment = false
  elsif @in_comment
    @buf << c
  elsif !@buf.empty? && c == "(" # Funcall
    # The function name is taken from the buffer before the nested lexer consumes the arguments
    push([:funcall, @buf.strip] + self.class.new(@io, @sentinel, false, @stack_depth + 1).stack)
    erase_buffer
  elsif c == '{' # OFX curly braces or a subexpression in a node's knob
    # The nested lexer is run only to consume the input of the subexpression,
    # its stack is discarded
    self.class.new(@io, @sentinel, true, @stack_depth + 1)
    push(:expr)
  elsif c == "[" # Array, boring
    # Arrays still get no sentinel, but they do count towards the nesting level
    # so that the MAX_STACK_DEPTH guard above also covers runaway "[[[[..." input
    push([:arr, self.class.new(@io, nil, false, @stack_depth + 1).stack])
  elsif c == "}" # && @limit_to_one_stmt
    throw STOP_TOKEN
  elsif c == "]" || c == ")" || (c == ";" && @limit_to_one_stmt) # Bailing out of a subexpression
    consume_atom!
    throw STOP_TOKEN
  elsif c == ","
    consume_atom!
    # A comma ends the statement when we only lex a single one
    throw STOP_TOKEN if @limit_to_one_stmt
  elsif c == "@"
    consume_atom!
    @buf << c
  elsif c == ";" || c == "\n"
    # Skip these - the subexpression already is expanded anyway
  elsif c == "="
    # The variable definition is read from the buffer before the nested lexer
    # consumes the assigned value
    vardef_atom = vardef(@buf.strip)
    push([:assign, vardef_atom, self.class.new(@io, @sentinel, true, @stack_depth + 1).stack.shift])
    erase_buffer
  else
    @buf << c
  end
end
push(atom_array)
click to toggle source
In the default implementation this just puts things on the stack. However, if you want to unwrap structures as they come along (which you do for big files), you have to override this.
# File lib/import/shake_grammar/lexer.rb, line 149
# Appends the given atom to the parsed stack and returns the stack.
def push(atom_array)
  @stack.push(atom_array)
end
push_comment()
click to toggle source
# File lib/import/shake_grammar/lexer.rb, line 43
# Pushes the buffer as a [:comment, text] node, with any whitespace-prefixed
# double slashes removed from the text.
def push_comment
  comment_text = @buf.gsub(/(\s+?)\/\/{1}/, '')
  push([:comment, comment_text])
end
unquote_s(string)
click to toggle source
# File lib/import/shake_grammar/lexer.rb, line 142
# Strips surrounding whitespace, removes one leading and one trailing double
# quote and unescapes the escaped double quotes inside the string.
#
# The quotes are anchored to the whole string (\A and \z), so quotes that
# happen to sit at the start or end of an interior line are left alone.
def unquote_s(string)
  string.strip.sub(/\A"/, '').sub(/"\z/, '').gsub(/\\"/, '"')
end
vardef(var_specifier)
click to toggle source
# File lib/import/shake_grammar/lexer.rb, line 153
# Splits a variable specifier into its type and name.
#
# Since we can have two-word pointers as typedefs (char *) only the last word
# of the specifier is used as the variable name, everything before it is the
# type. Nodes return the :image type implicitly, so a specifier without a type
# gets :image.
#
# Returns [:vardef, typedef, varname].
def vardef(var_specifier)
  # Anchor to the end of the whole string so only the very last word matches
  varname_re = /\w+\z/
  varname = var_specifier[varname_re].to_s
  typedef = var_specifier.sub(varname_re, '').strip
  typedef = :image if typedef.empty?
  [:vardef, typedef, varname]
end