class ExpressionTokenizer
TODO Clean up code now that Lexr::Token
is a dynamic class generator
Attributes
Public Class Methods
Takes a string str and a hash of arguments and creates a lexical token reference of str. It will also parse the lexical tokens into an array of items. :keep_escape determines whether or not to keep all escape backslashes; the default is false.
Tokenizer::new
# File libs/lexer.rb, line 532
# Builds the tokenizer for +str+. When the caller did not supply a :lexer
# option it is set to ExpressionLexer (written into +args+ itself) before
# everything is handed to the superclass constructor.
#
# str  - the text passed on to the superclass constructor.
# args - options hash forwarded to the superclass.
def initialize(str, args = {})
  args[:lexer] = ExpressionLexer unless args[:lexer]
  super(str, args)
  # Disabled in the original source:
  #@parsed=parse(args)
  #@items=@parsed.clone
end
Public Instance Methods
Do we have an array? [1,2,3]
# File libs/lexer.rb, line 686
# Tells whether the token at +pos+ opens an array literal.
#
# pos  - index of the token to inspect.
# args - accepted but not used by this method.
#
# Returns false for a whitespace token, otherwise the result of
# open?(pos, :array).
def array?(pos, args = {})
  if self[pos].kind == :whitespace
    false
  else
    open?(pos, :array)
  end
end
assignment? Checks to see if the token at pos denotes the start of an assignment; pos is used as the starting reference point. If :return_pos is passed, it will return the associated positions for each element when an assignment is found, otherwise it will return nil.
# File libs/lexer.rb, line 668
# Checks whether the tokens starting at +pos+ have the shape
# <element> = <element or opening bracket>, using walk to step over the
# tokens between the three parts.
#
# pos  - index of the candidate left-hand side token.
# args - :return_pos => true to get the three positions back instead of
#        a plain boolean.
#
# Returns true/false by default. With :return_pos it returns
# [lhs_pos, equals_pos, rhs_pos] on a match and nil otherwise. Returns
# false outright if walk ever hands back nil.
def assignment?(pos, args = {})
  want_positions = args[:return_pos] || false

  lhs_pos = pos
  eq_pos = walk(pos + 1)
  return false if eq_pos.nil?
  rhs_pos = walk(eq_pos + 1)
  return false if rhs_pos.nil?

  lhs_ok = of_type?(lhs_pos, :element)
  eq_ok = of_type?(eq_pos, :equals)
  rhs_ok = of_type?(rhs_pos, [:element, :open])
  matched = lhs_ok && eq_ok && rhs_ok

  return matched unless want_positions

  # Positions are only meaningful when the pattern actually matched.
  matched ? [lhs_pos, eq_pos, rhs_pos] : nil
end
Returns true/false depending on whether the token at pos is a closing token (of type :close). If :close is passed, the kind of the token at pos is compared against that value instead.
# File libs/lexer.rb, line 640
# Tells whether the token at +pos+ is a closing token.
#
# pos  - index of the token to inspect.
# args - :close => kind; when given, the token's kind must equal exactly
#        that value.
#
# Returns true/false. Without :close it defers to of_type?(pos, :close).
def close?(pos, args = {})
  expected = args[:close] || nil
  return of_type?(pos, :close) if expected.nil?

  self[pos].kind == expected
end
drop drops the first num elements from the tokens array
# File libs/lexer.rb, line 552
# Drops the first +num+ elements from the tokens array, in place.
#
# The previous implementation sliced from index num-1 to the end, which
# removed the tail instead of the head: drop(1) emptied the array and
# drop(0) removed the last element.
#
# num - number of leading elements to remove.
#
# Returns the removed elements (as slice! does).
def drop(num)
  slice!(0, num)
end
# File libs/lexer.rb, line 654
# Tells whether +pos+ is at the end of what should be processed: past
# the last token, on the requested closing token, or on an :end token.
#
# pos  - index to test.
# args - :close => kind of the closing token that also counts as the end.
#
# Note: when :close is not given the Symbol :nil (not nil) is passed to
# close?, so close? compares the token kind with :nil instead of falling
# back to its generic closing-bracket test.
#
# Returns true/false.
def end?(pos, args = {})
  closer = args[:close] || :nil
  return true unless pos < length
  return true if close?(pos, :close => closer)

  self[pos].kind == :end
end
# File libs/lexer.rb, line 706
# Tells whether the token at +pos+ opens a hash.
#
# pos  - index of the token to inspect.
# args - accepted but not used by this method.
#
# Returns the result of the :hash type test (what open?(pos, :hash)
# resolves to).
def hash?(pos, args = {})
  of_type?(pos, :hash)
end
# File libs/lexer.rb, line 659
# Tells whether the kind of the token at +pos+ is one of the listed
# invalid kinds.
#
# pos            - index of the token to inspect.
# invalid_tokens - array of token kinds considered invalid.
#
# Returns true/false.
def invalid?(pos, invalid_tokens)
  invalid_tokens.include?(self[pos].kind)
end
# File libs/lexer.rb, line 710
# Raises an error describing an invalid token at +pos+.
#
# pos  - index of the offending token.
# args - :msg   => message handed to the error (default nil).
#        :error => error class to raise (default InvalidCharacter); it
#                  must satisfy error_class.class_of?(ZError).
#        :retry => value passed on as :retry (default true).
#        :end_pos is accepted for compatibility; the original never used it.
#
# The error is built with :invalid_str (the text of all tokens joined),
# :position (length of the text before the offending token),
# :invalid_char (the offending token's value) and :retry.
#
# Fixes over the previous version:
# * :retry => false is honoured (`args[:retry] || true` was always true).
# * pos == 0 no longer selects the whole token list as the "prefix"
#   (0..pos-1 is 0..-1), which doubled the string and misplaced :position.
# * a +pos+ at/after the end reports invalid_char as nil instead of
#   failing with NoMethodError.
#
# Raises ZError if :error is not an accepted class, otherwise an instance
# of the selected error class. Never returns normally.
def invalid_character(pos, args = {})
  msg = args[:msg] || nil
  error_class = args[:error] || InvalidCharacter
  if !error_class.class_of?(ZError)
    raise ZError.new("\"#{error_class.inspect}\" is not a valid class. :error must be of class ZError or a descendant.", :retry=>false)
  end
  # Only fall back to true when the caller did not say anything.
  retry_var = args[:retry].nil? ? true : args[:retry]

  debug(5,:msg=>"Invalid_Character (function/line num is caller)",:stack_pos=>1,:trace_depth=>4)

  # Exclusive range: yields an empty prefix for pos == 0.
  prefix = (self[0...pos] || []).map { |token| token.value }.join
  remainder = (self[pos..-1] || []).map { |token| token.value }.join
  position = prefix.length
  invalid_str = prefix + remainder

  offending = self[pos]
  invalid_char = offending.nil? ? nil : offending.value

  raise error_class.new(msg, :invalid_str=>invalid_str,:position=>position,:invalid_char=>invalid_char, :retry=>retry_var)
end
# File libs/lexer.rb, line 557
# Concatenates the +value+ of every token.
#
# str - optional separator placed between the values (default nil).
#
# Returns the joined String.
def join(str = nil)
  values = collect { |token| token.value }
  values.join(str)
end
# File libs/lexer.rb, line 576
# Tests the kind of the token at +pos+ against one or more types.
#
# pos   - index of the token to inspect.
# types - a Symbol or an Array of Symbols. Besides literal token kinds,
#         these group names are expanded:
#           :element => :word, :number, :quote, :variable
#           :open    => :l_curly, :l_paren, :l_square
#           :close   => :r_paren, :r_curly, :r_square
#           :paren   => :l_paren
#           :hash    => :l_curly
#           :array   => :l_square
# args  - accepted but not used by this method.
#
# The previous version removed group names from +types+ with
# Array#delete, silently modifying the caller's array; this version
# never mutates its argument.
#
# Returns true/false; false when +pos+ is past the last token.
# Raises RuntimeError if +types+ is neither a Symbol nor an Array.
def of_type?(pos, types, args = {})
  raise "Types must be symbol or array" unless types.is_a?(Symbol) || types.is_a?(Array)
  return false if pos > length - 1

  groups = {
    :element => [:word, :number, :quote, :variable],
    :open    => [:l_curly, :l_paren, :l_square],
    :close   => [:r_paren, :r_curly, :r_square],
    :paren   => [:l_paren],
    :hash    => [:l_curly],
    :array   => [:l_square]
  }

  kind = self[pos].kind
  # [types].flatten builds a fresh array, so the caller's object is untouched.
  [types].flatten.any? { |type| groups.fetch(type, [type]).include?(kind) }
end
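Performs a set intersection (through of_type?) to see if we have an open token at pos. If open_type is given, the token is tested against that type instead of the generic :open group.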
# File libs/lexer.rb, line 649
# Tells whether the token at +pos+ is an opening token.
#
# pos       - index of the token to inspect.
# open_type - type handed to of_type?; when nil the generic :open type
#             is used.
#
# Returns whatever of_type? returns for the selected type.
def open?(pos, open_type = nil)
  wanted = open_type.nil? ? :open : open_type
  of_type?(pos, wanted)
end
# File libs/lexer.rb, line 539
# Runs unravel over the tokens and returns the collected items.
#
# args - options forwarded to unravel. :pos (default 0) selects the
#        start position and is removed from +args+ before forwarding.
#
# Returns the item list produced by unravel; when that list consists of
# exactly one Array, the inner Array is returned instead.
def parse(args = {})
  start = args.delete(:pos) || 0
  _end_pos, items = unravel(start, args)

  if items.length == 1 && items[0].class == Array
    items[0]
  else
    items
  end
end
Do we have a simple array? “1 2,3,4” -> 2,3,4
# File libs/lexer.rb, line 692
# Tells whether the tokens at +pos+ look like the start of a
# bracket-less list: <element> , <element or opening bracket>.
#
# pos  - index of the first candidate element.
# args - accepted but not used by this method.
#
# Returns false when +pos+ opens a bracketed array or when walk returns
# nil; otherwise true/false for the three-part pattern.
def simple_array?(pos, args = {})
  return false if array?(pos)

  # "bla , bla" -> first element, comma, following element
  comma_pos = walk(pos + 1)
  return false if comma_pos.nil?
  next_pos = walk(comma_pos + 1)
  return false if next_pos.nil?

  first_ok = of_type?(pos, :element)
  comma_ok = of_type?(comma_pos, :comma)
  next_ok = of_type?(next_pos, [:element, :open])
  first_ok && comma_ok && next_ok
end
Walk Parameters:
pos, :look_for, :walk_over
Will walk over tokens denoted in :walk_over but stop on token symbols denoted in :look_for. :walk_over defaults to the whitespace token. Walking starts at pos. If :look_for is assigned a value, walk will walk over :walk_over tokens and stop when the passed token is found. :walk_over and :look_for can be either a single symbol or an array of symbols. If :walk_over is empty, walk is meant to walk over all tokens until :look_for is found. Returns the position walked to. Note that the code below returns the starting position (rather than nil) if :look_for was not found or pos was already at the end. If the end was found @pos will never be updated.
# File libs/lexer.rb, line 610
# Advances from +pos+ over skippable tokens and reports where it stopped.
#
# pos  - index to start walking from.
# args - :walk_over => kind or array of kinds to step over
#                      (default [:whitespace]); pass [] to step over
#                      every token until a :look_for kind is met.
#        :look_for  => kind or array of kinds that must be found at the
#                      stopping position (default: none).
#
# Returns the position walked to. Returns the starting position when
# +pos+ is already at the end, or when :look_for was given and the
# stopping position does not hold one of those kinds.
#
# Fixes over the previous version:
# * with an empty :walk_over the loop condition was inverted (it stepped
#   over :look_for tokens), so that mode could only return the start;
# * walking to the end with :look_for set dereferenced a missing token;
# * the caller's :look_for / :walk_over arrays are no longer modified
#   by compact!.
#
# Raises RuntimeError when both lists are empty.
def walk(pos, args = {})
  look_for = args[:look_for] || []
  look_for = (look_for.is_a?(Array) ? look_for : [look_for]).compact
  walk_over = args[:walk_over] || [:whitespace]
  walk_over = (walk_over.is_a?(Array) ? walk_over : [walk_over]).compact

  start_pos = pos
  raise ":walk_over and :look_for cannot both be empty" if look_for.empty? && walk_over.empty?
  return start_pos if end?(pos)

  if walk_over.empty?
    # No skip list: move forward until a wanted token (or the end) is reached.
    pos += 1 until end?(pos) || look_for.include?(self[pos].kind)
  else
    pos += 1 while !end?(pos) && walk_over.include?(self[pos].kind)
  end

  unless look_for.empty?
    found = self[pos] # nil when we walked off the end of the tokens
    return start_pos if found.nil? || !look_for.include?(found.kind)
  end
  pos
end
# File libs/lexer.rb, line 561
# Classifies what starts at +pos+.
#
# pos  - index of the token to classify.
# args - accepted but not used by this method.
#
# Returns the first matching Symbol, tested in this order: :end,
# :whitespace, :comma, :escape, :comment, :paren, :close, :hash, :array,
# :assignment, and finally :other.
def what_is?(pos, args = {})
  return :end if end?(pos)

  # Kinds that map one-to-one onto an of_type? test, in priority order.
  [:whitespace, :comma, :escape, :comment, :paren].each do |kind|
    return kind if of_type?(pos, kind)
  end

  if close?(pos)
    :close
  elsif hash?(pos)
    :hash
  elsif array?(pos)
    :array
  # The simple_array?(pos) test is disabled in the original source.
  elsif assignment?(pos)
    :assignment
  else
    :other
  end
end
Private Instance Methods
# File libs/lexer.rb, line 745
# Reads the assignment starting at +pos+ and turns it into a one-entry
# Hash.
#
# pos  - index of the left-hand side token.
# args - accepted but not used by this method.
#
# The right-hand side is the token's value for an element, the result of
# get_hash for an opening curly brace, and the result of unravel (up to
# the matching closer from get_close) for any other accepted token.
#
# Returns [next_pos, { left_value => right_side }].
# Calls invalid_character with "Invalid assignment" when assignment?
# reports no positions.
def get_assignment(pos, args = {})
  positions = assignment?(pos, :return_pos => true)
  invalid_character(pos, :msg => "Invalid assignment") if positions.nil?

  key_pos = positions[0]
  value_pos = positions[2]
  lside = self[key_pos].value

  if of_type?(value_pos, :element)
    rside = self[value_pos].value
    pos = value_pos + 1
  elsif of_type?(value_pos, :l_curly)
    pos, rside = get_hash(value_pos)
  else
    pos, rside = unravel(value_pos + 1, :close => get_close(value_pos))
  end

  return pos, { lside => rside }
end
# File libs/lexer.rb, line 730
# Looks up the token kind that closes whatever starts at +pos+.
#
# pos - index of the opening token.
#
# Returns the matching closing kind for the three bracket kinds,
# :whitespace for :word, :quote and :number, and nil for anything else.
def get_close(pos)
  closers = {
    :l_curly  => :r_curly,
    :l_square => :r_square,
    :l_paren  => :r_paren,
    :word     => :whitespace,
    :quote    => :whitespace,
    :number   => :whitespace
  }
  closers[self[pos].kind]
end
# File libs/lexer.rb, line 785
# Reads the escape token at +pos+.
#
# pos  - index of the escape token.
# args - :keep_escape => true to return the token value unchanged;
#        otherwise its first character is dropped.
#
# Returns [pos + 1, text].
# Calls invalid_character with EscapeEnd when the token is a single
# character and nothing follows it.
def get_escape(pos, args = {})
  keep_initial = args[:keep_escape] || false
  text = self[pos].value
  debug(8, :msg => "(#{text.inspect}).length => #{text.length}")

  invalid_character(pos, :error => EscapeEnd) if text.length == 1 && end?(pos + 1)

  unescaped = keep_initial ? text : text[1..text.length]
  return pos + 1, unescaped
end
# File libs/lexer.rb, line 761
# Reads a curly-brace hash starting at +pos+.
#
# pos  - index of the opening :l_curly token.
# args - accepted but not used by this method.
#
# Entries are assignments (read with get_assignment) separated by commas.
# Anything else inside the braces is reported through invalid_character.
#
# Returns [position after the closing brace, Hash of the entries].
#
# Fix: the final check used to call the misspelled, argument-less
# `invlaid_character`, so a hash ending right after a comma (or holding
# no entries, since the flag starts out true) failed with NameError
# instead of the intended invalid-character error.
def get_hash(pos, args = {})
  invalid_character(pos) if self[pos].kind != :l_curly
  pos += 1
  retval = {}
  havecomma = true # preloaded so the first entry needs no leading comma

  while !end?(pos, :close => :r_curly)
    pos = walk(pos) # walk over excess whitespace
    if assignment?(pos) && havecomma
      pos, hashval = get_assignment(pos)
      retval.merge!(hashval)
      havecomma = false
    elsif of_type?(pos, :comma)
      pos += 1
      havecomma = true
    else
      invalid_character(pos, :msg => "Invalid character found while building hash")
    end
    pos = walk(pos) # walk over excess whitespace
  end

  # We should be on the closing curly brace. Remember it for error
  # reporting (clamped to the last token in case input ran out).
  close_pos = [pos, length - 1].min
  pos += 1
  if havecomma
    invalid_character(close_pos, :msg => "Invalid character found while building hash")
  end
  return pos, retval
end
# File libs/lexer.rb, line 895
# Walks the tokens from +pos+ and collects the parsed items into an array.
#
# pos  - index to start from.
# args - options handed to Status.new. This method itself reads
#        args[:preload] (an item to seed the result with) and, through
#        the status object, close, skip_until_close and keep_comment.
#
# Returns [position, items]. The loop stops at end?(pos, :close=>status.close)
# and also returns early on comments, on a matching close token, and in
# some delimiter situations (see the inline notes).
#
# Raises RuntimeError when skip_until_close is set without a close, and
# reports unexpected tokens through invalid_character.
#
# NOTE(review): Status, debug and the WhitespaceExpected /
# DelimiterExpected / UnexpectedClose errors are defined outside the
# visible source; their exact behaviour should be confirmed there.
def unravel(pos,args={})
  status=Status.new(pos,self,args)
  status[:start_pos]=pos
  # Seed the result with the preloaded item, if any.
  if args[:preload]
    retval = []
    retval<<args[:preload]
  else
    retval=[]
  end

  raise "Close cannot be nil if skip_until_close" if status.skip_until_close && status.close.nil?

  debug(8,:msg=>"Unravel",:var=>[status,pos])

  # NOTE(review): invalid_tokens is filled here but not read again in this method.
  invalid_tokens=[]
  invalid_tokens<<:whitespace if !status.close.nil? && !([:r_curly,:r_paren,:r_square] & [status.close]).empty?

  pos=walk(pos)  #skip whitespace
  invalid_character(pos) if invalid?(pos,[:comma]) || close?(pos) #String cannot start with a comma or bracket close

  while !end?(pos,:close=>status.close)
    begin
      debug(8,:msg=>"Unravel-while",:var=>[pos,self[pos]])
      debug(8,:msg=>"Unravel-while",:var=>[status,status.have_item,status.close])
      debug(8,:msg=>"Unravel-while",:var=>retval)

      # A closing token while no close is expected is an error.
      invalid_character(pos,:error=>UnexpectedClose) if close?(pos) && status.close.nil?

      # In skip_until_close mode every token value is copied verbatim.
      if status.skip_until_close
        debug(8,:msg=>"skip_until_close",:var=>[pos,self[pos]])
        retval<<self[pos].value
        pos+=1
        pos=walk(pos)
        next
      end

      case what_is?(pos)
        when :comment
          # A comment ends processing; it is only kept when keep_comment is set.
          return pos,retval if !status.keep_comment
          retval<<self[pos].value
          return pos,retval
        when :escape
          status.item(pos)
          pos,result=get_escape(pos,status.args)
          retval<<result
        when :paren
          status.item(pos)
          # The parenthesised content is collected verbatim and spliced
          # back in between literal "(" and ")" items.
          pos,result=unravel(pos+1,:close=>get_close(pos),:skip_until_close=>true)
          retval<<"("
          result.each {|i| retval<<i }
          retval<<")"
        when :hash
          debug(8,:msg=>"hash",:var=>[pos,self[pos]])
          status.item(pos)
          pos,result=get_hash(pos)
          debug(8,:msg=>"hash-return",:var=>[pos,self[pos]])
          retval<<result
        when :array
          status.item(pos)
          # Recurse for the bracket contents; the nested result is kept as one item.
          pos,result=unravel(pos+1,:close=>get_close(pos))
          retval<<result
        #when :simple_array
        #  #if our delimiter is a comma then we've already detected the simple array
        #  if delim==:comma
        #    retval<<self[pos].value
        #    pos+=1
        #    have_item=true
        #  else
        #    pos,result=unravel(pos,:close=>:whitespace)
        #    retval<<result
        #    have_item=false
        #  end
        when :assignment
          status.item(pos)
          debug(8,:msg=>"assignment",:var=>[pos,self[pos]])
          pos,result=get_assignment(pos)
          debug(8,:msg=>"assignment-return",:var=>[pos,self[pos]])
          retval<<result
          # NOTE(review): this sets a plain local, not status.have_item;
          # the local is only read by the final debug call below.
          have_item=true
        when :comma, :whitespace
          begin
            status.delimiter(pos)
          rescue WhitespaceExpected
            # Re-parse with the last item preloaded, closing on whitespace,
            # and store the nested result in its place.
            last=retval.pop
            pos+=1
            pos,result=unravel(pos,:close=>:whitespace, :preload=>last)
            retval<<result
          end
          return pos, retval if status.have_item && status.close==:whitespace
          pos+=1
        when :close
          # Only the expected closing token may end this level.
          invalid_character(pos,:error=>UnexpectedClose) if self[pos].kind!=status.close
          pos+=1
          return pos,retval
        when :other
          debug(8,:msg=>"Unravel-:other",:var=>[self[pos]])
          status.item(pos)
          #if status.have_item && status.close==:whitespace
          #  return pos,retval
          #end
          retval<<self[pos].value
          pos+=1
        else #case what_is?(pos)
          invalid_character(pos)
      end #case what_is?(pos)
      debug(8,:msg=>"walk",:var=>[pos,self[pos]])
      pos=walk(pos) #walk whitespace ready for next round
      debug(8,:msg=>"walk-after",:var=>[pos,self[pos]])
    rescue DelimiterExpected=>e
      debug(8,:var=>caller.length)
      debug(8,:var=>status)
      debug(8,:var=>[pos,self[pos]])
      # With a comma delimiter and an item in hand, return what was
      # collected so far; otherwise re-raise.
      if status.delim==:comma && status.have_item
        debug(8)
        return pos,retval
      else
        debug(8)
        raise e
      end
      debug(8) # not reached: both branches above leave the method
    end
  end

  # A dangling comma delimiter at the end is an error.
  invalid_character(pos) if status.have_delim && status.delim==:comma
  pos+=1
  # NOTE(review): have_item here is the local from the :assignment branch
  # (nil unless that branch ran) - possibly meant status.have_item.
  debug(8,:msg=>"Unravel-While-end",:var=>[have_item, status.delim])
  return pos, retval
end