class Mingle::MingleLexer
Constants
- DIGIT
- IDENT_SEPS
- LC_ALPHA
- LC_HEX
- LEAD_SURROGATE
- TRAIL_SURROGATE
- UC_ALPHA
- UC_HEX
Public Instance Methods
create_loc( col_adj = 0 )
click to toggle source
# File lib/mingle.rb, line 639
# Builds a ParseLocation for the lexer's current line, with the current
# column shifted by col_adj (which may be negative).
def create_loc( col_adj = 0 )
  adjusted_col = @col + col_adj
  ParseLocation.new( :col => adjusted_col, :line => @line )
end
eof?()
click to toggle source
# File lib/mingle.rb, line 634
# Reports whether the underlying IO has been read to its end; simply
# delegates to @io.eof?.
def eof?
  @io.eof?
end
expect_token( typ = nil )
click to toggle source
# File lib/mingle.rb, line 1174
# Reads the next token, optionally requiring a particular token type.
#
# With no typ, returns whatever read_token yields, falling back to
# fail_unexpected_end if that result is falsy. With one of the supported
# token classes, delegates to read_token( typ ). Any other typ raises a
# RuntimeError.
def expect_token( typ = nil )
  if typ == nil
    read_token || fail_unexpected_end
  elsif typ == StringToken ||
        typ == NumericToken ||
        typ == MingleIdentifier ||
        typ == DeclaredTypeName
    read_token( typ )
  else
    raise "Unhandled token expect type: #{typ}"
  end
end
read_token( typ = nil )
click to toggle source
Note about the case statement: the typ-based checks need to fire before the char-based ones so that if, for example, typ is DeclaredTypeName
and the input is ‘a’, we will fail with a bad type name error rather than returning the identifier ‘a’.
# File lib/mingle.rb, line 1149
# Reads one token and returns it paired with the location of its first
# character, as [ token, loc ].
#
# typ, when given, forces the kind of token to read. The typ-based checks
# must come before the character-based ones so that, for example, typ ==
# DeclaredTypeName with input 'a' fails as a bad type name rather than
# returning the identifier 'a'.
#
# An unrecognized leading character is reported through fail_parse. The
# message is already fully interpolated, so it must not be passed through
# a sprintf-style helper: a literal '%' in the offending character's
# description would otherwise be read as a format directive.
def read_token( typ = nil )
  # Don't peek -- do get/unget so we get a true loc
  ch = get_char
  loc = create_loc
  unget_char( ch )

  res =
    case
    when typ == StringToken then read_string
    when typ == NumericToken then read_number
    when typ == MingleIdentifier then read_ident
    when typ == DeclaredTypeName then read_decl_type_name
    when ident_start?( ch ) then read_ident
    when decl_nm_start?( ch ) then read_decl_type_name
    when special_char?( ch ) then read_special
    when whitespace?( ch ) then read_whitespace
    when ch == ?" then read_string
    when starts_num?( ch ) then read_number
    else
      # Consume the offending char so the reported location points at it.
      fail_parse( "Unrecognized token: #{err_ch( get_char )}" )
    end

  [ res, loc ]
end
Private Instance Methods
append_string_tok( dest, ch )
click to toggle source
# File lib/mingle.rb, line 1047
# Appends ch to the string literal being accumulated in dest, rejecting
# control characters (as judged by Chars.ctl_char?) with a parse failure.
def append_string_tok( dest, ch )
  return dest << ch unless Chars.ctl_char?( ch )

  unget_char( ch ) # To reset line num in case we read \n
  msg = "Invalid control character in string literal: #{err_ch( ch )}"
  impl_fail_parse( msg, create_loc( 1 ) )
end
can_trail?( styl )
click to toggle source
# File lib/mingle.rb, line 767
# True when styl is the underscore or the hyphenated identifier style.
def can_trail?( styl )
  return true if styl == ID_STYLE_LC_UNDERSCORE

  styl == ID_STYLE_LC_HYPHENATED
end
decl_nm_char?( ch )
click to toggle source
# File lib/mingle.rb, line 877
# Truthy when ch falls in UC_ALPHA, LC_ALPHA or DIGIT. Returns the first
# matching range, or nil when none matches.
def decl_nm_char?( ch )
  allowed = [ UC_ALPHA, LC_ALPHA, DIGIT ]
  allowed.detect { |rng| rng.include?( ch ) }
end
decl_nm_start?( ch )
click to toggle source
# File lib/mingle.rb, line 872
# True when ch is in UC_ALPHA, i.e. may begin a declared type name.
def decl_nm_start?( ch )
  UC_ALPHA.include?( ch )
end
err_ch( ch, ch_desc = nil )
click to toggle source
# File lib/mingle.rb, line 682
# Formats a character for use in an error message as
# "<description> (0xNN)", or "END" when ch is nil (end of input).
#
# ch_desc overrides the description; when omitted, inspect_char( ch ) is
# used. The description is passed to sprintf as an argument rather than
# being spliced into the format string, so descriptions containing '%'
# (for example the escape "\%") are rendered literally instead of raising
# ArgumentError.
def err_ch( ch, ch_desc = nil )
  return "END" unless ch

  ch_desc ||= inspect_char( ch )
  sprintf( "%s (0x%02X)", ch_desc, ch.ord )
end
escape_utf16( bin )
click to toggle source
# File lib/mingle.rb, line 971
# Renders a string of big-endian UTF-16 code units as a sequence of
# "\uXXXX" escapes, one per 2-byte unit.
#
# Raises RuntimeError when bin's size is not a multiple of 2. (The message
# previously claimed a multiple of 4, which contradicted the check.)
def escape_utf16( bin )
  unless bin.size % 2 == 0
    raise "Bin string size #{bin.size} not a multiple of 2 bytes"
  end

  res = ""
  ( bin.size / 2 ).times do |i|
    # "n" unpacks one 16-bit big-endian unsigned integer
    unit = bin[ 2 * i, 2 ].unpack( "n" )[ 0 ]
    res << sprintf( "\\u%04X", unit )
  end
  res
end
fail_parse( msg )
click to toggle source
# File lib/mingle.rb, line 649
# Fails the parse with msg, reported at the lexer's current location.
def fail_parse( msg )
  here = create_loc
  impl_fail_parse( msg, here )
end
fail_parsef( *argv )
click to toggle source
# File lib/mingle.rb, line 654
# sprintf-style variant of fail_parse: argv is a format string followed by
# its arguments.
def fail_parsef( *argv )
  msg = sprintf( *argv )
  fail_parse( msg )
end
fail_unexpected_end( msg = "Unexpected end of input" )
click to toggle source
# File lib/mingle.rb, line 659
# Fails the parse with msg. When the input is exhausted, the column is
# first advanced by one so the reported location falls just past the last
# character read.
def fail_unexpected_end( msg = "Unexpected end of input" )
  if eof?
    @col += 1
  end
  fail_parse( msg )
end
get_char( fail_on_eof = false )
click to toggle source
# File lib/mingle.rb, line 693
# Reads the next character from @io and updates the line/column
# bookkeeping.
#
# On a newline the current column is saved in @unread_col (so unget_char
# can restore it), the column resets to 0 and the line number advances;
# any other character advances the column by one.
#
# Returns the character, or nil at end of input. When fail_on_eof is true,
# end of input is reported via fail_parse instead.
def get_char( fail_on_eof = false )
  ch = @io.getc
  unless ch
    return fail_on_eof ? fail_parse( "Unexpected end of input" ) : nil
  end

  if ch == ?\n
    @unread_col = @col
    @col = 0
    @line += 1
  else
    @col += 1
  end

  ch
end
hex_char?( ch )
click to toggle source
# File lib/mingle.rb, line 937
# Truthy when ch falls in DIGIT, UC_HEX or LC_HEX. Returns the first
# matching range, or nil when none matches.
def hex_char?( ch )
  hex_ranges = [ DIGIT, UC_HEX, LC_HEX ]
  hex_ranges.detect { |rng| rng.include?( ch ) }
end
ident_part_char?( ch )
click to toggle source
# File lib/mingle.rb, line 748
# Truthy when ch falls in LC_ALPHA or DIGIT. Returns the first matching
# range, or nil when none matches.
def ident_part_char?( ch )
  part_ranges = [ LC_ALPHA, DIGIT ]
  part_ranges.detect { |rng| rng.include?( ch ) }
end
ident_part_sep?( ch )
click to toggle source
# File lib/mingle.rb, line 753
# Truthy when ch ends the current identifier part: either it is contained
# in IDENT_SEPS or it is in UC_ALPHA. Returns the first matching
# collection, or nil when none matches.
def ident_part_sep?( ch )
  sep_sets = [ IDENT_SEPS, UC_ALPHA ]
  sep_sets.detect { |rng| rng.include?( ch ) }
end
ident_start?( ch )
click to toggle source
# File lib/mingle.rb, line 743
# True when ch is in LC_ALPHA, i.e. may begin an identifier.
def ident_start?( ch )
  LC_ALPHA.include?( ch )
end
impl_fail_parse( msg, loc )
click to toggle source
# File lib/mingle.rb, line 644
# Raises a MingleParseError carrying msg as :err and loc as :loc.
def impl_fail_parse( msg, loc )
  raise MingleParseError.new(
    :err => msg,
    :loc => loc
  )
end
impl_initialize()
click to toggle source
# File lib/mingle.rb, line 629
# Resets position tracking to line 1, column 0. Returns [ 1, 0 ].
def impl_initialize
  @line = 1
  @col = 0
  [ @line, @col ]
end
inspect_char( ch )
click to toggle source
For compatibility and ease of asserting error messages, we make sure this converts \t –> "\t", \n –> "\n", etc., and otherwise converts 0x01 –> "\x01" (even though Ruby 1.9.x would yield "\u0001")
# File lib/mingle.rb, line 669
# Returns a quoted, printable rendering of ch for error messages.
#
# The common escapes (\n, \t, \f, \r, \b) are spelled out explicitly and
# any other control character (per Chars.ctl_char?) is rendered as "\xNN",
# so the output does not depend on how the running Ruby's String#inspect
# formats control characters. Everything else uses ch.chr.inspect.
def inspect_char( ch )
  return '"\n"' if ch == ?\n
  return '"\t"' if ch == ?\t
  return '"\f"' if ch == ?\f
  return '"\r"' if ch == ?\r
  return '"\b"' if ch == ?\b

  if Chars.ctl_char?( ch )
    sprintf( '"\x%02X"', ch.ord )
  else
    ch.chr.inspect
  end
end
new_bin_str()
click to toggle source
# File lib/mingle.rb, line 942
# Creates an empty string for accumulating raw bytes. The block, which
# switches the string to binary encoding, is handed to
# RubyVersions.when_19x along with the fresh string.
def new_bin_str
  RubyVersions.when_19x( "" ) do |str|
    str.encode!( "binary" )
  end
end
peek_char()
click to toggle source
# File lib/mingle.rb, line 727
# Returns the next character (nil at end of input) without consuming it:
# the character is read and immediately pushed back.
def peek_char
  ch = get_char
  unget_char( ch )
  ch
end
poll_chars( *expct )
click to toggle source
# File lib/mingle.rb, line 732
# Consumes and returns the next character if it is one of expct;
# otherwise pushes it back and returns nil.
def poll_chars( *expct )
  ch = get_char
  return ch if expct.include?( ch )

  unget_char( ch )
  nil
end
read_decl_type_name()
click to toggle source
# File lib/mingle.rb, line 882
# Reads a declared type name: one decl_nm_start? character followed by any
# number of decl_nm_char? characters. The first character that does not
# belong to the name is pushed back.
#
# Fails the parse on empty input or on an illegal first character.
# Returns a DeclaredTypeName built from the collected name.
def read_decl_type_name
  fail_unexpected_end( "Empty type name" ) if eof?

  ch = get_char
  unless decl_nm_start?( ch )
    fail_parse( "Illegal type name start: #{err_ch( ch )}" )
  end
  name = ch.chr

  loop do
    ch = get_char
    unless decl_nm_char?( ch )
      unget_char( ch )
      break
    end
    name << ch
  end

  DeclaredTypeName.send( :new, :name => name )
end
read_dig_str( err_desc, *ends )
click to toggle source
# File lib/mingle.rb, line 1086
# Reads a run of DIGIT characters and returns it as a string.
#
# Reading stops (with the stopping character pushed back) at end of input,
# at 'e' / 'E', or at a special character. Any other non-digit fails the
# parse, as does an empty run. err_desc names the part being read and is
# used only in error messages.
#
# Note: the *ends parameter is accepted but not used by this method.
def read_dig_str( err_desc, *ends )
  digits = ""

  loop do
    ch = get_char
    if DIGIT.include?( ch )
      digits << ch
    elsif [ nil, ?e, ?E ].include?( ch ) || special_char?( ch )
      unget_char( ch )
      break
    else
      fail_parse( "Unexpected char in #{err_desc}: #{err_ch( ch )}" )
    end
  end

  fail_parse( "Number has empty or invalid #{err_desc}" ) if digits.empty?
  digits
end
read_escaped_char( dest )
click to toggle source
# File lib/mingle.rb, line 1031
# Reads the character(s) following a backslash inside a string literal and
# appends the decoded result to dest.
#
# Supported escapes: \n \t \f \r \b \\ \" and \uXXXX (delegated to
# read_utf16_escape). Any other escape character fails the parse.
#
# The escape character is read with get_char( true ) so that input ending
# right after the backslash is reported as a parse failure; previously the
# nil from get_char reached the else branch, where ch.chr raised
# NoMethodError.
def read_escaped_char( dest )
  case ch = get_char( true )
  when ?n then dest << "\n"
  when ?t then dest << "\t"
  when ?f then dest << "\f"
  when ?r then dest << "\r"
  when ?b then dest << "\b"
  when ?\\ then dest << "\\"
  when ?" then dest << "\""
  when ?u then read_utf16_escape( dest )
  else
    fail_parse( "Unrecognized escape: #{err_ch( ch, "\\#{ch.chr}" )}" )
  end
end
read_ident( styl = nil )
click to toggle source
# File lib/mingle.rb, line 854
# Reads a complete identifier as a sequence of parts and returns a
# MingleIdentifier built from them.
#
# styl, when given, fixes the identifier style up front; otherwise the
# style returned by read_ident_part is carried forward from part to part.
# Reading continues until read_ident_part signals completion or the input
# ends. Fails the parse if no parts were read.
def read_ident( styl = nil )
  parts = []
  id_done = nil

  loop do
    unless eof?
      # A part is mandatory at the start, and after a separator in the
      # styles for which can_trail? holds.
      expct = parts.empty? || can_trail?( styl )
      part, styl, id_done = read_ident_part( styl, expct )
      parts << part unless part.empty?
    end
    break if id_done || eof?
  end

  fail_unexpected_end( "Empty identifier" ) if parts.empty?
  MingleIdentifier.send( :new, :parts => parts )
end
read_ident_part( styl, expct )
click to toggle source
# File lib/mingle.rb, line 840
# Reads a single identifier part.
#
# Returns [ part, styl, done ]: the part text (empty when no part could be
# started), the possibly-updated style, and a flag telling the caller to
# stop reading parts. The flag is true whenever the part is empty.
def read_ident_part( styl, expct )
  first = read_ident_part_start( styl, expct )
  return [ "", styl, true ] unless first

  part = ""
  part << first
  styl, id_done = read_ident_part_tail( part, styl )

  [ part, styl, part.empty? || id_done ]
end
read_ident_part_start( styl, expct )
click to toggle source
# File lib/mingle.rb, line 772
# Reads the first character of an identifier part.
#
# In camel-capped style the part must begin with a UC_ALPHA character,
# which is returned downcased; in every other case it must satisfy
# ident_start? and is returned as read.
#
# When the character does not qualify: if expct is true the parse fails,
# otherwise the character is pushed back and nil is returned.
def read_ident_part_start( styl, expct )
  ch = get_char

  res =
    if styl == ID_STYLE_LC_CAMEL_CAPPED
      UC_ALPHA.include?( ch ) ? ch.chr.downcase : nil
    else
      ident_start?( ch ) ? ch : nil
    end
  return res if res

  if expct
    fail_parse "Illegal start of identifier part: #{err_ch( ch )}"
  else
    unget_char( ch )
  end

  nil
end
read_ident_part_tail( part, styl )
click to toggle source
# File lib/mingle.rb, line 818
# Reads the remainder of an identifier part, appending to part in place.
#
# Part characters are accumulated until either a part separator is seen
# (handled by read_ident_sep, which may change the style) or some other
# character is seen, which is pushed back and marks the whole identifier
# as finished.
#
# Returns [ styl, id_done ]; id_done is true only in the second case and
# nil otherwise.
def read_ident_part_tail( part, styl )
  id_done = nil

  loop do
    ch = get_char
    if ident_part_char?( ch )
      part << ch
    elsif ident_part_sep?( ch )
      styl = read_ident_sep( ch, styl )
      break
    else
      id_done = true
      unget_char( ch )
      break
    end
  end

  [ styl, id_done ]
end
read_ident_sep( ch, styl )
click to toggle source
# File lib/mingle.rb, line 794
# Handles a part-separator character ch and returns the identifier style
# in effect afterwards.
#
# With no style established yet, the separator decides it: '-' selects the
# hyphenated style, '_' the underscore style, and anything else the
# camel-capped style (in which case ch is pushed back, since it is the
# first character of the next part).
#
# With a style already established, a character other than that style's
# separator is pushed back. If it is the separator and the input ends
# right after it (for a style where can_trail? holds), the parse fails.
def read_ident_sep( ch, styl )
  unless styl
    return ID_STYLE_LC_HYPHENATED if ch == ?-
    return ID_STYLE_LC_UNDERSCORE if ch == ?_

    unget_char( ch )
    return ID_STYLE_LC_CAMEL_CAPPED
  end

  if ch != sep_char_for( styl )
    unget_char( ch )
  elsif eof? && can_trail?( styl )
    fail_unexpected_end( "Empty identifier part" )
  end

  styl
end
read_num_exp( opts )
click to toggle source
# File lib/mingle.rb, line 1110
# Reads an optional exponent and records it in opts.
#
# If the next character is 'e' or 'E', stores it under :exp_char and stores
# the exponent digits (prefixed with "-" when a minus sign was present; a
# plus sign is consumed and dropped) under :exp.
#
# Otherwise the character must be a legal end of a number -- end of input,
# whitespace, or a special character other than '.' -- and is pushed back;
# anything else fails the parse.
def read_num_exp( opts )
  ch = get_char

  if [ ?e, ?E ].include?( ch )
    opts[ :exp_char ] = ch.chr
    sign = poll_chars( ?-, ?+ ) == ?- ? "-" : ""
    opts[ :exp ] = sign + read_dig_str( "exponent" )
  elsif ch == nil || whitespace?( ch ) || ( ch != ?. && special_char?( ch ) )
    unget_char( ch )
  else
    fail_parse(
      "Expected exponent start or num end, found: " + err_ch( ch ) )
  end
end
read_number()
click to toggle source
# File lib/mingle.rb, line 1133
# Reads a numeric literal: an integer part, an optional '.'-introduced
# fractional part, and an optional exponent. Returns a NumericToken built
# from the collected :int / :frac / :exp_char / :exp options.
def read_number
  opts = { :int => read_dig_str( "integer part" ) }

  if poll_chars( ?. )
    opts[ :frac ] = read_dig_str( "fractional part" )
  end
  read_num_exp( opts )

  NumericToken.new( opts )
end
read_special()
click to toggle source
# File lib/mingle.rb, line 910
# Consumes one character and wraps it in a SpecialToken.
def read_special
  ch = get_char
  SpecialToken.new( :val => ch.chr )
end
read_string()
click to toggle source
# File lib/mingle.rb, line 1060
# Reads a double-quoted string literal and returns a StringToken holding
# its decoded value.
#
# The opening quote is required. Backslash escapes are decoded by
# read_escaped_char; all other characters go through append_string_tok.
# Input that ends before the closing quote fails the parse.
def read_string
  ch = get_char
  unless ch == ?"
    fail_parse( "Expected string start, saw #{err_ch( ch )}" )
  end

  res = RubyVersions.when_19x( "" ) { |s| s.encode!( "utf-8" ) }

  loop do
    ch = get_char
    break if ch == ?"

    if ch.nil?
      fail_parse( "Unterminated string literal" )
    elsif ch == ?\\
      read_escaped_char( res )
    else
      append_string_tok( res, ch )
    end
  end

  StringToken.new( :val => res )
end
read_trail_surrogate( bin )
click to toggle source
# File lib/mingle.rb, line 987
# Reads the "\uXXXX" escape that must follow a leading surrogate and
# appends its two bytes to bin.
#
# Fails the parse if the next two characters are not a backslash followed
# by 'u', or if the code unit read is not in TRAIL_SURROGATE. The column
# adjustments passed to create_loc shift the reported location back from
# the current read position.
def read_trail_surrogate( bin )
  tmpl = "Expected trailing surrogate, found: %s"

  ch = get_char( true )
  unless ch == ?\\
    impl_fail_parse( sprintf( tmpl, err_ch( ch ) ), create_loc )
  end

  ch = get_char( true )
  unless ch == ?u
    impl_fail_parse( sprintf( tmpl, "\\#{ch.chr}" ), create_loc( -1 ) )
  end

  hi, lo = read_utf16_bytes
  bin << hi << lo
  return if surrogate?( hi, lo, TRAIL_SURROGATE )

  msg = "Invalid surrogate pair #{escape_utf16( bin )}"
  impl_fail_parse( msg, create_loc( -11 ) )
end
read_utf16_bytes()
click to toggle source
# File lib/mingle.rb, line 947
# Reads four hex digits and returns them as two integers, [ hi, lo ], each
# parsed from a pair of digits. Fails the parse on any non-hex character
# (including end of input).
def read_utf16_bytes
  ( 1..2 ).map do
    hex = ""
    2.times do
      ch = get_char
      unless hex_char?( ch )
        fail_parse( "Invalid hex char in escape: #{err_ch( ch )}" )
      end
      hex << ch
    end
    hex.to_i( 16 )
  end
end
read_utf16_escape( dest )
click to toggle source
# File lib/mingle.rb, line 1009
# Decodes the body of a "\u" escape (the "\u" itself has already been
# consumed) and appends the resulting UTF-8 text to dest.
#
# A leading surrogate must be followed by a trailing-surrogate escape,
# which read_trail_surrogate reads into the same byte buffer. A trailing
# surrogate on its own fails the parse. The collected UTF-16BE bytes are
# converted with Iconv when USE_ICONV is set, and with String#encode!
# otherwise.
def read_utf16_escape( dest )
  bin = new_bin_str

  hi, lo = read_utf16_bytes
  bin << hi << lo

  if surrogate?( hi, lo, LEAD_SURROGATE )
    read_trail_surrogate( bin )
  elsif surrogate?( hi, lo, TRAIL_SURROGATE )
    msg = "Trailing surrogate with no lead: #{escape_utf16( bin )}"
    impl_fail_parse( msg, create_loc( -5 ) )
  end

  utf8 =
    if USE_ICONV
      Iconv.conv( "utf-8", "utf-16be", bin )
    else
      bin.encode!( "utf-8", "utf-16be" )
    end
  dest << utf8
end
read_whitespace()
click to toggle source
# File lib/mingle.rb, line 920
# Consumes a run of whitespace characters and returns it wrapped in a
# WhitespaceToken. The first non-whitespace character is pushed back.
def read_whitespace
  ws = ""

  loop do
    ch = get_char
    unless whitespace?( ch )
      unget_char( ch )
      break
    end
    ws << ch
  end

  WhitespaceToken.new( :ws => ws )
end
sep_char_for( styl )
click to toggle source
# File lib/mingle.rb, line 758
# Returns the separator character used by styl: '-' for the hyphenated
# style, '_' for the underscore style, and nil for anything else.
def sep_char_for( styl )
  case styl
  when ID_STYLE_LC_HYPHENATED
    ?-
  when ID_STYLE_LC_UNDERSCORE
    ?_
  end
end
special_char?( ch )
click to toggle source
# File lib/mingle.rb, line 905
# Truthy when ch is non-nil and appears in SpecialToken::TOK_CHARS. The
# truthy value is the index of ch within TOK_CHARS.
def special_char?( ch )
  return ch unless ch

  SpecialToken::TOK_CHARS.index( ch )
end
starts_num?( ch )
click to toggle source
# File lib/mingle.rb, line 1081
# True when ch is in DIGIT, i.e. may begin a numeric token.
def starts_num?( ch )
  DIGIT.include?( ch )
end
surrogate?( hi, lo, rng )
click to toggle source
# File lib/mingle.rb, line 966
# Combines the hi and lo bytes into a 16-bit code unit and reports whether
# it lies within rng.
def surrogate?( hi, lo, rng )
  code_unit = ( hi << 8 ) + lo
  rng.include?( code_unit )
end
unget_char( ch )
click to toggle source
Okay to call with nil (okay to unget EOF)
# File lib/mingle.rb, line 712
# Pushes ch back onto @io and rewinds the line/column bookkeeping to match.
# Ungetting a newline steps back one line and restores the column saved in
# @unread_col; any other character steps the column back by one.
#
# Okay to call with nil (okay to unget EOF): nothing happens.
def unget_char( ch )
  return unless ch

  @io.ungetc( ch )
  if ch == ?\n
    @line, @col = @line - 1, @unread_col
  else
    @col -= 1
  end
end
whitespace?( ch )
click to toggle source
# File lib/mingle.rb, line 915
# Truthy when ch is a space, newline, carriage return or tab. The truthy
# value is the index of ch within " \n\r\t"; nil input yields nil.
def whitespace?( ch )
  return ch unless ch

  " \n\r\t".index( ch )
end