class Mingle::MingleLexer

Constants

DIGIT
IDENT_SEPS
LC_ALPHA
LC_HEX
LEAD_SURROGATE
TRAIL_SURROGATE
UC_ALPHA
UC_HEX

Public Instance Methods

create_loc( col_adj = 0 ) click to toggle source
# File lib/mingle.rb, line 639
# Builds a ParseLocation for the lexer's current position.
#
# col_adj:: offset added to @col before it is stored (defaults to 0)
#
# Returns a new ParseLocation built from :col and :line.
def create_loc( col_adj = 0 )
    adjusted_col = @col + col_adj
    ParseLocation.new( :col => adjusted_col, :line => @line )
end
eof?() click to toggle source
# File lib/mingle.rb, line 634
# Reports whether the underlying input (@io) is exhausted; simply delegates
# to @io.eof?.
def eof?
    source = @io
    source.eof?
end
expect_token( typ = nil ) click to toggle source
# File lib/mingle.rb, line 1174
# Reads the next token, optionally requiring a specific token type.
#
# typ:: nil to accept whatever read_token produces, or one of StringToken,
#       NumericToken, MingleIdentifier, DeclaredTypeName
#
# Returns whatever read_token returns. Raises a RuntimeError for any other
# typ value.
def expect_token( typ = nil )

    if typ.nil?
        read_token || fail_unexpected_end
    elsif typ == StringToken || typ == NumericToken ||
          typ == MingleIdentifier || typ == DeclaredTypeName
        read_token( typ )
    else
        raise "Unhandled token expect type: #{typ}"
    end
end
read_token( typ = nil ) click to toggle source

Note about the case statement: the typ-based checks need to fire before the char-based ones so that if, for example, typ is DeclaredTypeName and the input is ‘a’, we fail with a bad type name error rather than returning the identifier ‘a’.

# File lib/mingle.rb, line 1149
# Reads one token and returns it together with the location of its first
# character, as the two-element array [ token, loc ].
#
# typ:: optional token class (StringToken, NumericToken, MingleIdentifier or
#       DeclaredTypeName) that forces the matching reader regardless of the
#       next character; when nil the reader is chosen from the next character
#
# The typ-based branches are checked before the character-based ones so that a
# requested type is always attempted (and fails with that reader's own error)
# even when the next character would start a different kind of token.
#
# Fails with a parse error when no reader matches the next character.
def read_token( typ = nil )

    # Don't peek -- do get/unget so we get a true loc
    ch = get_char
    loc = create_loc
    unget_char( ch )

    res =
        case
        when typ == StringToken then read_string
        when typ == NumericToken then read_number
        when typ == MingleIdentifier then read_ident
        when typ == DeclaredTypeName then read_decl_type_name
        when ident_start?( ch ) then read_ident
        when decl_nm_start?( ch ) then read_decl_type_name
        when special_char?( ch ) then read_special
        when whitespace?( ch ) then read_whitespace
        when ch == ?" then read_string
        when starts_num?( ch ) then read_number
        else
            # Pass the character description as a format argument, never as
            # part of the format string: a literal '%' in the input would
            # otherwise make sprintf raise ArgumentError.
            fail_parsef( "Unrecognized token: %s", err_ch( get_char ) )
        end

    [ res, loc ]
end

Private Instance Methods

append_string_tok( dest, ch ) click to toggle source
# File lib/mingle.rb, line 1047
# Appends one raw (unescaped) string-literal character to dest, rejecting
# control characters as reported by Chars.ctl_char?.
#
# dest:: string being accumulated
# ch::   character just read from the literal
#
# On a control character the char is pushed back first (which restores the
# line number if it was "\n") and a parse error is raised at one column past
# the restored position.
def append_string_tok( dest, ch )

    return dest << ch unless Chars.ctl_char?( ch )

    unget_char( ch )
    impl_fail_parse(
        "Invalid control character in string literal: #{err_ch( ch )}",
        create_loc( 1 )
    )
end
can_trail?( styl ) click to toggle source
# File lib/mingle.rb, line 767
# True when styl is one of the two separator-character styles
# (ID_STYLE_LC_UNDERSCORE or ID_STYLE_LC_HYPHENATED), false otherwise.
def can_trail?( styl )
    return true if styl == ID_STYLE_LC_UNDERSCORE
    styl == ID_STYLE_LC_HYPHENATED
end
decl_nm_char?( ch ) click to toggle source
# File lib/mingle.rb, line 877
# Tests whether ch may appear inside a declared type name: an upper-case
# letter, a lower-case letter, or a digit.
#
# Returns the first of UC_ALPHA, LC_ALPHA, DIGIT that includes ch (a truthy
# value), or nil when none does.
def decl_nm_char?( ch )
    return UC_ALPHA if UC_ALPHA.include?( ch )
    return LC_ALPHA if LC_ALPHA.include?( ch )
    DIGIT if DIGIT.include?( ch )
end
decl_nm_start?( ch ) click to toggle source
# File lib/mingle.rb, line 872
# True when ch can begin a declared type name, i.e. UC_ALPHA includes it.
def decl_nm_start?( ch )
    starts_upper = UC_ALPHA.include?( ch )
    starts_upper
end
err_ch( ch, ch_desc = nil ) click to toggle source
# File lib/mingle.rb, line 682
# Formats a character for use in an error message, as
# '<description> (0xNN)', or "END" when ch is nil (end of input).
#
# ch::      the offending character, or nil at end of input
# ch_desc:: optional pre-built description; defaults to inspect_char( ch )
#
# The description is passed to sprintf as an argument rather than being
# interpolated into the format string, so a description containing '%'
# (e.g. for the character '%' itself) no longer raises ArgumentError.
def err_ch( ch, ch_desc = nil )

    return "END" unless ch

    ch_desc ||= inspect_char( ch )
    sprintf( "%s (0x%02X)", ch_desc, ch.ord )
end
escape_utf16( bin ) click to toggle source
# File lib/mingle.rb, line 971
# Renders a string of big-endian UTF-16 code units as a sequence of \uXXXX
# escapes (upper-case hex), for use in error messages.
#
# bin:: string whose size must be even; each consecutive pair is unpacked as
#       one 16-bit big-endian value
#
# Raises a RuntimeError when the size is odd. (The message previously claimed
# a 4-byte multiple although the check is for a 2-byte multiple.)
def escape_utf16( bin )

    unless bin.size % 2 == 0
        raise "Bin string size #{bin.size} not a multiple of 2 bytes"
    end

    res = ""

    ( bin.size / 2 ).times do |i|
        code_unit = bin[ 2 * i, 2 ].unpack( "n" )[ 0 ]
        res << sprintf( "\\u%04X", code_unit )
    end

    res
end
fail_parse( msg ) click to toggle source
# File lib/mingle.rb, line 649
# Fails the parse with msg at the lexer's current location (create_loc with
# no adjustment), by delegating to impl_fail_parse.
def fail_parse( msg )
    here = create_loc
    impl_fail_parse( msg, here )
end
fail_parsef( *argv ) click to toggle source
# File lib/mingle.rb, line 654
# printf-style variant of fail_parse: the arguments are handed to sprintf
# (format string first) and the result becomes the failure message.
def fail_parsef( *argv )
    msg = sprintf( *argv )
    fail_parse( msg )
end
fail_unexpected_end( msg = "Unexpected end of input" ) click to toggle source
# File lib/mingle.rb, line 659
# Fails the parse with msg (default "Unexpected end of input"). When the
# input is at eof, @col is first advanced by one so the reported location
# points just past the last character read.
def fail_unexpected_end( msg = "Unexpected end of input" )

    if eof?
        @col += 1
    end

    fail_parse( msg )
end
get_char( fail_on_eof = false ) click to toggle source
# File lib/mingle.rb, line 693
# Reads the next character from @io and updates the position counters.
#
# fail_on_eof:: when true, end of input is reported through fail_parse
#               instead of returning nil
#
# On "\n" the current column is saved in @unread_col (so unget_char can
# restore it), @col is reset to 0 and @line is incremented; for any other
# character @col is incremented. Returns the character, or nil at end of
# input when fail_on_eof is false.
def get_char( fail_on_eof = false )

    ch = @io.getc

    if ch.nil?
        return fail_on_eof ? fail_parse( "Unexpected end of input" ) : nil
    end

    if ch == ?\n
        @unread_col = @col
        @col = 0
        @line += 1
    else
        @col += 1
    end

    ch
end
hex_char?( ch ) click to toggle source
# File lib/mingle.rb, line 937
# Tests whether ch is a hexadecimal digit.
#
# Returns the first of DIGIT, UC_HEX, LC_HEX that includes ch (a truthy
# value), or nil when none does.
def hex_char?( ch )
    return DIGIT if DIGIT.include?( ch )
    return UC_HEX if UC_HEX.include?( ch )
    LC_HEX if LC_HEX.include?( ch )
end
ident_part_char?( ch ) click to toggle source
# File lib/mingle.rb, line 748
# Tests whether ch may appear inside an identifier part: a lower-case letter
# or a digit.
#
# Returns LC_ALPHA or DIGIT (whichever includes ch first, in that order), or
# nil when neither does.
def ident_part_char?( ch )
    return LC_ALPHA if LC_ALPHA.include?( ch )
    DIGIT if DIGIT.include?( ch )
end
ident_part_sep?( ch ) click to toggle source
# File lib/mingle.rb, line 753
# Tests whether ch ends the current identifier part: either one of
# IDENT_SEPS or an upper-case letter (UC_ALPHA).
#
# Returns IDENT_SEPS or UC_ALPHA (whichever includes ch first, in that
# order), or nil when neither does.
def ident_part_sep?( ch )
    return IDENT_SEPS if IDENT_SEPS.include?( ch )
    UC_ALPHA if UC_ALPHA.include?( ch )
end
ident_start?( ch ) click to toggle source
# File lib/mingle.rb, line 743
# True when ch can begin an identifier, i.e. LC_ALPHA includes it.
def ident_start?( ch )
    starts_lower = LC_ALPHA.include?( ch )
    starts_lower
end
impl_fail_parse( msg, loc ) click to toggle source
# File lib/mingle.rb, line 644
# Raises a MingleParseError built from the given message (:err) and
# location (:loc). All parse failures in this lexer funnel through here.
def impl_fail_parse( msg, loc )
    failure = MingleParseError.new( :err => msg, :loc => loc )
    raise failure
end
impl_initialize() click to toggle source
# File lib/mingle.rb, line 629
# Resets the position counters to the start of input: line 1, column 0.
# Returns the pair [ line, col ].
def impl_initialize
    @line = 1
    @col = 0
    [ @line, @col ]
end
inspect_char( ch ) click to toggle source

For compatibility and ease of asserting error messages, we make sure this converts \t –> “\t”, \n –> “\n”, etc., and otherwise converts 0x01 –> “\x01” (even though ruby 1.9x would yield “\u0001”)

# File lib/mingle.rb, line 669
# Returns a quoted, printable description of ch for error messages.
#
# The five common escapes (\n, \t, \f, \r, \b) are rendered as their
# two-character backslash form; any other character for which
# Chars.ctl_char? is true is rendered as \xNN (upper-case hex); everything
# else is rendered via String#inspect of ch.chr.
def inspect_char( ch )
    case ch
    when ?\n then '"\n"'
    when ?\t then '"\t"'
    when ?\f then '"\f"'
    when ?\r then '"\r"'
    when ?\b then '"\b"'
    else
        if Chars.ctl_char?( ch )
            sprintf( '"\x%02X"', ch.ord )
        else
            ch.chr.inspect
        end
    end
end
new_bin_str() click to toggle source
# File lib/mingle.rb, line 942
# Creates a fresh empty string intended to hold raw bytes. The string is
# handed to RubyVersions.when_19x with a block that re-encodes it as
# "binary".
# NOTE(review): when_19x is defined elsewhere; presumably it runs the block
# only on Ruby 1.9+ and returns the string unchanged otherwise -- confirm.
def new_bin_str
    RubyVersions.when_19x( "" ) do |str|
        str.encode!( "binary" )
    end
end
peek_char() click to toggle source
# File lib/mingle.rb, line 727
# Returns the next character without consuming it: reads it with get_char
# and immediately pushes it back with unget_char. Returns nil at end of
# input.
def peek_char
    ch = get_char
    unget_char( ch )
    ch
end
poll_chars( *expct ) click to toggle source
# File lib/mingle.rb, line 732
# Consumes the next character only if it is one of expct.
#
# Returns the consumed character on a match; otherwise pushes the character
# back and returns nil.
def poll_chars( *expct )

    ch = get_char
    return ch if expct.include?( ch )

    unget_char( ch )
    nil
end
read_decl_type_name() click to toggle source
# File lib/mingle.rb, line 882
# Reads a declared type name: one character accepted by decl_nm_start?
# followed by any number of characters accepted by decl_nm_char?. The first
# character that does not belong to the name is pushed back.
#
# Fails with "Empty type name" at end of input and with
# "Illegal type name start" when the first character is not acceptable.
# Returns a DeclaredTypeName built with :name set to the text read.
def read_decl_type_name

    fail_unexpected_end( "Empty type name" ) if eof?

    first = get_char

    unless decl_nm_start?( first )
        fail_parse( "Illegal type name start: #{err_ch( first )}" )
    end

    name = first.chr

    while decl_nm_char?( nxt = get_char )
        name << nxt
    end

    # nxt is the first non-name character (or nil at end of input)
    unget_char( nxt )

    DeclaredTypeName.send( :new, :name => name )
end
read_dig_str( err_desc, *ends ) click to toggle source
# File lib/mingle.rb, line 1086
# Reads a run of DIGIT characters and returns it as a string.
#
# err_desc:: name of the number part being read, used in error messages
# ends::     accepted for interface compatibility; not used by the body
#
# The run ends, without consuming the terminator, at end of input, at an
# 'e' / 'E', or at any character accepted by special_char?. Any other
# non-digit is a parse error, as is an empty run.
def read_dig_str( err_desc, *ends )

    digits = ""

    while DIGIT.include?( ch = get_char )
        digits << ch
    end

    unless [ nil, ?e, ?E ].include?( ch ) || special_char?( ch )
        fail_parse( "Unexpected char in #{err_desc}: #{err_ch( ch )}" )
    end

    unget_char( ch )

    if digits.empty?
        fail_parse( "Number has empty or invalid #{err_desc}" )
    end

    digits
end
read_escaped_char( dest ) click to toggle source
# File lib/mingle.rb, line 1031
# Reads the character following a backslash inside a string literal and
# appends the character it denotes to dest.
#
# Recognized escapes: n, t, f, r, b, backslash, double quote, and u (which
# delegates to read_utf16_escape). Anything else is a parse error.
#
# At end of input get_char returns nil; the description passed to err_ch is
# only built for a real character, so the failure is reported as
# "Unrecognized escape: END" instead of crashing with NoMethodError on
# nil.chr.
def read_escaped_char( dest )

    ch = get_char

    case ch
    when ?n then dest << "\n"
    when ?t then dest << "\t"
    when ?f then dest << "\f"
    when ?r then dest << "\r"
    when ?b then dest << "\b"
    when ?\\ then dest << "\\"
    when ?" then dest << "\""
    when ?u then read_utf16_escape( dest )
    else
        desc = ch ? "\\#{ch.chr}" : nil
        fail_parse( "Unrecognized escape: #{err_ch( ch, desc )}" )
    end
end
read_ident( styl = nil ) click to toggle source
# File lib/mingle.rb, line 854
# Reads an identifier as a sequence of parts and returns a MingleIdentifier
# built with :parts set to the non-empty parts read.
#
# styl:: identifier style to start with, or nil to let the first separator
#        determine it (read_ident_part returns the updated style)
#
# A part is required (expct) for the first part, and for later parts when
# can_trail?( styl ) is true. Reading stops when read_ident_part reports the
# identifier done or the input reaches eof. Fails with "Empty identifier"
# when no part was read.
def read_ident( styl = nil )

    parts = []
    id_done = nil

    loop do

        unless eof?
            expct = parts.empty? || can_trail?( styl )
            part, styl, id_done = read_ident_part( styl, expct )
            parts << part unless part.empty?
        end

        break if id_done || eof?
    end

    fail_unexpected_end( "Empty identifier" ) if parts.empty?
    MingleIdentifier.send( :new, :parts => parts )
end
read_ident_part( styl, expct ) click to toggle source
# File lib/mingle.rb, line 840
# Reads a single identifier part.
#
# styl::  current identifier style (may be nil)
# expct:: passed through to read_ident_part_start; when true a missing part
#         start is an error there
#
# Returns [ part, styl, done ]: the part text ("" when no part start was
# found), the possibly-updated style, and a flag that is truthy when the part
# is empty or read_ident_part_tail reported the identifier finished.
def read_ident_part( styl, expct )

    part = ""
    first = read_ident_part_start( styl, expct )

    # No start character: the part is empty, which always ends the identifier
    return [ part, styl, true ] unless first

    part << first
    styl, tail_done = read_ident_part_tail( part, styl )

    [ part, styl, part.empty? || tail_done ]
end
read_ident_part_start( styl, expct ) click to toggle source
# File lib/mingle.rb, line 772
# Reads the first character of an identifier part.
#
# styl::  current identifier style; for ID_STYLE_LC_CAMEL_CAPPED a part
#         starts with an upper-case letter (returned downcased), otherwise
#         with a character accepted by ident_start?
# expct:: when true, a missing start character is a parse error; when false
#         the offending character is pushed back instead
#
# Returns the (normalized) start character, or nil when none was found and
# expct is false.
def read_ident_part_start( styl, expct )

    ch = get_char

    res =
        if styl == ID_STYLE_LC_CAMEL_CAPPED
            UC_ALPHA.include?( ch ) ? ch.chr.downcase : nil
        else
            ident_start?( ch ) ? ch : nil
        end

    return res if res

    if expct
        fail_parse "Illegal start of identifier part: #{err_ch( ch )}"
    else
        unget_char( ch )
    end

    res
end
read_ident_part_tail( part, styl ) click to toggle source
# File lib/mingle.rb, line 818
# Reads the rest of an identifier part after its first character, appending
# to part in place.
#
# Characters accepted by ident_part_char? are appended. A character accepted
# by ident_part_sep? ends the part and is handed to read_ident_sep, which
# returns the style to use from then on. Any other character (or end of
# input) ends both the part and the identifier and is pushed back.
#
# Returns [ styl, id_done ], where id_done is true when the identifier is
# finished and nil otherwise.
def read_ident_part_tail( part, styl )

    id_done = nil

    loop do

        ch = get_char

        if ident_part_char?( ch )
            part << ch
        elsif ident_part_sep?( ch )
            styl = read_ident_sep( ch, styl )
            break
        else
            id_done = true
            unget_char( ch )
            break
        end
    end

    [ styl, id_done ]
end
read_ident_sep( ch, styl ) click to toggle source
# File lib/mingle.rb, line 794
# Handles a character that ended an identifier part and returns the
# identifier style to use from here on.
#
# ch::   the separator-like character just read
# styl:: the style established so far, or nil if none yet
#
# With no style yet, '-' selects ID_STYLE_LC_HYPHENATED, '_' selects
# ID_STYLE_LC_UNDERSCORE, and anything else selects ID_STYLE_LC_CAMEL_CAPPED
# with the character pushed back (it belongs to the next part).
#
# With a style already set, a character equal to sep_char_for( styl ) is
# consumed -- and is an "Empty identifier part" error if the input then ends
# and can_trail?( styl ) holds; any other character is pushed back.
def read_ident_sep( ch, styl )

    unless styl
        return ID_STYLE_LC_HYPHENATED if ch == ?-
        return ID_STYLE_LC_UNDERSCORE if ch == ?_

        unget_char( ch )
        return ID_STYLE_LC_CAMEL_CAPPED
    end

    if ch == sep_char_for( styl )
        fail_unexpected_end( "Empty identifier part" ) if eof? && can_trail?( styl )
    else
        unget_char( ch )
    end

    styl
end
read_num_exp( opts ) click to toggle source
# File lib/mingle.rb, line 1110
# Reads the optional exponent of a number, storing the results in opts.
#
# If the next character is 'e' or 'E', opts[ :exp_char ] is set to it and
# opts[ :exp ] to the exponent digits (read with read_dig_str), prefixed
# with "-" when a minus sign was present; a leading '+' is consumed and
# dropped.
#
# Otherwise the character must legitimately end the number -- end of input,
# whitespace, or a special character other than '.' -- in which case it is
# pushed back; anything else is a parse error.
def read_num_exp( opts )

    ch = get_char

    if ch == ?e || ch == ?E

        opts[ :exp_char ] = ch.chr

        sign = poll_chars( ?-, ?+ ) == ?- ? "-" : ""
        opts[ :exp ] = sign + read_dig_str( "exponent" )

    elsif ch.nil? || whitespace?( ch ) || ( ch != ?. && special_char?( ch ) )
        unget_char( ch )
    else
        fail_parse( "Expected exponent start or num end, found: #{err_ch( ch )}" )
    end
end
read_number() click to toggle source
# File lib/mingle.rb, line 1133
# Reads a numeric literal: an integer part, an optional '.'-introduced
# fractional part, and an optional exponent (see read_num_exp).
#
# Returns a NumericToken built from the collected options: :int always,
# :frac when a '.' was present, plus whatever read_num_exp adds.
def read_number

    opts = { :int => read_dig_str( "integer part" ) }

    if poll_chars( ?. )
        opts[ :frac ] = read_dig_str( "fractional part" )
    end

    read_num_exp( opts )

    NumericToken.new( opts )
end
read_special() click to toggle source
# File lib/mingle.rb, line 910
# Consumes one character and wraps it in a SpecialToken (as :val).
def read_special
    ch = get_char
    SpecialToken.new( :val => ch.chr )
end
read_string() click to toggle source
# File lib/mingle.rb, line 1060
# Reads a double-quoted string literal and returns a StringToken whose :val
# is the decoded content.
#
# The opening quote is required. Inside the literal a backslash starts an
# escape (read_escaped_char), a double quote ends the literal, and any other
# character goes through append_string_tok. Reaching end of input before the
# closing quote is an "Unterminated string literal" error.
def read_string

    opening = get_char

    if opening != ?"
        fail_parse( "Expected string start, saw #{err_ch( opening )}" )
    end

    res = RubyVersions.when_19x( "" ) { |s| s.encode!( "utf-8" ) }

    loop do

        ch = get_char
        break if ch == ?"

        if ch.nil?
            fail_parse( "Unterminated string literal" )
        elsif ch == ?\\
            read_escaped_char( res )
        else
            append_string_tok( res, ch )
        end
    end

    StringToken.new( :val => res )
end
read_trail_surrogate( bin ) click to toggle source
# File lib/mingle.rb, line 987
# Reads the \uXXXX escape that must follow a leading surrogate and appends
# its two bytes to bin.
#
# bin:: byte string already holding the leading surrogate's two bytes
#
# Fails (at locations adjusted to point at the offending text) when the next
# two characters are not a backslash followed by 'u', or when the value read
# is not within TRAIL_SURROGATE. End of input while reading either of the
# two characters is reported by get_char( true ).
def read_trail_surrogate( bin )

    tmpl = "Expected trailing surrogate, found: %s"

    backslash = get_char( true )

    if backslash != ?\\
        impl_fail_parse( sprintf( tmpl, err_ch( backslash ) ), create_loc )
    end

    marker = get_char( true )

    if marker != ?u
        impl_fail_parse( sprintf( tmpl, "\\#{marker.chr}" ), create_loc( -1 ) )
    end

    hi, lo = read_utf16_bytes
    bin << hi << lo

    return if surrogate?( hi, lo, TRAIL_SURROGATE )

    impl_fail_parse(
        "Invalid surrogate pair #{escape_utf16( bin )}", create_loc( -11 ) )
end
read_utf16_bytes() click to toggle source
# File lib/mingle.rb, line 947
# Reads four hex digits (the XXXX of a \uXXXX escape) and returns them as
# two byte values: [ high_byte, low_byte ].
#
# Fails with "Invalid hex char in escape" on the first character that
# hex_char? rejects (including end of input).
def read_utf16_bytes

    ( 1..2 ).map do

        digits = ( 1..2 ).map do
            ch = get_char

            unless hex_char?( ch )
                fail_parse( "Invalid hex char in escape: #{err_ch( ch )}" )
            end

            ch
        end

        digits.join.to_i( 16 )
    end
end
read_utf16_escape( dest ) click to toggle source
# File lib/mingle.rb, line 1009
# Decodes the body of a \uXXXX escape (the part after "\u") and appends the
# resulting UTF-8 text to dest.
#
# The two bytes read are collected in a binary string. If they form a
# leading surrogate, the trailing surrogate escape is read as well; a lone
# trailing surrogate is a parse error. The collected bytes are then
# converted from UTF-16BE to UTF-8, via Iconv when USE_ICONV is set and via
# String#encode! otherwise.
def read_utf16_escape( dest )

    bin = new_bin_str

    hi, lo = read_utf16_bytes
    bin << hi << lo

    if surrogate?( hi, lo, LEAD_SURROGATE )
        read_trail_surrogate( bin )
    elsif surrogate?( hi, lo, TRAIL_SURROGATE )
        impl_fail_parse(
            "Trailing surrogate with no lead: #{escape_utf16( bin )}",
            create_loc( -5 )
        )
    end

    decoded =
        if USE_ICONV
            Iconv.conv( "utf-8", "utf-16be", bin )
        else
            bin.encode!( "utf-8", "utf-16be" )
        end

    dest << decoded
end
read_whitespace() click to toggle source
# File lib/mingle.rb, line 920
# Consumes a run of characters accepted by whitespace? and returns it as a
# WhitespaceToken (:ws). The first non-whitespace character is pushed back;
# the run may be empty.
def read_whitespace

    ws = ""

    while whitespace?( ch = get_char )
        ws << ch
    end

    # ch is the first non-whitespace character (or nil at end of input)
    unget_char( ch )

    WhitespaceToken.new( :ws => ws )
end
sep_char_for( styl ) click to toggle source
# File lib/mingle.rb, line 758
# Returns the separator character used by the given identifier style: '-'
# for ID_STYLE_LC_HYPHENATED, '_' for ID_STYLE_LC_UNDERSCORE, and nil for
# anything else.
def sep_char_for( styl )
    return ?- if styl == ID_STYLE_LC_HYPHENATED
    return ?_ if styl == ID_STYLE_LC_UNDERSCORE
    nil
end
special_char?( ch ) click to toggle source
# File lib/mingle.rb, line 905
# Tests whether ch is one of SpecialToken::TOK_CHARS.
#
# Returns the index of ch within TOK_CHARS (truthy, possibly 0) when found,
# nil when not found, and ch itself when ch is nil or false.
def special_char?( ch )
    return ch unless ch
    SpecialToken::TOK_CHARS.index( ch )
end
starts_num?( ch ) click to toggle source
# File lib/mingle.rb, line 1081
# True when ch can begin a numeric literal, i.e. DIGIT includes it.
def starts_num?( ch )
    is_digit = DIGIT.include?( ch )
    is_digit
end
surrogate?( hi, lo, rng ) click to toggle source
# File lib/mingle.rb, line 966
# Tests whether the 16-bit value formed from the byte pair hi/lo
# ((hi << 8) + lo) lies within rng.
def surrogate?( hi, lo, rng )
    code_unit = ( hi << 8 ) + lo
    rng.include?( code_unit )
end
unget_char( ch ) click to toggle source

Okay to call with nil (okay to unget EOF)

# File lib/mingle.rb, line 712
# Pushes ch back onto @io and rewinds the position counters, undoing the
# effect of the get_char that produced it.
#
# For "\n" the line number is decremented and the column restored from
# @unread_col (saved by get_char); for any other character the column is
# decremented. Calling with nil (i.e. ungetting EOF) is a no-op.
def unget_char( ch )

    return unless ch

    @io.ungetc( ch )

    if ch == ?\n
        @line, @col = @line - 1, @unread_col
    else
        @col -= 1
    end
end
whitespace?( ch ) click to toggle source
# File lib/mingle.rb, line 915
# Tests whether ch is a whitespace character: space, newline, carriage
# return or tab.
#
# Returns the index of ch within " \n\r\t" (truthy, possibly 0) when found,
# nil when not found, and ch itself when ch is nil or false.
def whitespace?( ch )
    return ch unless ch
    " \n\r\t".index( ch )
end