class Metasm::C::Parser

Dumper : objects => C source

Attributes

allow_bad_c[RW]
endianness[RW]
lexer[RW]
pragma_pack[RW]
program[RW]
toplevel[RW]
typesize[RW]

Public Class Methods

new(*args) click to toggle source

allowed arguments: ExeFormat, CPU, Preprocessor, Symbol (for the data model)

# File metasm/parse_c.rb, line 1185
# Builds a parser from an unordered list of optional arguments, each one
# recognized by its class:
#  Symbol       : name of the data model method to call (default :ilp32)
#  Preprocessor : used as lexer (a new Preprocessor is created otherwise)
#  ExeFormat    : stored in @program, also used to find the cpu
#  CPU          : gives the endianness (:big is used when there is no cpu)
# The cpu and the program are given a chance to tune the parser at the end.
def initialize(*args)
        first_of = lambda { |klass| args.grep(klass).first }
        data_model = first_of[Symbol] || :ilp32
        @lexer = first_of[Preprocessor] || Preprocessor.new
        @program = first_of[ExeFormat]
        target_cpu = first_of[CPU]
        target_cpu ||= @program.cpu if @program

        # install our pragma handler, remember the previous one to delegate to it
        @prev_pragma_callback = @lexer.pragma_callback
        @lexer.pragma_callback = lambda { |tok| parse_pragma_callback(tok) }

        @toplevel = Block.new(nil)
        @unreadtoks = []
        @endianness = (target_cpu ? target_cpu.endianness : :big)
        @typesize = { :void => 1, :__int8 => 1, :__int16 => 2, :__int32 => 4, :__int64 => 8,
                :char => 1, :float => 4, :double => 8, :longdouble => 12 }
        # the data model method fills in the remaining integer/pointer sizes
        send data_model
        if target_cpu
                target_cpu.tune_cparser(self)
        end
        if @program
                @program.tune_cparser(self)
        end
end
parse(text) click to toggle source

creates a new CParser, parses all top-level statements

# File metasm/parse_c.rb, line 1162
# Convenience entry point: instantiates a parser with the default arguments
# and returns the result of its #parse on +text+.
def self.parse(text)
        parser = new
        parser.parse(text)
end

Public Instance Methods

alloc_c_ary(typename, init=1) click to toggle source

allocates an array of the given type; init is either the length of the array, or an array of initial values

# File metasm/parse_c.rb, line 3188
# Allocates a C array whose element type is looked up with find_c_type.
# +init+ is either the number of elements (Integer), or an object responding
# to #length; when it is a ruby Array, its values are stored in the new
# array, index by index.
# Returns the AllocCStruct wrapping the array.
def alloc_c_ary(typename, init=1)
        elem_type = find_c_type(typename)
        count = if init.kind_of?(Integer) then init else init.length end
        ary = AllocCStruct.new(self, C::Array.new(elem_type, count))
        return ary unless init.kind_of?(::Array)
        init.each_with_index { |value, idx| ary[idx] = value }
        ary
end
alloc_c_struct(structname, values=nil) click to toggle source

allocate a new AllocCStruct from the struct/struct typedef name of the current toplevel optionally populate the fields using the 'values' hash

# File metasm/parse_c.rb, line 3171
# Allocates a new AllocCStruct for the struct found by find_c_struct
# (struct name or typedef name of the current toplevel).
# When +values+ is given, each of its key/value pairs is assigned to the
# new structure with []=.
def alloc_c_struct(structname, values=nil)
        st = AllocCStruct.new(self, find_c_struct(structname))
        if values
                values.each { |field, value| st[field] = value }
        end
        st
end
check_compatible_type(tok, oldtype, newtype, strict = false, checked = []) click to toggle source

checks that the types are compatible (variable predeclaration, function argument..) strict = false for func call/assignment (eg char compatible with int – but int is incompatible with char) output warnings only

# File metasm/parse_c.rb, line 1379
# Compares +oldtype+ against +newtype+ and reports incompatibilities.
# Nothing is checked unless ruby runs in verbose mode ($VERBOSE).
#  tok     : token used to build the messages (tok.exception / raise tok)
#  strict  : when true, base types must have the same name and specifier,
#            and pointed types are compared recursively; when false only
#            sizes and signedness of base types are compared
#  checked : list of the old types already visited, used by the recursive
#            calls to stop on self-referencing types
# ParseErrors raised by the checks are only printed by the outermost call
# (the one whose +checked+ list holds a single entry); nested calls let them
# bubble up so that the message is printed once.
def check_compatible_type(tok, oldtype, newtype, strict = false, checked = [])
        return if not $VERBOSE
        # work on the real types, enums are handled as plain ints
        oldtype = oldtype.untypedef
        newtype = newtype.untypedef
        oldtype = BaseType.new(:int) if oldtype.kind_of? Enum
        newtype = BaseType.new(:int) if newtype.kind_of? Enum

        # warn when the old type carries more distinct qualifiers than the new one
        puts tok.exception('type qualifier mismatch').message if oldtype.qualifier.to_a.uniq.length > newtype.qualifier.to_a.uniq.length

        # avoid infinite recursion
        return if checked.include? oldtype
        # build a new list instead of mutating the one shared with the caller
        checked = checked + [oldtype]

    begin
        case newtype
        when Function
                raise tok, 'type error' if not oldtype.kind_of? Function
                # return types, then argument lists (only when both are known)
                check_compatible_type tok, oldtype.type, newtype.type, strict, checked
                if oldtype.args and newtype.args
                        if oldtype.args.length != newtype.args.length or
                                        oldtype.varargs != newtype.varargs
                                raise tok, 'type error'
                        end
                        oldtype.args.zip(newtype.args) { |oa, na|
                                # begin ; rescue ParseError: raise $!.message + "in parameter #{oa.name}" end
                                check_compatible_type tok, oa.type, na.type, strict, checked
                        }
                end
        when Pointer
                # integer -> pointer only deserves a warning
                if oldtype.kind_of? BaseType and oldtype.integral?
                        puts tok.exception('making pointer from integer without a cast').message
                        return
                end
                raise tok, 'type error' if not oldtype.kind_of? Pointer
                # a void pointer on either side disables the check of the pointed types
                hasvoid = true if (t = newtype.type.untypedef).kind_of? BaseType and t.name == :void
                hasvoid = true if (t = oldtype.type.untypedef).kind_of? BaseType and t.name == :void        # struct foo *f = NULL;
                if strict and not hasvoid
                        check_compatible_type tok, oldtype.type, newtype.type, strict, checked
                end
        when Union
                # both types must be of the exact same class, members are compared pairwise
                raise tok, 'type error' if not oldtype.class == newtype.class
                if oldtype.members and newtype.members
                        if oldtype.members.length != newtype.members.length
                                raise tok, 'bad member count'
                        end
                        oldtype.members.zip(newtype.members) { |om, nm|
                                # raise tok if om.name and nm.name and om.name != nm.name # don't care
                                check_compatible_type tok, om.type, nm.type, strict, checked
                        }
                end
        when BaseType
                raise tok, 'type error' if not oldtype.kind_of? BaseType
                if strict
                        if oldtype.name != newtype.name or
                        oldtype.specifier != newtype.specifier
                                raise tok, 'type error'
                        end
                else
                        # loose mode: compare the sizes registered in @typesize and the specifiers
                        raise tok, 'type error' if @typesize[newtype.name] == 0 and @typesize[oldtype.name] > 0
                        puts tok.exception('type size mismatch, may lose bits').message if @typesize[oldtype.name] > @typesize[newtype.name]
                        puts tok.exception('sign mismatch').message if oldtype.specifier != newtype.specifier and @typesize[newtype.name] == @typesize[oldtype.name]
                end
        end
    rescue ParseError
        # only the outermost call (a single entry in checked) reports the error
        raise $! if checked.length != 1      # bubble up
        # fall back on the class name if a type cannot be rendered as a string
        oname = (oldtype.to_s rescue oldtype.class.name)
        nname = (newtype.to_s rescue newtype.class.name)
        puts $!.message + " incompatible type #{oname} to #{nname}"
    end
end
checkstatementend(tok=nil) click to toggle source

checks that we are at the end of a statement, ie an ';' character (consumed), or a '}' (not consumed) otherwise, raise either the given token or self.

# File metasm/parse_c.rb, line 1535
# Checks that the next significant token ends a statement: a ';' (which is
# consumed) or a '}' (which is pushed back for the caller).
# Anything else raises '";" expected':
#  - on the offending token when one was read,
#  - at end of input, on the +tok+ given by the caller (usually the token
#    that started the statement),
#  - on the parser itself when no token was given.
def checkstatementend(tok=nil)
        # read into a separate local: reusing +tok+ here would overwrite the
        # caller's token before it can be used for the error report
        ntok = skipspaces
        raise ntok || tok || self, '";" expected' if not ntok or ntok.type != :punct or (ntok.raw != ';' and ntok.raw != '}')
        unreadtok ntok if ntok.raw == '}'
end
decode_c_ary(typename, len, str, offset=0) click to toggle source

“cast” a string to C::Array

# File metasm/parse_c.rb, line 3202
# "Casts" the string +str+ to a C array of +len+ elements of the type
# found by find_c_type(typename), starting at +offset+ in the string.
# Returns the AllocCStruct built over the string.
def decode_c_ary(typename, len, str, offset=0)
        ary_type = C::Array.new(find_c_type(typename), len)
        AllocCStruct.new(self, ary_type, str, offset)
end
decode_c_struct(structname, str, offset=0) click to toggle source

parses a given String as an AllocCStruct; offset is an optional offset from the start of the string. Modifications to the structure will modify the underlying string

# File metasm/parse_c.rb, line 3181
# Maps the struct found by find_c_struct(structname) over the string
# +str+, starting at +offset+, and returns the resulting AllocCStruct.
def decode_c_struct(structname, str, offset=0)
        AllocCStruct.new(self, find_c_struct(structname), str, offset)
end
decode_c_value(str, type, off=0) click to toggle source
# File metasm/parse_c.rb, line 3237
# Reads a value of C type +type+ (a type, or a Variable whose type is used)
# from the string +str+ at offset +off+.
# Unions and arrays are returned as an AllocCStruct mapped on the string.
# Other values are decoded with Expression.decode_immediate using the parser
# endianness, sign-extended when the type is a signed integral type; a
# pointer whose value is 0 is returned as nil.
def decode_c_value(str, type, off=0)
        ctype = type.kind_of?(Variable) ? type.type : type
        ctype = ctype.untypedef
        if ctype.kind_of?(C::Union) or ctype.kind_of?(C::Array)
                return AllocCStruct.new(self, ctype, str, off)
        end

        val = Expression.decode_immediate(str, sizeof(ctype), @endianness, off)
        if ctype.integral? and ctype.signed?
                val = Expression.make_signed(val, sizeof(ctype)*8)
        end
        if val == 0 and ctype.pointer?
                nil
        else
                val
        end
end
dump_definition(*funcnames) click to toggle source

returns a string containing the C definition(s) of toplevel functions, with their dependencies

# File metasm/parse_c.rb, line 3311
# Returns the string built by dump_definitions for the toplevel symbols
# named in +funcnames+.
# The toplevel statement list is replaced by an empty one during the dump,
# and put back afterwards, even if the dump raises.
def dump_definition(*funcnames)
        saved_statements = @toplevel.statements
        @toplevel.statements = []
        targets = funcnames.map { |name| @toplevel.symbol[name] }
        dump_definitions(targets)
ensure
        @toplevel.statements = saved_statements
end
dump_definitions(list, exclude=[]) click to toggle source

returns a big string holding the definitions of all terms appearing in list, along with their dependencies; the terms listed in exclude are left out

# File metasm/parse_c.rb, line 3288
# Renders the definition of every term of +list+, plus everything these
# terms depend on (transitively), and returns the result as one string.
# Each term must respond to dump_def(toplevel) and return the pair
# [rendering, dependencies].
# Terms listed in +exclude+ are neither rendered nor kept as dependency.
# The ordering of the output is delegated to @toplevel.dump_reorder.
def dump_definitions(list, exclude=[])
        todo_rndr = {}
        todo_deps = {}
        register = lambda { |term|
                todo_rndr[term], todo_deps[term] = term.dump_def(@toplevel)
        }

        list.each { |term| register[term] }
        # c.toplevel.anonymous_enums.to_a.each { |t| todo_rndr[t], todo_deps[t] = t.dump_def(c.toplevel) }

        # keep going until every known dependency has been rendered itself
        until (pending = todo_deps.values.flatten - todo_deps.keys).empty?
                pending.each { |term| register[term] }
        end

        exclude.each { |term|
                todo_deps.delete term
                todo_rndr.delete term
        }
        todo_deps.each_key { |term| todo_deps[term] -= exclude }

        candidates = @toplevel.struct.values + @toplevel.symbol.values
        candidates -= candidates.grep(::Integer)   # Enum values

        ordered = @toplevel.dump_reorder(candidates, todo_rndr, todo_deps)[0]
        ordered.join("\n")
end
encode_c_value(type, val) click to toggle source

convert (pack) a ruby value into a C buffer packs integers, converts Strings to their C pointer (using DynLdr)

# File metasm/parse_c.rb, line 3210
# Converts (packs) the ruby value +val+ for the C type +type+ (a type, or a
# Variable whose type is used):
#  nil          is handled as the integer 0
#  Integer      is used as is
#  String       is replaced by the pointer given by DynLdr.str_ptr
#  Hash         allocates a struct of the pointed type populated from the
#               hash, and returns this AllocCStruct
#  Proc         is converted by DynLdr.convert_rb2c
#  AllocCStruct is replaced by the address of its string plus its offset
# Integer results are then packed with Expression.encode_immediate using
# sizeof(type) and the parser endianness; other results are returned as is.
# Raises a RuntimeError for any other kind of value.
def encode_c_value(type, val)
        type = type.type if type.kind_of? Variable

        encoded = case val
                  when nil
                          0
                  when ::Integer
                          val
                  when ::String
                          DynLdr.str_ptr(val)
                  when ::Hash
                          type = type.pointed while type.pointer?
                          raise "need a struct ptr for #{type} #{val.inspect}" if not type.kind_of? Union
                          buf = alloc_c_struct(type, val)
                          val.instance_variable_set('@rb2c', buf) # GC trick
                          buf
                  when ::Proc
                          DynLdr.convert_rb2c(type, val)        # allocate a DynLdr callback
                  when AllocCStruct
                          DynLdr.str_ptr(val.str) + val.stroff
                  #when ::Float                # TODO
                  else
                          raise "TODO #{val.inspect}"
                  end

        return encoded unless encoded.kind_of?(::Integer)
        Expression.encode_immediate(encoded, sizeof(type), @endianness)
end
eos?() click to toggle source
# File metasm/parse_c.rb, line 1519
# True when there is nothing left to read: no token was pushed back in
# @unreadtoks and the lexer itself is at end of stream.
def eos?
        return false unless @unreadtoks.empty?
        @lexer.eos?
end
exception(msg='EOF unexpected') click to toggle source

allows 'raise self'

# File metasm/parse_c.rb, line 1451
# Makes 'raise self, msg' possible from within the parser: the exception
# object is built by the lexer.
def exception(msg='EOF unexpected')
        @lexer.exception(msg)
end
factorize(*a) click to toggle source

returns a big string containing all definitions from headers used in the source (including macros)

# File metasm/parse_c.rb, line 3257
# Parses a source (the arguments are passed as is to #parse) while tracing
# the macros it uses, and returns the string built by factorize_final.
# Raises 'eof expected' (on the next token, or on the parser) if the lexer
# still holds data once the parsing is done.
def factorize(*a)
        factorize_init
        parse(*a)
        unless @lexer.eos?
                raise @lexer.readtok || self, 'eof expected'
        end
        factorize_final
end
factorize_final() click to toggle source
# File metasm/parse_c.rb, line 3268
# Builds the output of #factorize: the dump of the macros traced by the
# lexer, an empty line, then the definitions (see dump_definitions) needed
# by the user-defined objects, these objects being themselves excluded.
# An object is user-defined when no String of its backtrace starts with
# '<'. The toplevel statement list is emptied in the process.
def factorize_final
        # every struct and symbol known at toplevel, but the enum values
        known = @toplevel.struct.values + @toplevel.symbol.values
        known -= known.grep(::Integer)   # Enum values

        # keep the objects whose backtrace has no '<...' entry
        userdefined = known.find_all { |obj|
                files = obj.backtrace.backtrace.grep(::String)
                files.grep(/^</).empty?
        }

        @toplevel.statements.clear   # don't want all Declarations

        # a macro is fine too
        header = @lexer.dump_macros(@lexer.traced_macros, false) + "\n\n"
        header + dump_definitions(userdefined, userdefined)
end
factorize_init() click to toggle source
# File metasm/parse_c.rb, line 3264
# Prepares a #factorize run: gives the lexer a new empty list of traced
# macros.
def factorize_init
        @lexer.traced_macros = Array.new
end
find_c_struct(structname) click to toggle source

finds a Struct/Union object from a struct name or a typedef name; raises if it can't find it

# File metasm/parse_c.rb, line 3133
# Returns the C::Union designated by +structname+, which may be:
#  - a String or Symbol: name of a toplevel struct, or name of a toplevel
#    symbol whose (untypedef'ed) type is a struct, or a pointer to one
#    (any number of indirections)
#  - a C::Union object, returned as is
# Raises a RuntimeError "unknown struct" in every other case.
def find_c_struct(structname)
        structname = structname.to_s if structname.kind_of?(::Symbol)
        struct = nil
        if structname.kind_of?(::String)
                struct = @toplevel.struct[structname]
                unless struct
                        # not a struct name: try a symbol (eg typedef) of this name
                        sym = @toplevel.symbol[structname]
                        raise "unknown struct #{structname.inspect}" if not sym
                        struct = sym.type.untypedef
                        struct = struct.pointed while struct.pointer?
                end
        elsif structname.kind_of? C::Union
                struct = structname
        end
        raise "unknown struct #{structname.inspect}" if not struct.kind_of? C::Union
        struct
end
find_c_type(typename) click to toggle source

find a C::Type (struct/union/typedef/basetype) from a string

# File metasm/parse_c.rb, line 3148
# Returns the C::Type designated by +typename+, which may be:
#  - a String or Symbol: name of a toplevel struct, or name of a toplevel
#    symbol (its untypedef'ed type is used), or else a C type expression
#    that is fed to the lexer and parsed as a declaration
#  - a C::Type object, returned as is
# Raises a RuntimeError "unknown type" in every other case, including when
# the parsing of the type expression fails.
def find_c_type(typename)
        typename = typename.to_s if typename.kind_of? ::Symbol
        type = nil
        if typename.kind_of?(::String)
                type = @toplevel.struct[typename]
                unless type
                        if sym = @toplevel.symbol[typename]
                                type = sym.type.untypedef
                        else
                                # last chance: parse the string as a C declaration
                                begin
                                        lexer.feed(typename)
                                        scope = C::Block.new(@toplevel)
                                        decl = Variable.parse_type(self, scope)
                                        decl.parse_declarator(self, scope)
                                        type = decl.type
                                rescue
                                        # any failure leaves type unset, reported as unknown type below
                                end
                        end
                end
        end
        type = typename if typename.kind_of?(C::Type)
        raise "unknown type #{typename.inspect}" if not type.kind_of? C::Type
        type
end
ilp16() click to toggle source
# File metasm/parse_c.rb, line 1204
# Data model 'ilp16': registers the sizes of short, pointers, int, long and
# long long in @typesize (2, 2, 2, 4 and 4). Returns @typesize.
def ilp16
        sizes = { :short => 2, :ptr => 2, :int => 2, :long => 4, :longlong => 4 }
        @typesize.merge!(sizes)
end
ilp32() click to toggle source
# File metasm/parse_c.rb, line 1213
# Data model 'ilp32': registers the sizes of short, pointers, int, long and
# long long in @typesize (2, 4, 4, 4 and 8). Returns @typesize.
def ilp32
        sizes = { :short => 2, :ptr => 4, :int => 4, :long => 4, :longlong => 8 }
        @typesize.merge!(sizes)
end
ilp64() click to toggle source
# File metasm/parse_c.rb, line 1226
# Data model 'ilp64': registers the sizes of short, pointers, int, long and
# long long in @typesize (2, 8, 8, 8 and 8). Returns @typesize.
def ilp64
        sizes = { :short => 2, :ptr => 8, :int => 8, :long => 8, :longlong => 8 }
        @typesize.merge!(sizes)
end
llp64() click to toggle source
# File metasm/parse_c.rb, line 1218
# Data model 'llp64': registers the sizes of short, pointers, int, long and
# long long in @typesize (2, 8, 4, 4 and 8). Returns @typesize.
def llp64
        sizes = { :short => 2, :ptr => 8, :int => 4, :long => 4, :longlong => 8 }
        @typesize.merge!(sizes)
end
lp32() click to toggle source
# File metasm/parse_c.rb, line 1209
# Data model 'lp32': registers the sizes of short, pointers, int, long and
# long long in @typesize (2, 4, 2, 4 and 8). Returns @typesize.
def lp32
        sizes = { :short => 2, :ptr => 4, :int => 2, :long => 4, :longlong => 8 }
        @typesize.merge!(sizes)
end
lp64() click to toggle source
# File metasm/parse_c.rb, line 1222
# Data model 'lp64': registers the sizes of short, pointers, int, long and
# long long in @typesize (2, 8, 4, 8 and 8). Returns @typesize.
def lp64
        sizes = { :short => 2, :ptr => 8, :int => 4, :long => 8, :longlong => 8 }
        @typesize.merge!(sizes)
end
macro_numeric(m) click to toggle source

check if a macro definition has a numeric value returns this value or nil

# File metasm/parse_c.rb, line 1826
# Checks if the macro named +m+ has a numeric value.
# Returns this value, or nil when:
#  - +m+ is not a Preprocessor::Macro, or is a function-like macro (it has
#    args or varargs)
#  - the macro name has no backtrace, or a backtrace whose first entry does
#    not start with '"' or '<' (this filters the metasm-defined macros;
#    presumably these characters come from the quoted or bracketed name of
#    the defining file -- to be confirmed against the preprocessor)
#  - the macro does not parse as a C expression reducing to a Numeric
# The macro name is fed to the lexer to be expanded and parsed, which is
# why this raises a RuntimeError if the parser still holds unparsed data.
# In all cases the data fed to the lexer is consumed before returning.
def macro_numeric(m)
        d = @lexer.definition[m]
        return if not d.kind_of? Preprocessor::Macro or d.args or d.varargs
        # filter metasm-defined vars (eg __PE__ / _M_IX86)
        # the character literals ?" and ?< compare correctly with bt[0][0]
        # whether ruby indexes strings as integers (1.8) or as strings (1.9+)
        return if not d.name or not bt = d.name.backtrace or (bt[0][0] != ?" and bt[0][0] != ?<)
        raise 'cannot macro_numeric with unparsed data' if not eos?
        @lexer.feed m
        if e = CExpression.parse(self, Block.new(@toplevel)) and eos?
                v = e.reduce(self)
                return v if v.kind_of? ::Numeric
        end
        # not a plain numeric expression: discard what is left of the expansion
        readtok until eos?
        nil
rescue ParseError
        # same cleanup when the expansion is not valid C
        readtok until eos?
        nil
end
numeric_constants() click to toggle source

returns all numeric constants defined with their value, either macros or enums for enums, also return the enum name

# File metasm/parse_c.rb, line 1846
# Lists all the numeric constants known to the parser, as an array of:
#  [name, value]            for the macros having a numeric value
#  [name, value, enum name] for the members of the toplevel named enums
#  [name, value]            for the toplevel symbols holding a Numeric that
#                           were not already listed as member of a named enum
def numeric_constants
        consts = []
        # macros
        @lexer.definition.each_key { |name|
                value = macro_numeric(name)
                consts << [name, value] if value
        }
        # enums
        listed = {}
        @toplevel.struct.each { |ename, type|
                next unless type.kind_of?(Enum)
                type.members.each { |mname, mvalue|
                        consts << [mname, mvalue, ename]
                        listed[mname] = true
                }
        }
        # remaining numeric symbols
        @toplevel.symbol.each { |name, sym|
                next unless sym.kind_of?(::Numeric)
                consts << [name, sym] unless listed[name]
        }
        consts
end
parse(text=nil, filename='<unk>', lineno=1) click to toggle source

parses the current lexer content (or the text arg) for toplevel definitions

# File metasm/parse_c.rb, line 1167
# Parses the toplevel definitions and statements found in the lexer.
# When +text+ is given, it is first fed to the lexer, along with +filename+
# and +lineno+.
# Raises 'invalid definition' (on the next token, or on the parser) when the
# parsing stops before the end of the lexer data.
# Runs sanity_checks and returns self.
def parse(text=nil, filename='<unk>', lineno=1)
        @lexer.feed text, filename, lineno if text
        until @lexer.eos?
                # stop as soon as nothing is recognized at the current position
                break unless parse_definition(@toplevel) or parse_toplevel_statement(@toplevel)
        end
        unless @lexer.eos?
                raise @lexer.readtok || self, 'invalid definition'
        end
        sanity_checks
        self
end
parse_definition(scope) click to toggle source

parses a variable/function definition, declaration or initialization; populates scope.symbol and scope.struct; raises on redefinitions; returns false if no definition is found

# File metasm/parse_c.rb, line 1601
# Parses one declaration at the current position: a type followed by a
# comma-separated list of declarators (variables, typedefs, function
# prototypes), each with an optional initializer, or by a single function
# definition.
# The parsed objects are stored in scope.symbol; a Declaration is appended
# to scope.statements for everything but the typedefs.
# Returns false when no type is found at the current position, true once a
# declaration has been parsed. Raises on syntax errors and on redefinitions.
def parse_definition(scope)
        return false if not basetype = Variable.parse_type(self, scope, true)

        # check struct predeclaration
        # (a struct/union/enum type directly followed by ';' declares no variable)
        tok = skipspaces
        if tok and tok.type == :punct and tok.raw == ';' and basetype.type and
                        (basetype.type.kind_of? Union or basetype.type.kind_of? Enum)
                return true
        else unreadtok tok
        end

        # set once a ',' has been seen: a function body or an asm redirect is then refused
        nofunc = false
        # one iteration per declarator of the comma-separated list
        loop do
                var = basetype.dup
                var.parse_declarator(self, scope)

                raise var.backtrace if not var.name # barrel roll

                if prev = scope.symbol[var.name]
                        # the name is already known in this scope
                        if prev.kind_of? TypeDef and var.storage == :typedef
                                check_compatible_type(var.backtrace, prev.type, var.type, true)
                                # windows.h redefines many typedefs with the same definition
                                puts "redefining typedef #{var.name}" if $VERBOSE
                                var = prev
                        elsif not prev.kind_of?(Variable) or
                                        prev.initializer or
                                        (prev.storage != :extern and prev.storage != var.storage) or
                                        (scope != @toplevel and prev.storage != :static)
                                # real redefinition: find something with a backtrace to show in the message
                                if prev.kind_of? ::Integer        # enum value
                                        prev = (scope.struct.values.grep(Enum) + scope.anonymous_enums.to_a).find { |e| e.members.index(prev) }
                                end
                                raise var.backtrace, "redefinition, previous is #{prev.backtrace.exception(nil).message rescue :unknown}"
                        else
                                # redeclaration of an uninitialized variable: check the types, keep the previous attributes
                                check_compatible_type var.backtrace, prev.type, var.type, true
                                (var.attributes ||= []).concat prev.attributes if prev.attributes
                        end
                elsif var.storage == :typedef
                        # new typedef: store a TypeDef object instead of the Variable
                        attrs = var.attributes
                        var = TypeDef.new var.name, var.type, var.backtrace
                        var.attributes = attrs if attrs
                end
                scope.statements << Declaration.new(var) unless var.kind_of? TypeDef

                # what follows the declarator: punctuation, or an asm keyword
                raise tok || self, 'punctuation expected' if not tok = skipspaces or (tok.type != :punct and not %w[asm __asm __asm__].include? tok.raw)

                case tok.raw
                when '{'
                        # function body
                        raise tok if nofunc or not var.kind_of? Variable or not var.type.kind_of? Function
                        scope.symbol[var.name] = var
                        body = var.initializer = Block.new(scope)
                        var.type.args ||= []
                        var.type.args.each { |v|
                                # put func parameters in func body scope
                                # arg redefinition is checked in parse_declarator
                                if not v.name
                                        puts "unnamed argument in definition of #{var.name}" if $DEBUG
                                        next     # should raise to be compliant
                                end
                                body.symbol[v.name] = v   # XXX will need special check in stack allocator
                        }

                        # parse definitions and statements up to the closing '}'
                        loop do
                                raise tok || self, var.backtrace.exception('"}" expected for end of function') if not tok = skipspaces
                                break if tok.type == :punct and tok.raw == '}'
                                unreadtok tok
                                if not parse_definition(body)
                                        # the function return type is passed as first element of the nesting info
                                        body.statements << parse_statement(body, [var.type.type])
                                end
                        end
                        # verbose mode: warn when a non-void function does not end with a return (or asm)
                        if $VERBOSE and not body.statements.last.kind_of? Return and not body.statements.last.kind_of? Asm
                                puts tok.exception('missing function return value').message if not var.type.type.untypedef.kind_of? BaseType or var.type.type.untypedef.name != :void
                        end
                        break
                when 'asm', '__asm', '__asm__'
                        # GCC function redirection
                        # void foo(void) __asm__("bar");  =>  when code uses 'foo', silently redirect to 'bar' instead
                        raise tok if nofunc or not var.kind_of? Variable or not var.type.kind_of? Function
                        # most of the time, 'bar' is not defined anywhere, so we support it only
                        # to allow parsing of headers using it, hoping noone will actually use them
                        unused = Asm.parse(self, scope)
                        puts "unsupported gcc-style __asm__ function redirect #{var.name.inspect} => #{unused.body.inspect}" if $VERBOSE
                        break
                when '='
                        # variable initialization
                        raise tok, '"{" or ";" expected' if var.type.kind_of? Function
                        raise tok, 'cannot initialize extern variable' if var.storage == :extern
                        scope.symbol[var.name] = var       # allow initializer to reference var, eg 'void *ptr = &ptr;'
                        var.initializer = var.type.parse_initializer(self, scope)
                        if var.initializer.kind_of?(CExpression) and (scope == @toplevel or var.storage == :static)
                                raise tok, "initializer for static #{var.name} is not constant" if not var.initializer.constant?
                        end
                        # recursive search of the variable v in the expression e
                        # expressions made of the unary '&' operator (no lexpr) are not walked
                        reference_value = lambda { |e, v|
                                found = false
                                case e
                                when Variable; found = true if e == v
                                when CExpression; e.walk { |ee| found ||= reference_value[ee, v] } if e.op != :& or e.lexpr
                                end
                                found
                        }
                        raise tok, "initializer for #{var.name} is not constant (selfreference)" if reference_value[var.initializer, var]
                        # read the separator following the initializer
                        raise tok || self, '"," or ";" expected' if not tok = skipspaces or tok.type != :punct
                else
                        scope.symbol[var.name] = var
                end

                # separator: ',' goes on with the next declarator, ';' ends the declaration
                # a '}' also ends it, and is left to the caller
                case tok.raw
                when ','; nofunc = true
                when ';'; break
                when '}'; unreadtok(tok); break
                else raise tok, '";" or "," expected'
                end
        end
        true
end
parse_file(file) click to toggle source

parses a C file

# File metasm/parse_c.rb, line 1176
# Reads the whole content of the file named +file+ and parses it as C,
# using the file name for the lexer. Returns the result of #parse.
def parse_file(file)
        source = File.read(file)
        parse(source, file)
end
parse_pragma_callback(otok) click to toggle source
# File metasm/parse_c.rb, line 1231
# Handles the #pragma directives known to the C parser; +otok+ is the token
# following 'pragma'. Unknown pragmas are passed to the pragma callback that
# was installed in the lexer before the parser was created.
#  pack                 : pack(), pack(N), pack(push[, N]), pack(pop[, N])
#                         updates @pragma_pack and the @pragma_pack_stack
#  warning              : silently discarded up to the end of line, unless
#                         $DEBUG is set (then handled by the previous callback)
#  prepare_visualstudio : calls prepare_visualstudio
#  prepare_gcc          : calls prepare_gcc
#  data_model           : calls the data model method named by the next token
# Raises on the faulty token (or on +otok+) when the directive is malformed.
def parse_pragma_callback(otok)
        case otok.raw
        when 'pack'
                # lp: opening parenthesis, v1/v2: optional arguments, rp: closing parenthesis
                nil while lp = @lexer.readtok and lp.type == :space
                nil while rp = @lexer.readtok and rp.type == :space
                if not rp or rp.type != :punct or rp.raw != ')'
                        v1 = rp
                        nil while rp = @lexer.readtok and rp.type == :space
                end
                if rp and rp.type == :punct and rp.raw == ','
                        nil while v2 = @lexer.readtok and v2.type == :space
                        nil while rp = @lexer.readtok and rp.type == :space
                end
                raise otok if not rp or lp.type != :punct or rp.type != :punct or lp.raw != '(' or rp.raw != ')'
                raise otok if (v1 and v1.type != :string) or (v2 and (v2.type != :string or v2.raw =~ /[^\d]/))
                if not v1
                        @pragma_pack = nil
                elsif v1.raw == 'push'
                        @pragma_pack_stack ||= []
                        @pragma_pack_stack << pragma_pack
                        @pragma_pack = v2.raw.to_i if v2
                        raise v2, 'bad pack value' if pragma_pack == 0
                elsif v1.raw == 'pop'
                        @pragma_pack_stack ||= []
                        raise v1, 'pack stack empty' if @pragma_pack_stack.empty?
                        @pragma_pack = @pragma_pack_stack.pop
                        @pragma_pack = v2.raw.to_i if v2 and v2.raw        # #pragma pack(pop, 4) => pop stack, but use 4 as pack value (imho)
                        raise v2, 'bad pack value' if @pragma_pack == 0
                elsif v1.raw =~ /^\d+$/
                        raise v2, '2nd arg unexpected' if v2
                        @pragma_pack = v1.raw.to_i
                        raise v1, 'bad pack value' if @pragma_pack == 0
                else raise otok
                end
                # the caller checks for :eol
        when 'warning'
                if $DEBUG
                        @prev_pragma_callback[otok]
                else
                        # silent discard
                        nil while tok = @lexer.readtok_nopp and tok.type != :eol
                        @lexer.unreadtok tok
                end
        when 'prepare_visualstudio'
                prepare_visualstudio
        when 'prepare_gcc'
                prepare_gcc
        when 'data_model'    # XXX use carefully, should be the very first thing parsed
                nil while lp = @lexer.readtok and lp.type == :space
                # lp is nil when the input ends right after 'data_model': report
                # this on the pragma token instead of calling methods on nil
                if not lp or lp.type != :string or lp.raw !~ /^s?[il]?lp(16|32|64)$/ or not respond_to? lp.raw
                        raise lp || otok, "invalid data model (use lp32/lp64/llp64/ilp64)"
                else
                        send lp.raw
                end
        else @prev_pragma_callback[otok]
        end
end
parse_statement(scope, nest) click to toggle source

returns a statement or raise

# File metasm/parse_c.rb, line 1732
# Parses and returns one statement found at the current position, or raises.
#  scope : the Block in which the statement appears
#  nest  : array describing the context of the statement: it is searched for
#          :switch, :loop and :expression, and its first element is used as
#          the return type of the current function by the 'return' handling
# Returns a Block for '{ ... }' (and an empty Block for a lone ';'), the
# parsed expression for an expression statement, or the object built for
# the keyword found (If, While, Goto, Return, Case, Label...).
def parse_statement(scope, nest)
        raise self, 'statement expected' if not tok = skipspaces

        if tok.type == :punct and tok.raw == '{'
                # compound statement: new scope holding definitions and statements up to the '}'
                body = Block.new scope
                loop do
                        raise tok || self, '"}" expected' if not tok = skipspaces
                        break if tok.type == :punct and tok.raw == '}'
                        unreadtok tok
                        if not parse_definition(body)
                                body.statements << parse_statement(body, nest)
                        end
                end
                return body
        elsif tok.type == :punct and tok.raw == ';'
                # empty statement
                return Block.new(scope)
        elsif tok.type != :string
                # not an identifier/keyword: this must be an expression
                unreadtok tok
                raise tok, 'expr expected' if not expr = CExpression.parse(self, scope)
                checkstatementend(tok)

                # verbose mode: warn about constant expressions used as a statement
                if $VERBOSE and not nest.include?(:expression) and (expr.op or not expr.type.untypedef.kind_of? BaseType or expr.type.untypedef.name != :void) and CExpression.constant?(expr)
                        puts tok.exception("statement with no effect : #{expr}").message
                end
                return expr
        end

        # tok is a :string token: keyword, label or start of an expression
        case tok.raw
        when 'if'
                If.parse      self, scope, nest
        when 'while'
                While.parse   self, scope, nest
        when 'do'
                DoWhile.parse self, scope, nest
        when 'for'
                For.parse     self, scope, nest
        when 'switch'
                Switch.parse  self, scope, nest
        when 'goto'
                raise tok || self, 'label expected' if not tok = skipspaces or tok.type != :string
                name = tok.raw
                checkstatementend(tok)
                Goto.new name
        when 'return'
                expr = CExpression.parse(self, scope)       # nil allowed
                raise tok || self, "cannot return #{expr} in function returning void" if expr and nest[0].kind_of?(Type) and nest[0].void?
                # p / i: the function returns a pointer / an integral type
                p, i = nest[0].pointer?, nest[0].integral? if expr
                r = expr.reduce(self) if p or i
                # the type check is skipped when returning an integer constant from a function
                # returning an integral type, or the constant 0 from a function returning a pointer
                if (not p and not i) or (i and not r.kind_of? ::Integer) or (p and r != 0)
                        check_compatible_type(tok, (expr ? expr.type : BaseType.new(:void)), nest[0])
                end
                checkstatementend(tok)
                Return.new expr
        when 'case'
                raise tok, 'case out of switch' if not nest.include? :switch
                Case.parse    self, scope, nest
        when 'default'
                raise tok || self, '":" expected' if not tok = skipspaces or tok.type != :punct or tok.raw != ':'
                raise tok, 'case out of switch' if not nest.include? :switch
                Case.new 'default', nil, parse_statement(scope, nest)
        when 'continue'
                checkstatementend(tok)
                raise tok, 'continue out of loop' if not nest.include? :loop
                Continue.new
        when 'break'
                checkstatementend(tok)
                raise tok, 'break out of loop' if not nest.include? :loop and not nest.include? :switch
                Break.new
        when 'asm', '__asm', '__asm__'
                Asm.parse self, scope
        else
                if ntok = skipspaces and ntok.type == :punct and ntok.raw == ':'
                        # 'name:' is a label, the statement following it is optional
                        begin
                                st = parse_statement(scope, nest)
                        rescue ParseError
                                puts "label without statement, #{$!.message}" if $VERBOSE
                        end
                        Label.new tok.raw, st
                else
                        # expression starting with an identifier: push back both tokens
                        # (last read first) and parse the expression
                        unreadtok ntok
                        unreadtok tok
                        raise tok, 'expr expected' if not expr = CExpression.parse(self, scope)
                        checkstatementend(tok)

                        # verbose mode: warn about constant expressions used as a statement
                        if $VERBOSE and not nest.include?(:expression) and (expr.op or not expr.type.untypedef.kind_of? BaseType or expr.type.untypedef.name != :void) and CExpression.constant?(expr)
                                puts tok.exception("statement with no effect : #{expr}").message
                        end
                        expr
                end
        end
end
parse_toplevel_statement(scope) click to toggle source

parses toplevel statements, return nil if none found toplevel statements are ';' and 'asm <..>'

# File metasm/parse_c.rb, line 1719
# Tries to consume one toplevel statement from the token stream.
# Recognised forms:
#   ';'            - empty statement
#   '{' '}'        - empty block (anything else after '{' raises '"}" expected')
#   asm/__asm/...  - parsed by Asm.parse and appended to scope.statements
# Returns true when one of these was consumed, nil otherwise. The token read
# here is not pushed back when nothing matches.
def parse_toplevel_statement(scope)
        tok = skipspaces
        return if not tok

        case tok.type
        when :punct
                case tok.raw
                when ';'
                        true
                when '{'
                        closing = skipspaces
                        if not closing or closing.type != :punct or closing.raw != '}'
                                # report on the offending token, or on the parser itself at end of input
                                raise closing || self, '"}" expected'
                        end
                        true
                end
        when :string
                return if not %w[asm __asm __asm__].include? tok.raw
                scope.statements << Asm.parse(self, scope)
                true
        end
end
precompile() click to toggle source
# File metasm/compile_c.rb, line 13
# Builds a Compiler from this parser and @program, hands it to
# @toplevel.precompile, and returns self.
def precompile
        compiler = Compiler.new(self, @program)
        @toplevel.precompile(compiler)
        self
end
prepare_gcc() click to toggle source
# File metasm/parse_c.rb, line 1302
                # Configures the preprocessor (@lexer) to resemble gcc:
                #  - registers weak definitions for a few gcc macros/keyword aliases
                #  - defines __builtin_constant_p as a one-argument macro expanding to 0,
                #    unless a definition already exists
                #  - calls nodefine_strong for 'alloca'
                #  - installs canned contents for the stddef.h, stdarg.h and limits.h includes
                # The limits.h values are computed from @typesize[:int] and @typesize[:long].
                def prepare_gcc
                        [
                                ['__GNUC__', 2],     # otherwise __attribute__ is defined to void..
                                ['__STDC__'],
                                ['__const', 'const'],
                                ['__signed', 'signed'],
                                ['__signed__', 'signed'],
                                ['__volatile', 'volatile']
                        ].each { |macro| @lexer.define_weak(*macro) }

                        unless @lexer.definition['__builtin_constant_p']
                                # magic macro to check if its arg is an immediate value
                                @lexer.define_weak('__builtin_constant_p', '0')
                                @lexer.definition['__builtin_constant_p'].args = [Preprocessor::Token.new([])]
                        end

                        # TODO __builtin_alloca
                        @lexer.nodefine_strong('alloca')

                        @lexer.hooked_include['stddef.h'] = <<EOH
/* simplified, define all at first invocation. may break things... */
#undef __need_ptrdiff_t
#undef __need_size_t
#undef __need_wint_t
#undef __need_wchar_t
#undef __need_NULL
#undef NULL
#if !defined (_STDDEF_H)
#define _STDDEF_H
#define __PTRDIFF_TYPE__ long int
typedef __PTRDIFF_TYPE__ ptrdiff_t;
#define __SIZE_TYPE__ long unsigned int
typedef __SIZE_TYPE__ size_t;
#define __WINT_TYPE__ unsigned int
typedef __WINT_TYPE__ wint_t;
#define __WCHAR_TYPE__ int
typedef __WCHAR_TYPE__ wchar_t;
#define NULL 0
#define offsetof(TYPE, MEMBER) __builtin_offsetof (TYPE, MEMBER)
#endif
EOH

                        # TODO va_args
                        @lexer.hooked_include['stdarg.h'] = <<EOH
// TODO
typedef void* __gnuc_va_list;
/*
typedef void* va_list;
#define va_start(v, l)
#define va_end(v)
#define va_arg(v, l)
#define va_copy(d, s)
*/
EOH

                        # all-ones values for the configured widths of int and long
                        uint_max = (1 << (8*@typesize[:int]))-1
                        ulong_max = (1 << (8*@typesize[:long]))-1
                        @lexer.hooked_include['limits.h'] = <<EOH
#define CHAR_BIT      8
#define SCHAR_MIN     (-128)
#define SCHAR_MAX     127
#define UCHAR_MAX     255
#ifdef __CHAR_UNSIGNED__
#   define CHAR_MIN     0
#   define CHAR_MAX     UCHAR_MAX
#else
#   define CHAR_MIN     SCHAR_MIN
#   define CHAR_MAX     SCHAR_MAX
#endif
#define UINT_MAX       #{uint_max}U
#define INT_MAX       (UINT_MAX >> 1)
#define INT_MIN       (-INT_MAX - 1)
#define ULONG_MAX       #{ulong_max}UL
#define LONG_MAX       (ULONG_MAX >> 1L)
#define LONG_MIN       (-LONG_MAX - 1L)
EOH
                end
prepare_visualstudio() click to toggle source
# File metasm/parse_c.rb, line 1289
# Registers, in order, the weak preprocessor definitions used to resemble
# the Visual Studio compiler. Returns whatever the last define_weak call returns.
def prepare_visualstudio
        [
                ['_WIN32'],
                ['_WIN32_WINNT', 0x500],
                ['_INTEGRAL_MAX_BITS', 64],
                ['__w64'],
                ['_cdecl', '__cdecl'],          # typo ? seen in winreg.h
                ['_fastcall', '__fastcall'],    # typo ? seen in ntddk.h
                ['_MSC_VER', 1300],             # handle '#pragma once' and _declspec(noreturn)
                ['__forceinline', '__inline'],
                ['__ptr32'],                    # needed with msc_ver 1300, don't understand their use
                ['__ptr64']
        ].map { |macro| @lexer.define_weak(*macro) }.last
end
readtok() click to toggle source

reads a token from self.lexer: concatenates adjacent strings, merges spaces/eol into a single ' ', handles wchar strings, and allows $@_ in :string tokens

# File metasm/parse_c.rb, line 1477
# Returns the next token, or nil when no token is available.
#
# Tokens pushed back with unreadtok are returned first, unmodified.
# Otherwise a token is read through readtok_longstr and post-processed:
#  - a run of :space/:eol tokens is collapsed into one :space token whose raw is ' '
#  - consecutive :quoted tokens (possibly separated by spaces/eols) are merged into
#    one; if the first one is a plain "..." and a following one is L"...", the
#    merged token gets the L prefix
#  - '_', '@' and '$' punctuation and adjacent :string tokens are glued into a
#    single :string token
# The first token that does not belong to the merged run is pushed back to the lexer.
def readtok
        if not t = @unreadtoks.pop
                return if not t = readtok_longstr
                case t.type
                when :space, :eol
                        # merge consecutive :space/:eol
                        t = t.dup
                        t.type = :space
                        t.raw = ' '
                        nil while nt = @lexer.readtok and (nt.type == :eol or nt.type == :space)
                        @lexer.unreadtok nt

                when :quoted
                        # merge consecutive :quoted
                        t = t.dup
                        while nt = readtok_longstr
                                case nt.type
                                when :quoted
                                        # ?" is the double-quote character literal
                                        if t.raw[0] == ?" and nt.raw[0, 2] == 'L"'
                                                # ensure wide prefix is set
                                                t.raw[0, 0] = 'L'
                                        end
                                        t.raw << ' ' << nt.raw
                                        t.value << nt.value
                                when :space, :eol
                                        # whitespace between string literals is skipped
                                else break
                                end
                        end
                        @lexer.unreadtok nt
                else
                        if (t.type == :punct and (t.raw == '_' or t.raw == '@' or t.raw == '$')) or t.type == :string
                                t = t.dup
                                t.type = :string
                                # nt must exist as a local before the modifier-while below references it
                                nt = nil
                                t.raw << nt.raw while nt = @lexer.readtok and ((nt.type == :punct and (nt.raw == '_' or nt.raw == '@' or nt.raw == '$')) or nt.type == :string)
                                @lexer.unreadtok nt
                        end
                end
        end
        t
end
sanity_checks() click to toggle source

C sanity checks

# File metasm/parse_c.rb, line 1371
# C sanity checks. Does nothing unless $VERBOSE is set; no check is
# implemented yet, so nil is returned in every case.
def sanity_checks
        return unless $VERBOSE
        # TODO
end
sizeof(var, type=nil) click to toggle source

returns the size of a type in bytes

# File metasm/parse_c.rb, line 1541
# Returns the size in bytes of a type, or of the type of a variable/expression.
#
# var::  a Type (then used as +type+), or an object responding to #type;
#        may be nil when +type+ is given
# type:: the type to measure; defaults to var.type
#
# Sizes of base types and pointers are looked up in @typesize.
# Raises (through the parser) on arrays whose length cannot be determined and
# on structs/unions without members. Returns nil for a type matched by none of
# the cases below.
def sizeof(var, type=nil)
        var, type = nil, var if var.kind_of? Type and not type
        type ||= var.type
        # XXX double-check class apparition order ('when' checks inheritance)
        case type
        when Array
                case type.length
                when nil
                        # unsized array: the size has to come from the variable initializer
                        if var.kind_of? CExpression and not var.lexpr and not var.op and var.rexpr.kind_of? Variable
                                var = var.rexpr
                        end
                        raise self, 'unknown array size' if not var.kind_of? Variable or not var.initializer
                        init = var.initializer
                        init = init.rexpr if init.kind_of? C::CExpression and not init.op and init.rexpr.kind_of? ::String
                        case init
                        when ::String; sizeof(nil, type.type) * (init.length + 1)       # +1 for the terminator
                        when ::Array
                                v = init.compact.first
                                v ? (sizeof(nil, type.type) * init.length) : 0
                        else sizeof(init)
                        end
                when ::Integer; type.length * sizeof(type.type)
                when CExpression
                        len = type.length.reduce(self)
                        raise self, 'unknown array size' if not len.kind_of? ::Integer
                        # multiply by the element size; measuring the array type itself
                        # here would recurse without end
                        len * sizeof(type.type)
                else raise self, 'unknown array size'
                end
        when Pointer
                if var.kind_of? CExpression and not var.op and var.rexpr.kind_of? ::String
                        # sizeof("lolz") => 5
                        sizeof(nil, type.type) * (var.rexpr.length + 1)
                else
                        @typesize[:ptr]
                end
        when Function
                # raise
                1   # gcc
        when BaseType
                @typesize[type.name]
        when Enum
                @typesize[:int]
        when Struct
                raise self, "unknown structure size #{type.name}" if not type.members
                al = type.align(self)
                # 'sizeof_packed' on the variable or the type: round to 1, i.e. no tail padding
                al = 1 if (var.kind_of?(Attributes) and var.has_attribute('sizeof_packed')) or type.has_attribute('sizeof_packed')
                lm = type.members.last
                # end of the last member, rounded up to a multiple of the alignment
                lm ? (type.offsetof(self, lm) + sizeof(lm) + al - 1) / al * al : 0
        when Union
                raise self, "unknown structure size #{type.name}" if not type.members
                type.members.map { |m| sizeof(m) }.max || 0
        when TypeDef
                sizeof(var, type.type)
        end
end
skipspaces() click to toggle source

returns the next non-space/non-eol token

# File metasm/parse_c.rb, line 1528
# Reads tokens with readtok, discarding :space ones, and returns the first
# token of another type (nil when readtok runs out of tokens).
def skipspaces
        loop do
                tok = readtok
                return tok unless tok and tok.type == :space
        end
end
to_s() click to toggle source
# File metasm/parse_c.rb, line 3319
# Joins, with newlines, the first element of what @toplevel.dump(nil) returns.
def to_s
        lines = @toplevel.dump(nil)[0]
        lines.join("\n")
end
unreadtok(tok) click to toggle source
# File metasm/parse_c.rb, line 1523
# Pushes tok onto the @unreadtoks stack so that the next readtok returns it.
# A nil/false tok is ignored (nil is returned in that case).
def unreadtok(tok)
        return if not tok
        @unreadtoks.push(tok)
end

Private Instance Methods

readtok_longstr() click to toggle source

reads a token, converts 'L"foo"' to a :quoted token

# File metasm/parse_c.rb, line 1456
# Reads one token from @lexer, handling two special sequences:
#  - the :string token 'L' immediately followed by a :quoted token starting with
#    a double quote: the quoted token is returned with 'L' prepended to its raw
#  - two consecutive '/' punctuation tokens: treated as a line comment; tokens are
#    discarded up to the next :eol, which is returned (nil at end of input)
# In every other case the first token is returned as-is and any lookahead token
# is pushed back to the lexer. Returns nil when the lexer has no more tokens.
def readtok_longstr
        # ?" is the double-quote character literal
        if t = @lexer.readtok and t.type == :string and t.raw == 'L' and
        nt = @lexer.readtok and nt.type == :quoted and nt.raw[0] == ?"
                nt.raw[0, 0] = 'L'
                nt
        elsif t and t.type == :punct and t.raw == '/' and
        # nt has not been read
        nt = @lexer.readtok and nt.type == :punct and nt.raw == '/'
                # windows.h has a #define some_type_name /##/, and VS interprets this as a comment..
                puts @lexer.exception('#defined //').message if $VERBOSE
                t = @lexer.readtok while t and t.type != :eol
                t
        else
                # nt is nil unless a lookahead token was read above
                @lexer.unreadtok nt
                t
        end
end