class Metasm::Decompiler

Attributes

c_parser[RW]
dasm[RW]
forbid_all_optimizations[RW]
forbid_decompile_ifwhile[RW]
forbid_decompile_types[RW]
forbid_optimize_code[RW]
forbid_optimize_dataflow[RW]
forbid_optimize_labels[RW]
recurse[RW]

recursive flag: for each subfunction, recurse is decremented, when 0 only the prototype is decompiled, when <0 nothing is done

Public Class Methods

new(dasm, cp = dasm.c_parser) click to toggle source
# File metasm/decompile.rb, line 24
def initialize(dasm, cp = dasm.c_parser)
        @dasm = dasm
        @recurse = 1/0.0      # Infinity
        @c_parser = cp || @dasm.cpu.new_cparser
        @dasm.cpu.decompile_init(self) if @dasm.cpu.respond_to?(:decompile_init)
end

Public Instance Methods

backtrace_target(expr, addr) click to toggle source

backtraces an expression from addr returns an integer, a label name, or an Expression XXX '(GetProcAddr(“foo”))()' should not decompile to 'foo()'

# File metasm/decompile.rb, line 319
def backtrace_target(expr, addr)
        if n = @dasm.backtrace(expr, addr).first
                return expr if n == Expression::Unknown
                n = Expression[n].reduce_rec
                n = @dasm.get_label_at(n) || n
                n = $1 if n.kind_of?(::String) and n =~ /^thunk_(.*)/
                n
        else
                expr
        end
end
c_to_graph(stmt) click to toggle source
# File metasm/decompile.rb, line 2153
def c_to_graph(stmt)
        CGraph.new.build(stmt)
end
ce_patch(exprs, oldce, newce) click to toggle source

patches a set of exprs, replacing oldce by newce

# File metasm/decompile.rb, line 978
def ce_patch(exprs, oldce, newce)
        walk_ce(exprs) { |ce|
                case ce.op
                when :funcall
                        ce.lexpr = newce if ce.lexpr == oldce
                        ce.rexpr.each_with_index { |a, i| ce.rexpr[i] = newce if a == oldce }
                else
                        ce.lexpr = newce if ce.lexpr == oldce
                        ce.rexpr = newce if ce.rexpr == oldce
                end
        }
end
ce_read(ce_, var) click to toggle source

checks if expr reads var

# File metasm/decompile.rb, line 957
def ce_read(ce_, var)
        isvar(ce_, var) or
        walk_ce(ce_) { |ce|
                case ce.op
                when :funcall; break true if isvar(ce.lexpr, var) or ce.rexpr.find { |a| isvar(a, var) }
                when :'='; break true if isvar(ce.rexpr, var)
                        break ce_read(ce.rexpr, var) if isvar(ce.lexpr, var)        # *&var = 2
                else break true if isvar(ce.lexpr, var) or isvar(ce.rexpr, var)
                end
        }
end
ce_write(ce_, var) click to toggle source

checks if expr writes var

# File metasm/decompile.rb, line 970
def ce_write(ce_, var)
        walk_ce(ce_) { |ce|
                break true if C::CExpression::AssignOp.include?(ce.op) and (isvar(ce.lexpr, var) or
                        (((ce.op == :'++' or ce.op == :'--') and isvar(ce.rexpr, var))))
        }
end
cleanup_var_decl(scope, func) click to toggle source

reorder statements to put decl first, move assignments to decl, move args to func prototype

# File metasm/decompile.rb, line 2807
def cleanup_var_decl(scope, func)
        scope.symbol.each_value { |v| v.type = C::BaseType.new(:int) if v.type.void? }

        args = func.type.args
        decl = []
        scope.statements.delete_if { |sm|
                next if not sm.kind_of?(C::Declaration)
                if sm.var.stackoff.to_i > 0 and sm.var.name !~ /_a(\d+)$/    # aliased vars: use 1st domain only
                        args << sm.var
                else
                        decl << sm
                end
                true
        }

        # move trivial affectations to initialiser
        # XXX a = 1 ; b = a ; a = 2
        go = true     # break from delete_if does not delete..
        scope.statements.delete_if { |st|
                if go and st.kind_of?(C::CExpression) and st.op == :'=' and st.rexpr.kind_of?(C::CExpression) and not st.rexpr.op and
                                st.rexpr.rexpr.kind_of?(::Integer) and st.lexpr.kind_of?(C::Variable) and scope.symbol[st.lexpr.name]
                        st.lexpr.initializer = st.rexpr
                else
                        go = false
                end
        }

        # reorder declarations
        scope.statements[0, 0] = decl.sort_by { |sm| [-sm.var.stackoff.to_i, sm.var.name] }

        # ensure arglist has no hole (create&add unreferenced args)
        func.type.args = []
        argoff = @c_parser.typesize[:ptr]
        args.sort_by { |sm| sm.stackoff.to_i }.each { |a|
                # XXX misalignment ?
                if not curoff = a.stackoff
                        func.type.args << a # __fastcall
                        next
                end
                while curoff > argoff
                        wantarg = C::Variable.new
                        wantarg.name = stackoff_to_varname(argoff)
                        wantarg.type = C::BaseType.new(:int)
                        wantarg.attributes = ['unused']
                        func.type.args << wantarg
                        scope.symbol[wantarg.name] = wantarg
                        argoff += @c_parser.typesize[:ptr]
                end
                func.type.args << a
                argoff += @c_parser.typesize[:ptr]
        }

        # use user-supplied names
        scope.symbol.keys.each { |s|
                v = scope.symbol[s]
                next if not v.kind_of?(C::Variable)
                v.misc ||= {}
                uan = v.misc[:unalias_name] ||= s
                if newname = scope.decompdata[:unalias_name][uan] and newname != s
                        v.name = newname
                        scope.symbol[newname] = scope.symbol.delete(s)
                end
        }
end
decompile(*entry) click to toggle source

decompile recursively function from an entrypoint, then perform global optimisation (static vars, …) should be called once after everything is decompiled (global optimizations may bring bad results otherwise) use decompile_func for incremental decompilation returns the c_parser

# File metasm/decompile.rb, line 35
def decompile(*entry)
        entry.each { |f| decompile_func(f) }
        finalize
        @c_parser
end
decompile_c_types(scope) click to toggle source

assign type to vars (regs, stack & global) types are found by subfunction argument types & indirections, and propagated through assignments etc TODO when updating the type of a var, update the type of all cexprs where it appears

# File metasm/decompile.rb, line 1238
def decompile_c_types(scope)
        return if forbid_decompile_types

        # TODO *(int8*)(ptr+8); *(int32*)(ptr+12) => automatic struct

        # name => type
        types = {}

        pscopevar = lambda { |e|
                e = e.rexpr while e.kind_of?(C::CExpression) and not e.op and e.rexpr.kind_of?(C::CExpression)
                if e.kind_of?(C::CExpression) and e.op == :& and not e.lexpr and e.rexpr.kind_of?(C::Variable)
                        e.rexpr.name if scope.symbol[e.rexpr.name]
                end
        }
        scopevar = lambda { |e|
                e = e.rexpr if e.kind_of?(C::CExpression) and not e.op
                if e.kind_of?(C::Variable) and scope.symbol[e.name]
                        e.name
                elsif e.kind_of?(C::CExpression) and e.op == :* and not e.lexpr
                        pscopevar[e.rexpr]
                end
        }
        globalvar = lambda { |e|
                e = e.rexpr if e.kind_of?(C::CExpression) and not e.op
                if e.kind_of?(::Integer) and e > 0x10000 and @dasm.get_section_at(e)
                        e
                elsif e.kind_of?(C::Variable) and not scope.symbol[e.name] and @c_parser.toplevel.symbol[e.name] and @dasm.get_section_at(e.name)
                        e.name
                end
        }

        # check if a newly found type for o is better than current type
        # order: foo* > void* > foo
        better_type = lambda { |t0, t1|
                t1 == C::BaseType.new(:void) or (t0.pointer? and t1.kind_of?(C::BaseType)) or t0.untypedef.kind_of?(C::Union) or
                (t0.kind_of?(C::BaseType) and t1.kind_of?(C::BaseType) and (@c_parser.typesize[t0.name] > @c_parser.typesize[t1.name] or (t0.name == t1.name and t0.qualifier))) or
                (t0.pointer? and t1.pointer? and better_type[t0.pointed, t1.pointed])
        }

        update_global_type = lambda { |e, t|
                if ne = new_global_var(e, t, scope)
                        ne.type = t if better_type[t, ne.type]      # TODO patch existing scopes using ne
                                                                # TODO rename (dword_xx -> byte_xx etc)
                        e = scope.symbol_ancestors[e] || e if e.kind_of?(String)    # exe reloc
                        walk_ce(scope) { |ce|
                                ce.lexpr = ne if ce.lexpr == e
                                ce.rexpr = ne if ce.rexpr == e
                                if ce.op == :* and not ce.lexpr and ce.rexpr == ne and ne.type.pointer? and ne.type.pointed.untypedef.kind_of?(C::Union)
                                        # *struct -> struct->bla
                                        ce.rexpr = structoffset(ne.type.pointed.untypedef, ce.rexpr, 0, sizeof(ce.type))
                                elsif ce.lexpr == ne or ce.rexpr == ne
                                        # set ce type according to l/r
                                        # TODO set ce.parent type etc
                                        ce.type = C::CExpression[ce.lexpr, ce.op, ce.rexpr].type
                                end
                        }
                end
        }

        propagate_type = nil  # fwd declaration
        propagating = []      # recursion guard (x = &x)
        # check if need to change the type of a var
        # propagate_type if type is updated
        update_type = lambda { |n, t|
                next if propagating.include?(n)
                o = scope.symbol[n].stackoff
                next if not o and t.untypedef.kind_of?(C::Union)
                next if scope.decompdata[:unalias_type][n] and t != scope.decompdata[:unalias_type][n]
                next if t0 = types[n] and not better_type[t, t0]
                next if o and (t.integral? or t.pointer?) and o % sizeof(t) != 0 # keep vars aligned
                types[n] = t
                next if t == t0
                propagating << n
                propagate_type[n, t]
                propagating.delete n
                next if not o
                t = t.untypedef
                if t.kind_of?(C::Struct)
                        t.members.to_a.each { |m|
                                mo = t.offsetof(@c_parser, m.name)
                                next if mo == 0
                                scope.symbol.each { |vn, vv|
                                        update_type[vn, m.type] if vv.stackoff == o+mo
                                }
                        }
                end
        }

        # try to update the type of a var from knowing the type of an expr (through dereferences etc)
        known_type = lambda { |e, t|
                loop do
                        e = e.rexpr while e.kind_of?(C::CExpression) and not e.op and e.type == t
                        if o = scopevar[e]
                                update_type[o, t]
                        elsif o = globalvar[e]
                                update_global_type[o, t]
                        elsif not e.kind_of?(C::CExpression)
                        elsif o = pscopevar[e] and t.pointer?
                                update_type[o, t.pointed]
                        elsif e.op == :* and not e.lexpr
                                e = e.rexpr
                                t = C::Pointer.new(t)
                                next
                        elsif t.pointer? and e.op == :+ and e.lexpr.kind_of?(C::CExpression) and e.lexpr.type.integral? and e.rexpr.kind_of?(C::Variable)
                                e.lexpr, e.rexpr = e.rexpr, e.lexpr
                                next
                        elsif e.op == :+ and e.lexpr and e.rexpr.kind_of?(C::CExpression)
                                if not e.rexpr.op and e.rexpr.rexpr.kind_of?(::Integer)
                                        if t.pointer? and e.rexpr.rexpr < 0x1000 and (e.rexpr.rexpr % sizeof(t.pointed)) == 0     # XXX relocatable + base=0..
                                                e = e.lexpr      # (int)*(x+2) === (int) *x
                                                next
                                        elsif globalvar[e.rexpr.rexpr]
                                                known_type[e.lexpr, C::BaseType.new(:int)]
                                                e = e.rexpr
                                                next
                                        end
                                elsif t.pointer? and (e.lexpr.kind_of?(C::CExpression) and e.lexpr.lexpr and [:<<, :>>, :*, :&].include?(e.lexpr.op)) or
                                                (o = scopevar[e.lexpr] and types[o] and types[o].integral? and
                                                 !(o = scopevar[e.rexpr] and types[o] and types[o].integral?))
                                        e.lexpr, e.rexpr = e.rexpr, e.lexpr       # swap
                                        e = e.lexpr
                                        next
                                elsif t.pointer? and ((e.rexpr.kind_of?(C::CExpression) and e.rexpr.lexpr and [:<<, :>>, :*, :&].include?(e.rexpr.op)) or
                                                (o = scopevar[e.rexpr] and types[o] and types[o].integral? and
                                                !(o = scopevar[e.lexpr] and types[o] and types[o].integral?)))
                                        e = e.lexpr
                                        next
                                end
                        end
                        break
                end
        }

        # we found a type for a var, propagate it through affectations
        propagate_type = lambda { |var, type|
                walk_ce(scope) { |ce|
                        next if ce.op != :'='

                        if ce.lexpr.kind_of?(C::Variable) and ce.lexpr.name == var
                                known_type[ce.rexpr, type]
                                next
                        end
                        if ce.rexpr.kind_of?(C::Variable) and ce.rexpr.name == var
                                known_type[ce.lexpr, type]
                                next
                        end

                        # int **x; y = **x  =>  int y
                        t = type
                        l = ce.lexpr
                        while l.kind_of?(C::CExpression) and l.op == :* and not l.lexpr
                                if var == pscopevar[l.rexpr]
                                        known_type[ce.rexpr, t]
                                        break
                                elsif t.pointer?
                                        l = l.rexpr
                                        t = t.pointed
                                else break
                                end
                        end

                        # int **x; **x = y  =>  int y
                        t = type
                        r = ce.rexpr
                        while r.kind_of?(C::CExpression) and r.op == :* and not r.lexpr
                                if var == pscopevar[r.rexpr]
                                        known_type[ce.lexpr, t]
                                        break
                                elsif t.pointer?
                                        r = r.rexpr
                                        t = t.pointed
                                else break
                                end
                        end

                        # TODO int *x; *x = *y; ?
                }
        }

        # put all those macros in use
        # use user-defined types first
        scope.symbol.each_value { |v|
                next if not v.kind_of?(C::Variable) or not t = scope.decompdata[:unalias_type][v.name]
                known_type[v, t]
        }

        # try to infer types from C semantics
        later = []
        walk_ce(scope) { |ce|
                if ce.op == :'=' and ce.rexpr.kind_of?(C::CExpression) and (ce.rexpr.op == :funcall or (ce.rexpr.op == nil and ce.rexpr.rexpr.kind_of?(::Integer) and
                                ce.rexpr.rexpr.abs < 0x10000 and (not ce.lexpr.kind_of?(C::CExpression) or ce.lexpr.op != :'*' or ce.lexpr.lexpr)))
                        # var = int
                        known_type[ce.lexpr, ce.rexpr.type]
                elsif ce.op == :funcall
                        f = ce.lexpr.type
                        f = f.pointed if f.pointer?
                        next if not f.kind_of?(C::Function)
                        # cast func args to arg prototypes
                        f.args.to_a.zip(ce.rexpr).each_with_index { |(proto, arg), i| ce.rexpr[i] = C::CExpression[arg, proto.type] ; known_type[arg, proto.type] }
                elsif ce.op == :* and not ce.lexpr
                        if e = ce.rexpr and e.kind_of?(C::CExpression) and not e.op and e = e.rexpr and e.kind_of?(C::CExpression) and
                                        e.op == :& and not e.lexpr and e.rexpr.kind_of?(C::Variable) and e.rexpr.stackoff
                                # skip *(__int32*)&var_12 for now, avoid saying var12 is an int if it may be a ptr or anything
                                later << [ce.rexpr, C::Pointer.new(ce.type)]
                                next
                        end
                        known_type[ce.rexpr, C::Pointer.new(ce.type)]
                elsif not ce.op and ce.type.pointer? and ce.type.pointed.kind_of?(C::Function)
                        # cast to fptr: must be a fptr
                        known_type[ce.rexpr, ce.type]
                end
        }

        later.each { |ce, t| known_type[ce, t] }

        # offsets have types now
        types.each { |v, t|
                # keep var type qualifiers
                q = scope.symbol[v].type.qualifier
                scope.symbol[v].type = t
                t.qualifier = q if q
        }


        # remove offsets to struct members
        # XXX this defeats antialiasing
        # off => [structoff, membername, membertype]
        memb = {}
        types.dup.each { |n, t|
                v = scope.symbol[n]
                next if not o = v.stackoff
                t = t.untypedef
                if t.kind_of?(C::Struct)
                        t.members.to_a.each { |tm|
                                moff = t.offsetof(@c_parser, tm.name)
                                next if moff == 0
                                types.delete_if { |vv, tt| scope.symbol[vv].stackoff == o+moff }
                                memb[o+moff] = [v, tm.name, tm.type]
                        }
                end
        }

        # patch local variables into the CExprs, incl unknown offsets
        varat = lambda { |n|
                v = scope.symbol[n]
                if s = memb[v.stackoff]
                        v = C::CExpression[s[0], :'.', s[1], s[2]]
                else
                        v.type = types[n] || C::BaseType.new(:int)
                end
                v
        }

        maycast = lambda { |v, e|
                if sizeof(v) != sizeof(e)
                        v = C::CExpression[:*, [[:&, v], C::Pointer.new(e.type)]]
                end
                v
        }
        maycast_p = lambda { |v, e|
                if not e.type.pointer? or sizeof(v) != sizeof(nil, e.type.pointed)
                        C::CExpression[[:&, v], e.type]
                else
                        C::CExpression[:&, v]
                end
        }

        walk_ce(scope, true) { |ce|
                case
                when ce.op == :funcall
                        ce.rexpr.map! { |re|
                                if o = scopevar[re]; C::CExpression[maycast[varat[o], re]]
                                elsif o = pscopevar[re]; C::CExpression[maycast_p[varat[o], re]]
                                else re
                                end
                        }
                when o = scopevar[ce.lexpr]; ce.lexpr = maycast[varat[o], ce.lexpr]
                when o = scopevar[ce.rexpr]; ce.rexpr = maycast[varat[o], ce.rexpr]
                        ce.rexpr = C::CExpression[ce.rexpr] if not ce.op and ce.rexpr.kind_of?(C::Variable)
                when o = pscopevar[ce.lexpr]; ce.lexpr = maycast_p[varat[o], ce.lexpr]
                when o = pscopevar[ce.rexpr]; ce.rexpr = maycast_p[varat[o], ce.rexpr]
                when o = scopevar[ce]; ce.replace C::CExpression[maycast[varat[o], ce]]
                when o = pscopevar[ce]; ce.replace C::CExpression[maycast_p[varat[o], ce]]
                end
        }

        fix_type_overlap(scope)
        fix_pointer_arithmetic(scope)

        # if int32 var_4 is always var_4 & 255, change type to int8
        varuse = Hash.new(0)
        varandff = Hash.new(0)
        varandffff = Hash.new(0)
        walk_ce(scope) { |ce|
                if ce.op == :& and ce.lexpr.kind_of?(C::Variable) and ce.lexpr.type.integral? and ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and ce.rexpr.rexpr.kind_of?(::Integer)
                        case ce.rexpr.rexpr
                        when 0xff; varandff[ce.lexpr.name] += 1
                        when 0xffff; varandffff[ce.lexpr.name] += 1
                        end
                end
                varuse[ce.lexpr.name] += 1 if ce.lexpr.kind_of?(C::Variable)
                varuse[ce.rexpr.name] += 1 if ce.rexpr.kind_of?(C::Variable)
        }
        varandff.each { |k, v|
                scope.symbol[k].type = C::BaseType.new(:__int8, :unsigned) if varuse[k] == v
        }
        varandffff.each { |k, v|
                scope.symbol[k].type = C::BaseType.new(:__int16, :unsigned) if varuse[k] == v
        }

        # propagate types to cexprs
        walk_ce(scope, true) { |ce|
                if ce.op
                        ce.type = C::CExpression[ce.lexpr, ce.op, ce.rexpr].type rescue next
                        if ce.op == :'=' and ce.rexpr.kind_of?(C::Typed) and ce.rexpr.type != ce.type and (not ce.rexpr.type.integral? or not ce.type.integral?)
                                known_type[ce.rexpr, ce.type] if ce.type.pointer? and ce.type.pointed.untypedef.kind_of?(C::Function)      # localvar = &struct with fptr
                                ce.rexpr = C::CExpression[[ce.rexpr], ce.type]
                        end
                elsif ce.type.pointer? and ce.rexpr.kind_of?(C::CExpression) and ce.rexpr.op == :& and not ce.rexpr.lexpr and sizeof(ce.rexpr.rexpr.type) == sizeof(ce.type.pointed)
                        ce.type = ce.rexpr.type
                end
        }
end
decompile_c_types_again(scope) click to toggle source

use casts to determine variable types after code optimization if all uses of var_42 are through *(int*)(&var_42), set type to int

# File metasm/decompile.rb, line 1564
def decompile_c_types_again(scope)
        return if forbid_decompile_types

        update_type = lambda { |n, t|
                o = scope.symbol[n].stackoff
                next if not o and t.untypedef.kind_of?(C::Union)
                next if scope.decompdata[:unalias_type][n] and t != scope.decompdata[:unalias_type][n]
                scope.symbol[n].type = t
        }

        # true if e is a cast of a var address
        is_cast = lambda { |ce|
                true if not ce.op and not ce.lexpr and ce.rexpr and ce.rexpr.kind_of?(C::CExpression) and ce.rexpr.op == :& and not ce.rexpr.lexpr
        }

        # scan code for casts
        uses = {}
        count_refs = Hash.new(0)
        walk_ce(scope) { |ce|
                count_refs[ce.lexpr.name] += 1 if ce.lexpr.kind_of?(C::Variable)
                count_refs[ce.rexpr.name] += 1 if ce.rexpr.kind_of?(C::Variable)
                if is_cast[ce] and ce.rexpr.rexpr.kind_of?(C::Variable)
                        (uses[ce.rexpr.rexpr.name] ||= []) << ce.type.pointed
                end
        }

        # given a list of types, return a type compatible with all
        summary_type = lambda { |type_list|
                t = type_list.first
                type_list.each { |tt|
                        if sizeof(t) != sizeof(tt) or t.integral? != tt.integral? or t.pointer? != tt.pointer?
                                t = nil
                                break
                        elsif t != tt
                                t = tt if t.to_s =~ /__/
                        end
                }
                t
        }

        updated = {}
        uses.each { |n, tl|
                if tl.length == count_refs[n] and t = summary_type[tl] and update_type[n, t]
                        updated[n] = true
                end
        }
        return if updated.empty?

        walk_ce(scope, true) { |ce|
                if is_cast[ce] and updated[ce.rexpr.rexpr.name]
                        ce.op = :&
                        ce.rexpr = ce.rexpr.rexpr
                elsif ce.op == :* and not ce.lexpr and ce.rexpr.kind_of?(C::CExpression) and ce.rexpr.op == :& and not ce.rexpr.lexpr and ce.rexpr.rexpr.kind_of?(C::Variable) and updated[ce.rexpr.rexpr.name]
                        ce.op = nil
                        ce.rexpr = ce.rexpr.rexpr
                elsif ce.op
                        if ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and ce.rexpr.rexpr.kind_of?(C::Variable) and updated[ce.rexpr.rexpr.name]
                                ce.rexpr = ce.rexpr.rexpr
                        end
                        if ce.lexpr.kind_of?(C::CExpression) and not ce.lexpr.op and ce.lexpr.rexpr.kind_of?(C::Variable) and updated[ce.lexpr.rexpr.name]
                                ce.lexpr = ce.lexpr.rexpr
                        end
                end
        }
end
decompile_cexpr(e, scope, itype=nil) click to toggle source

turns an Expression to a CExpression, create+declares needed variables in scope

# File metasm/decompile.rb, line 397
def decompile_cexpr(e, scope, itype=nil)
        case e
        when Expression
                if e.op == :'=' and e.lexpr.kind_of?(::String) and e.lexpr =~ /^dummy_metasm_/
                        decompile_cexpr(e.rexpr, scope, itype)
                elsif e.op == :+ and e.rexpr.kind_of?(::Integer) and e.rexpr < 0
                        decompile_cexpr(Expression[e.lexpr, :-, -e.rexpr], scope, itype)
                elsif e.lexpr
                        a = decompile_cexpr(e.lexpr, scope, itype)
                        C::CExpression[a, e.op, decompile_cexpr(e.rexpr, scope, itype)]
                elsif e.op == :+
                        decompile_cexpr(e.rexpr, scope, itype)
                else
                        a = decompile_cexpr(e.rexpr, scope, itype)
                        C::CExpression[e.op, a]
                end
        when Indirection
                case e.len
                when 1, 2, 4, 8
                        bt = C::BaseType.new("__int#{e.len*8}".to_sym)
                else
                        bt = C::Struct.new
                        bt.members = [C::Variable.new('data', C::Array.new(C::BaseType.new(:__int8), e.len))]
                end
                itype = C::Pointer.new(bt)
                p = decompile_cexpr(e.target, scope, itype)
                p = C::CExpression[[p], itype]
                C::CExpression[:*, p]
        when ::Integer
                C::CExpression[e]
        when C::CExpression
                e
        else
                name = e.to_s
                if not s = scope.symbol_ancestors[name]
                        s = C::Variable.new
                        s.type = C::BaseType.new(:__int32)
                        case e
                        when ::String       # edata relocation (rel.length = size of pointer)
                                return @c_parser.toplevel.symbol[e] || new_global_var(e, itype || C::BaseType.new(:int), scope)
                        when ::Symbol; s.storage = :register ; s.add_attribute("register(#{name})")
                        else s.type.qualifier = [:volatile]
                                puts "decompile_cexpr unhandled #{e.inspect}, using #{e.to_s.inspect}" if $VERBOSE
                        end
                        s.name = name
                        scope.symbol[s.name] = s
                        scope.statements << C::Declaration.new(s)
                end
                s
        end
end
decompile_controlseq(scope) click to toggle source

changes ifgoto, goto to while/ifelse..

# File metasm/decompile.rb, line 531
def decompile_controlseq(scope)
        # TODO replace all this crap by a method using the graph representation
        scope.statements = decompile_cseq_if(scope.statements, scope)
        remove_labels(scope)
        scope.statements = decompile_cseq_if(scope.statements, scope)
        remove_labels(scope)
        # TODO harmonize _if/_while api (if returns a replacement, while patches)
        decompile_cseq_while(scope.statements, scope)
        decompile_cseq_switch(scope)
end
decompile_cseq_if(ary, scope) click to toggle source

ifgoto => ifthen ary is an array of statements where we try to find if () {} [else {}] recurses to then/else content

# File metasm/decompile.rb, line 679
def decompile_cseq_if(ary, scope)
        return ary if forbid_decompile_ifwhile
        # the array of decompiled statements to use as replacement
        ret = []
        # list of labels appearing in ary
        inner_labels = ary.grep(C::Label).map { |l| l.name }
        while s = ary.shift
                # recurse if it's not the first run
                if s.kind_of?(C::If)
                        s.bthen.statements = decompile_cseq_if(s.bthen.statements, s.bthen) if s.bthen.kind_of?(C::Block)
                        s.belse.statements = decompile_cseq_if(s.belse.statements, s.belse) if s.belse.kind_of?(C::Block)
                end

                # if (a) goto x; if (b) goto x; => if (a || b) goto x;
                while s.kind_of?(C::If) and s.bthen.kind_of?(C::Goto) and not s.belse and ary.first.kind_of?(C::If) and ary.first.bthen.kind_of?(C::Goto) and
                                not ary.first.belse and s.bthen.target == ary.first.bthen.target
                        s.test = C::CExpression[s.test, :'||', ary.shift.test]
                end

                # if (a) goto x; b; x:  => if (!a) { b; }
                if s.kind_of?(C::If) and s.bthen.kind_of?(C::Goto) and l = ary.grep(C::Label).find { |l_| l_.name == s.bthen.target }
                        # if {goto l;} a; l: => if (!) {a;}
                        s.test = C::CExpression.negate s.test
                        s.bthen = C::Block.new(scope)
                        s.bthen.statements = decompile_cseq_if(ary[0..ary.index(l)], s.bthen)
                        s.bthen.statements.pop      # remove l: from bthen, it is in ary (was needed in bthen for inner ifs)
                        ary[0...ary.index(l)] = []
                end

                if s.kind_of?(C::If) and (s.bthen.kind_of?(C::Block) or s.bthen.kind_of?(C::Goto))
                        s.bthen = C::Block.new(scope, [s.bthen]) if s.bthen.kind_of?(C::Goto)

                        bts = s.bthen.statements

                        # if (a) if (b) { c; }  => if (a && b) { c; }
                        if bts.length == 1 and bts.first.kind_of?(C::If) and not bts.first.belse
                                s.test = C::CExpression[s.test, :'&&', bts.first.test]
                                bts = bts.first.bthen
                                bts = s.bthen.statements = bts.kind_of?(C::Block) ? bts.statements : [bts]
                        end

                        # if (a) { if (b) goto c; d; } c:  => if (a && !b) { d; }
                        if bts.first.kind_of?(C::If) and l = bts.first.bthen and (l = l.kind_of?(C::Block) ? l.statements.first : l) and l.kind_of?(C::Goto) and ary[0].kind_of?(C::Label) and l.target == ary[0].name
                                s.test = C::CExpression[s.test, :'&&', C::CExpression.negate(bts.first.test)]
                                if e = bts.shift.belse
                                        bts.unshift e
                                end
                        end

                        # if () { goto a; } a:
                        if bts.last.kind_of?(C::Goto) and ary[0].kind_of?(C::Label) and bts.last.target == ary[0].name
                                bts.pop
                        end

                        # if { a; goto outer; } b; return; => if (!) { b; return; } a; goto outer;
                        if bts.last.kind_of?(C::Goto) and not inner_labels.include?(bts.last.target) and g = ary.find { |ss| ss.kind_of?(C::Goto) or ss.kind_of?(C::Return) } and g.kind_of?(C::Return)
                                s.test = C::CExpression.negate s.test
                                ary[0..ary.index(g)], bts[0..-1] = bts, ary[0..ary.index(g)]
                        end

                        # if { a; goto l; } b; l: => if {a;} else {b;}
                        if bts.last.kind_of?(C::Goto) and l = ary.grep(C::Label).find { |l_| l_.name == bts.last.target }
                                s.belse = C::Block.new(scope)
                                s.belse.statements = decompile_cseq_if(ary[0...ary.index(l)], s.belse)
                                ary[0...ary.index(l)] = []
                                bts.pop
                        end

                        # if { a; l: b; goto any;} c; goto l; => if { a; } else { c; } b; goto any;
                        if not s.belse and (bts.last.kind_of?(C::Goto) or bts.last.kind_of?(C::Return)) and g = ary.grep(C::Goto).first and l = bts.grep(C::Label).find { |l_| l_.name == g.target }
                                s.belse = C::Block.new(scope)
                                s.belse.statements = decompile_cseq_if(ary[0...ary.index(g)], s.belse)
                                ary[0..ary.index(g)], bts[bts.index(l)..-1] = bts[bts.index(l)..-1], []
                        end

                        # if { a; b; c; } else { d; b; c; } => if {a;} else {d;} b; c;
                        if s.belse
                                bes = s.belse.statements
                                while not bts.empty?
                                        if bts.last.kind_of?(C::Label); ary.unshift bts.pop
                                        elsif bes.last.kind_of?(C::Label); ary.unshift bes.pop
                                        elsif bts.last.to_s == bes.last.to_s; ary.unshift bes.pop ; bts.pop
                                        else break
                                        end
                                end

                                # if () { a; } else { b; } => if () { a; } else b;
                                # if () { a; } else {} => if () { a; }
                                case bes.length
                                when 0; s.belse = nil
                                #when 1; s.belse = bes.first
                                end
                        end

                        # if () {} else { a; } => if (!) { a; }
                        # if () { a; } => if () a;
                        case bts.length
                        when 0; s.test, s.bthen, s.belse = C::CExpression.negate(s.test), s.belse, nil if s.belse
                        #when 1; s.bthen = bts.first        # later (allows simpler handling in _while)
                        end
                end

                # l1: l2: if () goto l1; goto l2;  =>  if(!) goto l2; goto l1;
                if s.kind_of?(C::If)
                        ls = s.bthen
                        ls = ls.statements.last if ls.kind_of?(C::Block)
                        if ls.kind_of?(C::Goto)
                                if li = inner_labels.index(ls.target)
                                        table = inner_labels
                                else
                                        table = ary.map { |st| st.name if st.kind_of?(C::Label) }.compact.reverse
                                        li = table.index(ls.target) || table.length
                                end
                                g = ary.find { |ss|
                                        break if ss.kind_of?(C::Return)
                                        next if not ss.kind_of?(C::Goto)
                                        table.index(ss.target).to_i > li
                                }
                                if g
                                        s.test = C::CExpression.negate s.test
                                        if not s.bthen.kind_of?(C::Block)
                                                ls = C::Block.new(scope)
                                                ls.statements << s.bthen
                                                s.bthen = ls
                                        end
                                        ary[0..ary.index(g)], s.bthen.statements = s.bthen.statements, decompile_cseq_if(ary[0..ary.index(g)], scope)
                                end
                        end
                end

                ret << s
        end
        ret
end
decompile_cseq_switch(scope) click to toggle source

TODO

# File metasm/decompile.rb, line 884
def decompile_cseq_switch(scope)
        uncast = lambda { |e| e = e.rexpr while e.kind_of?(C::CExpression) and not e.op ; e }
        walk(scope) { |s|
                # XXX pfff...
                next if not s.kind_of?(C::If)
                # if (v < 12) return ((void(*)())(tableaddr+4*v))();
                t = s.bthen
                t = t.statements.first if t.kind_of?(C::Block) and t.statements.length == 1
                next if not t.kind_of?(C::Return) or not t.respond_to?(:from_instr)
                next if t.from_instr.comment.to_a.include?('switch')
                next if not t.value.kind_of?(C::CExpression) or t.value.op != :funcall or t.value.rexpr != [] or not t.value.lexpr.kind_of?(C::CExpression) or t.value.lexpr.op
                p = uncast[t.value.lexpr.rexpr]
                next if not p.kind_of?(C::CExpression) or p.op != :* or p.lexpr
                p = uncast[p.rexpr]
                next if not p.kind_of?(C::CExpression) or p.op != :+
                r, l = uncast[p.rexpr], uncast[p.lexpr]
                r, l = l, r if r.kind_of?(C::CExpression)
                next if not r.kind_of?(::Integer) or not l.kind_of?(C::CExpression) or l.op != :* or not l.lexpr
                lr, ll = uncast[l.rexpr], uncast[l.lexpr]
                lr, ll = ll, lr if not ll.kind_of?(::Integer)
                next if ll != sizeof(nil, C::Pointer.new(C::BaseType.new(:void)))
                base, index = r, lr
                if s.test.kind_of?(C::CExpression) and (s.test.op == :<= or s.test.op == :<) and s.test.lexpr == index and
                                s.test.rexpr.kind_of?(C::CExpression) and not s.test.rexpr.op and s.test.rexpr.rexpr.kind_of?(::Integer)
                        t.from_instr.add_comment 'switch'
                        sup = s.test.rexpr.rexpr
                        rng = ((s.test.op == :<) ? (0...sup) : (0..sup))
                        from = t.from_instr.address
                        rng.map { |i| @dasm.backtrace(Indirection[base+ll*i, ll, from], from, :type => :x, :origin => from, :maxdepth => 0) }
                        @dasm.disassemble
                        throw :restart, :restart
                end
                puts "unhandled switch() at #{t.from_instr}" if $VERBOSE
        }
end
decompile_cseq_while(ary, scope) click to toggle source
# File metasm/decompile.rb, line 814
def decompile_cseq_while(ary, scope)
        return if forbid_decompile_ifwhile

        # find the next instruction that is not a label
        ni = lambda { |li| (li..ary.length).find { |ni_| not ary[ni_].kind_of?(C::Label) } }

        finished = false; while not finished; finished = true         # ruby1.9 does not support 'retry'
        i = 0
        while i < ary.length
                si = i
                s = ary[si]
                i += 1
                case s
                when C::Label
                        if ssi = ni[si] and ss = ary[ssi] and ss.kind_of?(C::If) and not ss.belse and ss.bthen.kind_of?(C::Block)
                                if ss.bthen.statements.last.kind_of?(C::Goto) and ss.bthen.statements.last.target == s.name
                                        # l: if (a) { b; goto l; }  =>  while(a) { b; }
                                        ss.bthen.statements.pop
                                        if l = ary[ssi+1] and l.kind_of?(C::Label)
                                                ss.bthen.statements.grep(C::If).each { |it|
                                                        it.bthen = C::Break.new if it.bthen.kind_of?(C::Goto) and it.bthen.target == l.name
                                                }
                                        end
                                        ary[ssi] = C::While.new(ss.test, ss.bthen)
                                elsif ss.bthen.statements.last.kind_of?(C::Return) and gi = ((si+1)..ary.length).to_a.reverse.find { |_si| ary[_si].kind_of?(C::Goto) and ary[_si].target == s.name }
                                        # l: if (a) { b; return; } c; goto l;  =>  while (!a) { c; } b; return;
                                        wb = C::Block.new(scope)
                                        wb.statements = decompile_cseq_while(ary[ssi+1...gi], wb)
                                        w = C::While.new(C::CExpression.negate(ss.test), wb)
                                        ary[ssi..gi] = [w, *ss.bthen.statements]
                                        finished = false ; break  #retry
                                end
                        end
                        if gi = (si..ary.length).to_a.reverse.find { |_si| ary[_si].kind_of?(C::Goto) and ary[_si].target == s.name }
                                # l: a; goto l;  =>  while(1) { a; }
                                wb = C::Block.new(scope)
                                wb.statements = decompile_cseq_while(ary[si...gi], wb)
                                w = C::While.new(C::CExpression[1], wb)
                                ary[si..gi] = [w]
                                finished = false ; break   #retry
                        end
                        if gi = (si..ary.length).to_a.reverse.find { |_si| ary[_si].kind_of?(C::If) and not ary[_si].belse and gt = ary[_si].bthen and
                                        (gt = gt.kind_of?(C::Block) ? gt.statements.last : gt) and gt.kind_of?(C::Goto) and gt.target == s.name }
                                # l: a; if (b) goto l;  =>  do { a; } while (b);
                                # l: a; if (b) { c; goto l; }  =>  do { a; if (!b) break; c; } while(1);
                                wb = C::Block.new(scope)
                                g = ary[gi]
                                if g.bthen.kind_of?(C::Block) and g.bthen.statements.length > 1
                                        nary = ary[si...gi] + [C::If.new(C::CExpression.negate(g.test), C::Break.new)] + g.bthen.statements[0...-1]
                                        wb.statements = decompile_cseq_while(nary, wb)
                                        w = C::DoWhile.new(C::CExpression[1], wb)
                                else
                                        wb.statements = decompile_cseq_while(ary[si...gi], wb)
                                        w = C::DoWhile.new(g.test, wb)
                                end
                                ary[si..gi] = [w]
                                finished = false ; break   #retry
                        end
                when C::If
                        decompile_cseq_while(s.bthen.statements, s.bthen) if s.bthen.kind_of?(C::Block)
                        decompile_cseq_while(s.belse.statements, s.belse) if s.belse.kind_of?(C::Block)
                when C::While, C::DoWhile
                        decompile_cseq_while(s.body.statements, s.body) if s.body.kind_of?(C::Block)
                end
        end
        end   # while finished
        ary
end
decompile_func(entry) click to toggle source

decompile a function, decompiling subfunctions as needed may return :restart, which means that the decompilation should restart from the entrypoint (and bubble up) (eg a new codepath is found which may changes dependency in blocks etc)

# File metasm/decompile.rb, line 43
def decompile_func(entry)
        return if @recurse < 0
        entry = @dasm.normalize entry
        return if not @dasm.decoded[entry]

        # create a new toplevel function to hold our code
        func = C::Variable.new
        func.name = @dasm.auto_label_at(entry, 'func')
        if f = @dasm.function[entry] and f.decompdata and f.decompdata[:return_type]
                rettype = f.decompdata[:return_type]
        else
                rettype = C::BaseType.new(:int)
        end
        func.type = C::Function.new rettype, []
        if @c_parser.toplevel.symbol[func.name]
                return if @recurse == 0
                if not @c_parser.toplevel.statements.grep(C::Declaration).find { |decl| decl.var.name == func.name }
                        # recursive dependency: declare prototype
                        puts "function #{func.name} is recursive: predecompiling for prototype" if $VERBOSE
                        pre_recurse = @recurse
                        @recurse = 0
                        @c_parser.toplevel.symbol.delete func.name
                        decompile_func(entry)
                        @recurse = pre_recurse
                        if not @c_parser.toplevel.statements.grep(C::Declaration).find { |decl| decl.var.name == func.name }
                                @c_parser.toplevel.statements << C::Declaration.new(func)
                        end
                end
                return
        end
        @c_parser.toplevel.symbol[func.name] = func
        puts "decompiling #{func.name}" if $VERBOSE

        while catch(:restart) { do_decompile_func(entry, func) } == :restart
                retval = :restart
        end

        @c_parser.toplevel.symbol[func.name] = func   # recursive func prototype could have overwritten us
        @c_parser.toplevel.statements << C::Declaration.new(func)

        puts " decompiled #{func.name}" if $VERBOSE

        retval
end
decompile_func_rec(entry) click to toggle source

calls decompile_func with recurse -= 1 (internal use)

# File metasm/decompile.rb, line 89
def decompile_func_rec(entry)
        @recurse -= 1
        decompile_func(entry)
ensure
        @recurse += 1
end
disassembler() click to toggle source
# File metasm/decompile.rb, line 22
def disassembler ; dasm ; end
do_decompile_func(entry, func) click to toggle source
# File metasm/decompile.rb, line 96
def do_decompile_func(entry, func)
        # find decodedinstruction graph of the function, decompile subfuncs
        myblocks = listblocks_func(entry)

        # [esp+8] => [:frameptr-12]
        # TODO slow
        makestackvars entry, myblocks.map { |b, to| @dasm.decoded[b].block }

        # find registry dependencies between blocks
        deps = @dasm.cpu.decompile_func_finddeps(self, myblocks, func)

        scope = func.initializer = C::Block.new(@c_parser.toplevel)
        if df = @dasm.function[entry]
                scope.decompdata = df.decompdata ||= {:unalias_type => {}, :unalias_name => {}}
        else
                scope.decompdata ||= {:unalias_type => {}, :unalias_name => {}}
        end

        # di blocks => raw c statements, declare variables
        @dasm.cpu.decompile_blocks(self, myblocks, deps, func)
        puts "dcmp debug #{func.name} {", scope, '}' if $DEBUG

        return if forbid_all_optimizations

        simplify_goto(scope)
        namestackvars(scope)
        unalias_vars(scope, func)
        decompile_c_types(scope)
        optimize_code(scope)
        optimize_vars(scope)
        optimize_vars(scope)  # 1st run may transform i = i+1 into i++ which second run may coalesce into if(i)
        remove_unreferenced_vars(scope)
        decompile_c_types_again(scope)
        cleanup_var_decl(scope, func)
        if @recurse > 0
                decompile_controlseq(scope)
                optimize_vars(scope)
                optimize_code(scope)
                optimize_ctrl(scope)
                optimize_vars(scope)
                remove_unreferenced_vars(scope)
                simplify_varname_noalias(scope)
                rename_variables(scope)
        end
        @dasm.cpu.decompile_check_abi(self, entry, func)

        case ret = scope.statements.last
        when C::CExpression; puts "no return at end of func" if $VERBOSE
        when C::Return
                if not ret.value
                        scope.statements.pop
                else
                        v = ret.value
                        v = v.rexpr if v.kind_of?(C::CExpression) and not v.op and v.rexpr.kind_of?(C::Typed)
                        func.type.type = v.type
                end
        end

        if @recurse == 0
                # we need only the prototype
                func.initializer = nil
        end
end
finalize() click to toggle source
# File metasm/decompile.rb, line 2715
def finalize
        optimize_global
        true
end
fix_pointer_arithmetic(scope) click to toggle source

fix pointer arithmetic (eg int foo += 4 => int* foo += 1) use struct member access (eg *(structptr+8) => structptr->bla) must be run only once, right after type setting

# File metasm/decompile.rb, line 1674
def fix_pointer_arithmetic(scope)
        walk_ce(scope, true) { |ce|
                if ce.lexpr and ce.lexpr.type.pointer? and [:&, :>>, :<<].include?(ce.op)
                        ce.lexpr = C::CExpression[[ce.lexpr], C::BaseType.new(:int)]
                end

                if ce.op == :+ and ce.lexpr and ((ce.lexpr.type.integral? and ce.rexpr.type.pointer?) or (ce.rexpr.type.pointer? and ce.rexpr.type.pointed.untypedef.kind_of?(C::Union)))
                        ce.rexpr, ce.lexpr = ce.lexpr, ce.rexpr
                end

                if ce.op == :* and not ce.lexpr and ce.rexpr.type.pointer? and ce.rexpr.type.pointed.untypedef.kind_of?(C::Struct)
                        s = ce.rexpr.type.pointed.untypedef
                        m = s.members.to_a.find { |m_| s.offsetof(@c_parser, m_.name) == 0 }
                        if sizeof(m) != sizeof(ce)
                                ce.rexpr = C::CExpression[[ce.rexpr, C::Pointer.new(s)], C::Pointer.new(ce.type)]
                                next
                        end
                        # *structptr => structptr->member
                        ce.lexpr = ce.rexpr
                        ce.op = :'->'
                        ce.rexpr = m.name
                        ce.type = m.type
                        next
                elsif ce.op == :'=' and ce.lexpr.type.untypedef.kind_of?(C::Struct)
                        s = ce.lexpr.type.untypedef
                        m = s.members.to_a.find { |m_| s.offsetof(@c_parser, m_.name) == 0 }
                        ce.lexpr = C::CExpression.new(ce.lexpr, :'.', m.name, m.type)
                        ce.type = m.type
                        next
                end

                if ce.op == :+ and ce.lexpr and ce.lexpr.type.pointer? and not ce.type.pointer?
                        ce.type = ce.lexpr.type
                end

                if ce.op == :& and not ce.lexpr and ce.rexpr.kind_of?(C::CExpression) and ce.rexpr.op == :* and not ce.rexpr.lexpr
                        ce.replace C::CExpression[ce.rexpr.rexpr]
                end

                next if not ce.lexpr or not ce.lexpr.type.pointer?
                if ce.op == :+ and (s = ce.lexpr.type.pointed.untypedef).kind_of?(C::Union) and ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and
                                ce.rexpr.rexpr.kind_of?(::Integer) and o = ce.rexpr.rexpr
                        # structptr + 4 => &structptr->member
                        ce.replace structoffset(s, ce.lexpr, o, nil)
                elsif [:+, :-, :'+=', :'-='].include?(ce.op) and ce.rexpr.kind_of?(C::CExpression) and ((not ce.rexpr.op and i = ce.rexpr.rexpr) or
                                (ce.rexpr.op == :* and i = ce.rexpr.lexpr and ((i.kind_of?(C::CExpression) and not i.op and i = i.rexpr) or true))) and
                                i.kind_of?(::Integer) and psz = sizeof(nil, ce.lexpr.type.pointed) and i % psz == 0
                        # ptr += 4 => ptr += 1
                        if not ce.rexpr.op
                                ce.rexpr.rexpr /= psz
                        else
                                ce.rexpr.lexpr.rexpr /= psz
                                if ce.rexpr.lexpr.rexpr == 1
                                        ce.rexpr = ce.rexpr.rexpr
                                end
                        end
                        ce.type = ce.lexpr.type

                elsif (ce.op == :+ or ce.op == :-) and sizeof(nil, ce.lexpr.type.pointed) != 1
                        # ptr+x => (ptrtype*)(((__int8*)ptr)+x)
                        # XXX create struct ?
                        ce.rexpr = C::CExpression[ce.rexpr, C::BaseType.new(:int)] if not ce.rexpr.type.integral?
                        if sizeof(nil, ce.lexpr.type.pointed) != 1
                                ptype = ce.lexpr.type
                                p = C::CExpression[[ce.lexpr], C::Pointer.new(C::BaseType.new(:__int8))]
                                ce.replace C::CExpression[[p, ce.op, ce.rexpr, p.type], ptype]
                        end
                end
        }
end
fix_type_overlap(scope) click to toggle source

handling of var overlapping (eg __int32 var_10; __int8 var_F => replace all var_F by *(&var_10 + 1)) must be done before fix_pointer_arithmetic

# File metasm/decompile.rb, line 1747
def fix_type_overlap(scope)
        varinfo = {}
        scope.symbol.each_value { |var|
                next if not off = var.stackoff
                len = sizeof(var)
                varinfo[var] = [off, len]
        }

        varinfo.each { |v1, (o1, l1)|
                next if not v1.type.integral?
                varinfo.each { |v2, (o2, l2)|
                        # XXX o1 may overlap o2 AND another (int32 v_10; int32 v_E; int32 v_C;)
                        # TODO should check stuff with aliasing domains
                        next if v1.name == v2.name or o1 >= o2+l2 or o1+l1 <= o2 or l1 > l2 or (l2 == l1 and o2 >= o1)
                        next if o1 == o2 and l1 != l2
                        # v1 => *(&v2+delta)
                        p = C::CExpression[:&, v2]
                        p = C::CExpression[p, :+,  [o1-o2]] if o1 != o2
                        p = C::CExpression[p, C::Pointer.new(v1.type)] if v1.type != p.type.type
                        p = C::CExpression[:*, p]
                        walk_ce(scope) { |ce|
                                ce.lexpr = p if ce.lexpr == v1
                                ce.rexpr = p if ce.rexpr == v1
                        }
                }
        }
end
isvar(ce, var) click to toggle source

checks if expr is a var (var or *&var)

# File metasm/decompile.rb, line 945
def isvar(ce, var)
        if var.stackoff and ce.kind_of?(C::CExpression)
                return unless ce.op == :* and not ce.lexpr
                ce = ce.rexpr
                ce = ce.rexpr while ce.kind_of?(C::CExpression) and not ce.op
                return unless ce.kind_of?(C::CExpression) and ce.op == :& and not ce.lexpr
                ce = ce.rexpr
        end
        ce == var
end
listblocks_func(entry) click to toggle source

return an array of [address of block start, list of block to]] decompile subfunctions

# File metasm/decompile.rb, line 279
def listblocks_func(entry)
        @autofuncs ||= []
        blocks = []
        entry = dasm.normalize entry
        todo = [entry]
        while a = todo.pop
                next if blocks.find { |aa, at| aa == a }
                next if not di = @dasm.di_at(a)
                blocks << [a, []]
                di.block.each_to { |ta, type|
                        next if type == :indirect
                        ta = dasm.normalize ta
                        if type != :subfuncret and not @dasm.function[ta] and
                                        (not @dasm.function[entry] or @autofuncs.include?(entry)) and
                                        di.block.list.last.opcode.props[:saveip]
                                # possible noreturn function
                                # XXX call $+5; pop eax
                                @autofuncs << ta
                                @dasm.function[ta] = DecodedFunction.new
                                puts "autofunc #{Expression[ta]}" if $VERBOSE
                        end

                        if @dasm.function[ta] and type != :subfuncret
                                f = dasm.auto_label_at(ta, 'func')
                                ta = dasm.normalize($1) if f =~ /^thunk_(.*)/
                                ret = decompile_func_rec(ta) if (ta != entry or di.block.to_subfuncret)
                                throw :restart, :restart if ret == :restart
                        else
                                @dasm.auto_label_at(ta, 'label') if blocks.find { |aa, at| aa == ta }
                                blocks.last[1] |= [ta]
                                todo << ta
                        end
                }
        end
        blocks
end
makestackvars(funcstart, blocks) click to toggle source

patches instruction's backtrace_binding to replace things referring to a static stack offset from func start by :frameptr+off

# File metasm/decompile.rb, line 332
def makestackvars(funcstart, blocks)
        blockstart = nil
        cache_di = nil
        cache = {}    # [i_s, e, type] => backtrace
        @decomp_mkstackvars_terminals ||= [:frameptr]
        tovar = lambda { |di, e, i_s_c|
                i_s = (i_s_c > 0)
                case e
                when Expression; Expression[tovar[di, e.lexpr, i_s_c], e.op, tovar[di, e.rexpr, i_s_c]].reduce
                when Indirection; Indirection[tovar[di, e.target, i_s_c-1], e.len, e.origin]
                when :frameptr; e
                when ::Symbol
                        cache.clear if cache_di != di ; cache_di = di
                        vals = cache[[e, i_s, 0]] ||= @dasm.backtrace(e, di.address, :snapshot_addr => blockstart,
                                        :include_start => i_s, :no_check => true, :terminals => @decomp_mkstackvars_terminals)
                        # backtrace only to blockstart first
                        if vals.length == 1 and ee = vals.first and ee.kind_of?(Expression) and (ee == Expression[:frameptr] or
                                        (ee.lexpr == :frameptr and ee.op == :+ and ee.rexpr.kind_of?(::Integer)) or
                                        (not ee.lexpr and ee.op == :+ and ee.rexpr.kind_of?(Indirection) and eep = ee.rexpr.pointer and
                                        (eep == Expression[:frameptr] or (eep.lexpr == :frameptr and eep.op == :+ and eep.rexpr.kind_of?(::Integer)))))
                                ee
                        else
                        # fallback on full run (could restart from blockstart with ee, but may reevaluate addr_binding..
                        vals = cache[[e, i_s, 1]] ||= @dasm.backtrace(e, di.address, :snapshot_addr => funcstart,
                                        :include_start => i_s, :no_check => true, :terminals => @decomp_mkstackvars_terminals)
                        if vals.length == 1 and ee = vals.first and (ee.kind_of?(Expression) and (ee == Expression[:frameptr] or
                                        (ee.lexpr == :frameptr and ee.op == :+ and ee.rexpr.kind_of?(::Integer))))
                                ee
                        else e
                        end
                        end
                else e
                end
        }

        # must not change bt_bindings until everything is backtracked
        repl_bind = {}        # di => bt_bd

        @dasm.cpu.decompile_makestackvars(@dasm, funcstart, blocks) { |block|
                block.list.each { |di|
                        bd = di.backtrace_binding ||= @dasm.cpu.get_backtrace_binding(di)
                        newbd = repl_bind[di] = {}
                        bd.each { |k, v|
                                k = tovar[di, k, 2] if k.kind_of?(Indirection)
                                next if k == Expression[:frameptr] or (k.kind_of?(Expression) and k.lexpr == :frameptr and k.op == :+ and k.rexpr.kind_of?( ::Integer))
                                newbd[k] = tovar[di, v, 0]
                        }
                }
        }

        repl_bind.each { |di, bd| di.backtrace_binding = bd }
end
namestackvars(scope) click to toggle source

patch scope to transform :frameoff-x into &var_x

# File metasm/decompile.rb, line 1210
def namestackvars(scope)
        off2var = {}
        newvar = lambda { |o, n|
                if not v = off2var[o]
                        v = off2var[o] = C::Variable.new
                        v.type = C::BaseType.new(:void)
                        v.name = n
                        v.stackoff = o
                        scope.symbol[v.name] = v
                        scope.statements << C::Declaration.new(v)
                end
                v
        }

        walk_ce(scope) { |e|
                next if e.op != :+ and e.op != :-
                next if not e.lexpr.kind_of?(C::Variable) or e.lexpr.name != 'frameptr'
                next if not e.rexpr.kind_of?(C::CExpression) or e.rexpr.op or not e.rexpr.rexpr.kind_of?(::Integer)
                off = e.rexpr.rexpr
                off = -off if e.op == :-
                v = newvar[off, stackoff_to_varname(off)]
                e.replace C::CExpression[:&, v]
        }
end
new_global_var(addr, type, scope=nil) click to toggle source
# File metasm/decompile.rb, line 183
def new_global_var(addr, type, scope=nil)
        addr = @dasm.normalize(addr)

        # (almost) NULL ptr
        return if addr.kind_of?(Integer) and addr >= 0 and addr < 32

        # check preceding structure we're hitting
        # TODO check what we step over when defining a new static struct
        0x100.times { |i_|
                next if not n = @dasm.get_label_at(addr-i_)
                next if not v = @c_parser.toplevel.symbol[n]
                next if not v.type.pointer? or not v.type.pointed.untypedef.kind_of?(C::Union)
                break if i_ == 0     # XXX it crashes later if we dont break here
                next if sizeof(v.type.pointed) <= i_
                return structoffset(v.type.pointed.untypedef, C::CExpression[v], i_, nil)
        }

        ptype = type.pointed.untypedef if type.pointer?
        if ptype.kind_of?(C::Function)
                name = @dasm.auto_label_at(addr, 'sub', 'xref', 'byte', 'word', 'dword', 'unk')
                if @dasm.get_section_at(addr) and @recurse > 0
                        puts "found function pointer to #{name}" if $VERBOSE
                        @dasm.disassemble(addr) if not @dasm.decoded[addr]  # TODO disassemble_fast ?
                        f = @dasm.function[addr] ||= DecodedFunction.new
                        # TODO detect thunks (__noreturn)
                        f.decompdata ||= { :unalias_type => {}, :unalias_name => {} }
                        if not s = @c_parser.toplevel.symbol[name] or not s.initializer or not s.type.untypedef.kind_of?(C::Function)
                                os = @c_parser.toplevel.symbol.delete name
                                @c_parser.toplevel.statements.delete_if { |ts| ts.kind_of?(C::Declaration) and ts.var.name == name }
                                #aoff = 1
                                #ptype.args.to_a.each { |a|
                                        # TODO
                                        #aoff = (aoff + @c_parser.typesize[:ptr] - 1) / @c_parser.typesize[:ptr] * @c_parser.typesize[:ptr]
                                        #f.decompdata[:unalias_type][aoff] ||= a.type
                                        #f.decompdata[:unalias_name][aoff] ||= a.name if a.name
                                        #aoff += sizeof(a)        # ary ?
                                #}
                                decompile_func_rec(addr)
                                s = @c_parser.toplevel.symbol[name]
                                walk_ce([@c_parser.toplevel, scope]) { |ce|
                                        ce.lexpr = s if ce.lexpr == os
                                        ce.rexpr = s if ce.rexpr == os
                                } if os and s      # update existing references to old instance
                        # else redecompile with new prototye ?
                        end
                end
        end

        name = case (type.pointer? && tsz = sizeof(nil, ptype))
        when 1; 'byte'
        when 2; 'word'
        when 4; 'dword'
        else 'unk'
        end
        name = 'stru' if ptype.kind_of?(C::Union)
        name = @dasm.auto_label_at(addr, name, 'xref', 'byte', 'word', 'dword', 'unk', 'stru')

        if not var = @c_parser.toplevel.symbol[name]
                var = C::Variable.new
                var.name = name
                var.type = type.pointer? ? C::Array.new(ptype) : type
                @c_parser.toplevel.symbol[var.name] = var
                @c_parser.toplevel.statements << C::Declaration.new(var)
        end
        if ptype.kind_of?(C::Union) and type.pointer? and s = @dasm.get_section_at(name) and s[0].ptr < s[0].length
                # TODO struct init, array, fptrs..
        elsif type.pointer? and not type.pointed.untypedef.kind_of?(C::Function) and s = @dasm.get_section_at(name) and s[0].ptr < s[0].length and
                        [1, 2, 4].include?(tsz) and (not var.type.pointer? or sizeof(var.type.pointed) != sizeof(type.pointed) or not var.initializer)
                # TODO do not overlap other statics (but labels may refer to elements of the array...)
                data = (0..256).map {
                        v = s[0].decode_imm("u#{tsz*8}".to_sym, @dasm.cpu.endianness)
                        v = decompile_cexpr(v, @c_parser.toplevel) if v.kind_of?(Expression)        # relocation
                        v
                }
                var.initializer = data.map { |v| C::CExpression[v, C::BaseType.new(:int)] } unless (data - [0]).empty?
                if (tsz == 1 or tsz == 2) and eos = data.index(0) and ((0..3).all? { |i| data[i] >= 0x20 and data[i] < 0x7f } or ptype.to_s == '(char)')     # printable str
                        # XXX 0x80 with ruby1.9...
                        var.initializer = C::CExpression[data[0, eos].pack('C*'), C::Pointer.new(ptype)] rescue nil
                end
                if var.initializer.kind_of?(::Array) and i = var.initializer.first and i.kind_of?(C::CExpression) and not i.op and i.rexpr.kind_of?(C::Variable) and
                                i.rexpr.type.kind_of?(C::Function) and not @dasm.get_section_at(@dasm.normalize(i.rexpr.name))     # iat_ExternalFunc
                        i.type = i.rexpr.type
                        type = var.type = C::Array.new(C::Pointer.new(i.type))
                        var.initializer = [i]
                end
                var.initializer = nil if var.initializer.kind_of?(::Array) and not type.untypedef.kind_of?(C::Array)
        end

        # TODO patch existing references to addr ? (or would they have already triggered new_global_var?)

        # return the object to use to replace the raw addr
        var
end
optimize_code(scope) click to toggle source

simplify cexpressions (char & 255, redundant casts, etc)

# File metasm/decompile.rb, line 1776
def optimize_code(scope)
        return if forbid_optimize_code

        sametype = lambda { |t1, t2|
                t1 = t1.untypedef
                t2 = t2.untypedef
                t1 = t1.pointed.untypedef if t1.pointer? and t1.pointed.untypedef.kind_of?(C::Function)
                t2 = t2.pointed.untypedef if t2.pointer? and t2.pointed.untypedef.kind_of?(C::Function)
                t1 == t2 or
                (t1.kind_of?(C::Function) and t2.kind_of?(C::Function) and sametype[t1.type, t2.type] and t1.args.to_a.length == t2.args.to_a.length and
                         t1.args.to_a.zip(t2.args.to_a).all? { |st1, st2| sametype[st1.type, st2.type] }) or
                (t1.kind_of?(C::BaseType) and t1.integral? and t2.kind_of?(C::BaseType) and t2.integral? and sizeof(nil, t1) == sizeof(nil, t2)) or
                (t1.pointer? and t2.pointer? and sametype[t1.type, t2.type])
        }

        # most of this is a CExpr#reduce
        future_array = []
        walk_ce(scope, true) { |ce|
                # (whatever)0 => 0
                if not ce.op and ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and ce.rexpr.rexpr == 0
                        ce.replace ce.rexpr
                end

                # *&bla => bla if types ok
                if ce.op == :* and not ce.lexpr and ce.rexpr.kind_of?(C::CExpression) and ce.rexpr.op == :& and not ce.rexpr.lexpr and sametype[ce.rexpr.type.pointed, ce.rexpr.rexpr.type]
                        ce.replace C::CExpression[ce.rexpr.rexpr]
                end

                # int x + 0xffffffff => x-1
                if ce.lexpr and ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and [:+, :-, :'+=', :'-=', :'!=', :==, :>, :<, :>=, :<=].include?(ce.op) and
                                ce.rexpr.rexpr == (1 << (8*sizeof(ce.lexpr)))-1
                        ce.op = {:+ => :-, :- => :+, :'+=' => :'-=', :'-=' => :'+='}[ce.op]
                        ce.rexpr.rexpr = 1
                end

                # i + ptr => ptr + i
                if ce.op == :+ and ce.lexpr and ce.rexpr.type.pointer? and ce.lexpr.type.integral?
                        ce.rexpr, ce.lexpr = ce.lexpr, ce.rexpr
                end

                # i + v => v + i
                if ce.op == :+ and ce.lexpr.kind_of?(C::CExpression) and not ce.lexpr.op and ce.lexpr.rexpr.kind_of?(::Integer)
                        # avoid infinite swapping
                        if not ce.rexpr.kind_of?(C::CExpression) or ce.rexpr.op or not ce.rexpr.rexpr.kind_of?(::Integer)
                                ce.rexpr, ce.lexpr = ce.lexpr, ce.rexpr
                        end
                end

                # (a + b) + c => a + (b + c)
                if ce.op == :+ and ce.lexpr.kind_of?(C::CExpression) and ce.lexpr.op == :+ and ce.lexpr.lexpr
                        ce.lexpr, ce.rexpr = ce.lexpr.lexpr, C::CExpression[ce.lexpr.rexpr, :+, ce.rexpr]
                        optimize_code(ce)
                end

                # 1 + 2 => 3
                if (ce.op == :+ or ce.op == :- or ce.op == :*) and ce.lexpr.kind_of?(C::CExpression) and ce.type.integral? and not ce.lexpr.op and ce.lexpr.rexpr.kind_of?(::Integer) and ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and ce.rexpr.rexpr.kind_of?(::Integer)
                        ce.lexpr, ce.op, ce.rexpr = nil, nil, ce.lexpr.rexpr.send(ce.op, ce.rexpr.rexpr)
                end

                # 4 * (a + 1) => 4*a + 4
                if ce.op == :* and ce.lexpr.kind_of?(C::CExpression) and ce.type.integral? and not ce.lexpr.op and ce.lexpr.rexpr.kind_of?(::Integer) and ce.rexpr.kind_of?(C::CExpression) and (ce.rexpr.op == :+ or ce.rexpr.op == :-) and ce.rexpr.lexpr and ce.rexpr.rexpr.kind_of?(C::CExpression) and not ce.rexpr.rexpr.op and ce.rexpr.rexpr.rexpr.kind_of?(::Integer)
                        ce.replace C::CExpression[[ce.lexpr, ce.op, ce.rexpr.lexpr], ce.rexpr.op, [ce.lexpr.rexpr * ce.rexpr.rexpr.rexpr]]
                        optimize_code(ce)
                end

                # int *ptr; *(ptr + 4) => ptr[4]
                if ce.op == :* and not ce.lexpr and ce.rexpr.kind_of?(C::CExpression) and ce.rexpr.op == :+ and var = ce.rexpr.lexpr and var.kind_of?(C::Variable) and var.type.pointer?
                        ce.lexpr, ce.op, ce.rexpr = ce.rexpr.lexpr, :'[]', ce.rexpr.rexpr
                        future_array << var.name
                end

                # ptr + (i << 3) => ptr + 8*i
                if (ce.op == :+ or ce.op == :[]) and ce.rexpr.kind_of?(C::CExpression) and ce.rexpr.op == :<< and ce.rexpr.rexpr.kind_of?(C::CExpression) and not ce.rexpr.rexpr.op and ce.rexpr.rexpr.rexpr.kind_of?(::Integer)
                        ce.rexpr.rexpr.rexpr = 1 << ce.rexpr.rexpr.rexpr
                        ce.rexpr.lexpr, ce.rexpr.op, ce.rexpr.rexpr = ce.rexpr.rexpr, :*, ce.rexpr.lexpr
                end

                # char x; x & 255 => x
                if ce.op == :& and ce.lexpr and (ce.lexpr.type.integral? or ce.lexpr.type.pointer?) and ce.rexpr.kind_of?(C::CExpression) and
                                not ce.rexpr.op and ce.rexpr.rexpr.kind_of?(::Integer) and m = (1 << (8*sizeof(ce.lexpr))) - 1 and
                                ce.rexpr.rexpr & m == m
                        ce.replace C::CExpression[ce.lexpr]
                end

                # a + -b  =>  a - b
                if ce.op == :+ and ce.lexpr and ce.rexpr.kind_of?(C::CExpression) and ce.rexpr.op == :- and not ce.rexpr.lexpr
                        ce.op, ce.rexpr = :-, ce.rexpr.rexpr
                end

                # (((int) i >> 31) & 1)  =>  i < 0
                if ce.op == :& and ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and ce.rexpr.rexpr == 1 and
                                ce.lexpr.kind_of?(C::CExpression) and ce.lexpr.op == :>> and ce.lexpr.rexpr.kind_of?(C::CExpression) and
                                not ce.lexpr.rexpr.op and ce.lexpr.rexpr.rexpr == sizeof(ce.lexpr.lexpr) * 8 - 1
                        ce.replace C::CExpression[ce.lexpr.lexpr, :<, [0]]
                end

                # a-b == 0  =>  a == b
                if ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and ce.rexpr.rexpr == 0 and [:==, :'!=', :<, :>, :<=, :>=].include?(ce.op) and
                                ce.lexpr.kind_of?(C::CExpression) and ce.lexpr.op == :- and ce.lexpr.lexpr
                        ce.lexpr, ce.rexpr = ce.lexpr.lexpr, ce.lexpr.rexpr
                end

                # (a > 0) != 0
                if ce.op == :'!=' and ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and ce.rexpr.rexpr == 0 and ce.lexpr.kind_of?(C::CExpression) and
                                [:<, :<=, :>, :>=, :'==', :'!=', :'!'].include?(ce.lexpr.op)
                        ce.replace ce.lexpr
                end

                # (a < b) != ( [(a < 0) == !(b < 0)] && [(a < 0) != (a < b)] )  =>  jl
                # a<b  =>  true if !r  =>  a<0 == b<0  or a>=0  =>  a>=0 or b>=0
                # a>=b =>  true if  r  =>  a<0 == b>=0 and a<0  =>  a<0 and b>=0

                # x != (a && (b != x))  =>  [x && (!a || b)] || [!x && !(!a || b)]
                if ce.op == :'!=' and ce.lexpr.kind_of?(C::CExpression) and ce.lexpr.op == :< and ce.rexpr.kind_of?(C::CExpression) and
                                ce.rexpr.op == :'&&' and ce.rexpr.rexpr.kind_of?(C::CExpression) and ce.rexpr.rexpr.op == :'!=' and
                                ce.rexpr.rexpr.rexpr == ce.lexpr and not walk_ce(ce) { |ce_| break true if ce_.op == :funcall }
                        x, a, b = ce.lexpr, ce.rexpr.lexpr, ce.rexpr.rexpr.lexpr
                        ce.replace C::CExpression[ [x, :'&&', [[:'!',a],:'||',b]] , :'||',  [[:'!', x], :'&&', [:'!', [[:'!',a],:'||',b]]] ]
                        optimize_code(ce)
                end
                # (a != b) || a  =>  a || b
                if ce.op == :'||' and ce.lexpr.kind_of?(C::CExpression) and ce.lexpr.op == :'!=' and ce.lexpr.lexpr == ce.rexpr and not walk_ce(ce) { |ce_| break true if ce_.op == :funcall }
                        ce.lexpr, ce.rexpr = ce.rexpr, ce.lexpr.rexpr
                        optimize_code(ce)
                end
                # (a<b) && !(a>=0 && b<0)  ||  (a>=b) && (a>=0 && b<0)  =>  (signed)a < (signed)b
                if ce.op == :'||' and ce.lexpr.kind_of?(C::CExpression) and ce.rexpr.kind_of?(C::CExpression) and ce.lexpr.op == :'&&' and ce.rexpr.op == :'&&' and
                                ce.lexpr.lexpr.kind_of?(C::CExpression) and ce.lexpr.lexpr.op == :<
                        a, b = ce.lexpr.lexpr.lexpr, ce.lexpr.lexpr.rexpr
                        if ce.lexpr.rexpr === C::CExpression[[a, :'>=', [0]], :'&&', [b, :'<', [0]]].negate and
                                        ce.rexpr.lexpr === ce.lexpr.lexpr.negate and ce.rexpr.rexpr === ce.lexpr.rexpr.negate
                                ce.replace C::CExpression[a, :'<', b]
                        end
                end
                # a && 1
                if (ce.op == :'||' or ce.op == :'&&') and ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and ce.rexpr.rexpr.kind_of?(::Integer)
                        if ((ce.op == :'||' and ce.rexpr.rexpr == 0) or (ce.op == :'&&' and ce.rexpr.rexpr != 0))
                                ce.replace C::CExpression[ce.lexpr]
                        elsif not walk_ce(ce) { |ce_| break true if ce.op == :funcall }      # cannot wipe if sideeffect
                                ce.replace C::CExpression[[ce.op == :'||' ? 1 : 0]]
                        end
                end
                # (b < c || b >= c)
                if (ce.op == :'||' or ce.op == :'&&') and C::CExpression.negate(ce.lexpr) == C::CExpression[ce.rexpr]
                        ce.replace C::CExpression[[(ce.op == :'||') ? 1 : 0]]
                end

                # (a < b) | (a == b)  =>  a <= b
                if ce.op == :| and ce.rexpr.kind_of?(C::CExpression) and ce.rexpr.op == :== and ce.lexpr.kind_of?(C::CExpression) and
                                (ce.lexpr.op == :< or ce.lexpr.op == :>) and ce.lexpr.lexpr == ce.rexpr.lexpr and ce.lexpr.rexpr == ce.rexpr.rexpr
                        ce.op = {:< => :<=, :> => :>=}[ce.lexpr.op]
                        ce.lexpr, ce.rexpr = ce.lexpr.lexpr, ce.lexpr.rexpr
                end

                # a == 0  =>  !a
                if ce.op == :== and ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and ce.rexpr.rexpr == 0
                        ce.lexpr, ce.op, ce.rexpr = nil, :'!', ce.lexpr
                end

                if ce.op == :'!' and ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and ce.rexpr.rexpr.kind_of?(::Integer)
                        ce.replace C::CExpression[[ce.rexpr.rexpr == 0 ? 1 : 0]]
                end

                # !(bool) => bool
                if ce.op == :'!' and ce.rexpr.kind_of?(C::CExpression) and [:'==', :'!=', :<, :>, :<=, :>=, :'||', :'&&', :'!'].include?(ce.rexpr.op)
                        ce.replace ce.rexpr.negate
                end

                # (foo)(bar)x => (foo)x
                if not ce.op and ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and ce.rexpr.rexpr.kind_of?(C::CExpression)
                        ce.rexpr = ce.rexpr.rexpr
                end

                # &struct.1stmember => &struct
                if ce.op == :& and not ce.lexpr and ce.rexpr.kind_of?(C::CExpression) and ce.rexpr.op == :'.' and s = ce.rexpr.lexpr.type and
                                s.kind_of?(C::Union) and s.offsetof(@c_parser, ce.rexpr.rexpr) == 0
                        ce.rexpr = ce.rexpr.lexpr
                        ce.type = C::Pointer.new(ce.rexpr.type)
                end

                # (1stmember*)structptr => &structptr->1stmember
                if not ce.op and ce.type.pointer? and not ce.type.pointed.void? and ce.rexpr.kind_of?(C::Typed) and ce.rexpr.type.pointer? and
                                s = ce.rexpr.type.pointed.untypedef and s.kind_of?(C::Union) and ce.type.pointed.untypedef != s
                        ce.rexpr = C::CExpression[structoffset(s, ce.rexpr, 0, sizeof(ce.type.pointed))]
                        #ce.replace ce.rexpr if not ce.type.pointed.untypedef.kind_of?(C::Function) or (ce.rexpr.type.pointer? and
                        #ce.rexpr.type.pointed.untypedef.kind_of?(C::Function))     # XXX ugly
                        # int32* v1 = (int32*)pstruct;
                        # z = v1+4  if v1 is not cast, the + is invalid (sizeof pointed changes)
                        # TODO when finding type of pstruct, set type of v1 accordingly
                end

                # (&foo)->bar => foo.bar
                if ce.op == :'->' and ce.lexpr.kind_of?(C::CExpression) and ce.lexpr.op == :& and not ce.lexpr.lexpr
                        ce.lexpr = ce.lexpr.rexpr
                        ce.op = :'.'
                end

                # (foo)bla => bla if bla of type foo
                if not ce.op and ce.rexpr.kind_of?(C::Typed) and sametype[ce.type, ce.rexpr.type]
                        ce.replace C::CExpression[ce.rexpr]
                end
                if ce.lexpr.kind_of?(C::CExpression) and not ce.lexpr.op and ce.lexpr.rexpr.kind_of?(C::Variable) and ce.lexpr.type == ce.lexpr.rexpr.type
                        ce.lexpr = ce.lexpr.rexpr
                end
        }

        # if there is a ptr[4], change all *ptr to ptr[0] for consistency
        # do this after the first pass, which may change &*ptr to ptr
        walk_ce(scope) { |ce|
                if ce.op == :* and not ce.lexpr and ce.rexpr.kind_of?(C::Variable) and future_array.include?(ce.rexpr.name)
                        ce.lexpr, ce.op, ce.rexpr = ce.rexpr, :'[]', C::CExpression[0]
                end
        } if not future_array.empty?

        # if (x != 0) => if (x)
        walk(scope) { |st|
                if st.kind_of?(C::If) and st.test.kind_of?(C::CExpression) and st.test.op == :'!=' and
                                st.test.rexpr.kind_of?(C::CExpression) and not st.test.rexpr.op and st.test.rexpr.rexpr == 0
                        st.test = C::CExpression[st.test.lexpr]
                end
        }
end
optimize_ctrl(scope) click to toggle source

optimize if() { a; } to if() a;

# File metasm/decompile.rb, line 543
def optimize_ctrl(scope)
        simplify_goto(scope, true)

        # break/continue
        # XXX if (foo) while (bar) goto bla; bla:  should => break
        walk = lambda { |e, brk, cnt|
                case e
                when C::Block
                        walk[e.statements, brk, cnt]
                        e
                when ::Array
                        e.each_with_index { |st, i|
                                case st
                                when C::While, C::DoWhile
                                        l1 = (e[i+1].name if e[i+1].kind_of?(C::Label))
                                        l2 = (e[i-1].name if e[i-1].kind_of?(C::Label))
                                        e[i].body = walk[st.body, l1, l2]
                                else
                                        e[i] = walk[st, brk, cnt]
                                end
                        }
                        e
                when C::If
                        e.bthen = walk[e.bthen, brk, cnt] if e.bthen
                        e.belse = walk[e.belse, brk, cnt] if e.belse
                        e
                when C::While, C::DoWhile
                        e.body = walk[e.body, nil, nil]
                        e
                when C::Goto
                        if e.target == brk
                                C::Break.new
                        elsif e.target == cnt
                                C::Continue.new
                        else e
                        end
                else e
                end
        }
        walk[scope, nil, nil]

        remove_labels(scope)

        # while (1) { a; if(b) { c; return; }; d; }  =>  while (1) { a; if (b) break; d; } c;
        while st = scope.statements.last and st.kind_of?(C::While) and st.test.kind_of?(C::CExpression) and
                        not st.test.op and st.test.rexpr == 1 and st.body.kind_of?(C::Block)
                break if not i = st.body.statements.find { |ist|
                        ist.kind_of?(C::If) and not ist.belse and ist.bthen.kind_of?(C::Block) and ist.bthen.statements.last.kind_of?(C::Return)
                }
                walk(i.bthen.statements) { |sst| sst.outer = i.bthen.outer if sst.kind_of?(C::Block) and sst.outer == i.bthen }
                scope.statements.concat i.bthen.statements
                i.bthen = C::Break.new
        end

        patch_test = lambda { |ce|
                ce = ce.rexpr if ce.kind_of?(C::CExpression) and ce.op == :'!'
                # if (a+1)  =>  if (a != -1)
                if ce.kind_of?(C::CExpression) and (ce.op == :+ or ce.op == :-) and ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and ce.rexpr.rexpr.kind_of?(::Integer) and ce.lexpr
                        ce.rexpr.rexpr = -ce.rexpr.rexpr if ce.op == :+
                        ce.op = :'!='
                end
        }

        walk(scope) { |ce|
                case ce
                when C::If
                        patch_test[ce.test]
                        if ce.bthen.kind_of?(C::Block)
                                case ce.bthen.statements.length
                                when 1
                                        walk(ce.bthen.statements) { |sst| sst.outer = ce.bthen.outer if sst.kind_of?(C::Block) and sst.outer == ce.bthen }
                                        ce.bthen = ce.bthen.statements.first
                                when 0
                                        if not ce.belse and i = ce.bthen.outer.statements.index(ce)
                                                ce.bthen.outer.statements[i] = ce.test   # TODO remove sideeffectless parts
                                        end
                                end
                        end
                        if ce.belse.kind_of?(C::Block) and ce.belse.statements.length == 1
                                walk(ce.belse.statements) { |sst| sst.outer = ce.belse.outer if sst.kind_of?(C::Block) and sst.outer == ce.belse }
                                ce.belse = ce.belse.statements.first
                        end
                when C::While, C::DoWhile
                        patch_test[ce.test]
                        if ce.body.kind_of?(C::Block)
                               case ce.body.statements.length
                               when 1
                                       walk(ce.body.statements) { |sst| sst.outer = ce.body.outer if sst.kind_of?(C::Block) and sst.outer == ce.body }
                                       ce.body = ce.body.statements.first
                               when 0
                                       if ce.kind_of?(C::DoWhile) and i = ce.body.outer.statements.index(ce)
                                              ce = ce.body.outer.statements[i] = C::While.new(ce.test, ce.body)
                                       end
                                       ce.body = nil
                               end
                        end
                end
        }

        walk(scope, false, true) { |ce|
                # while (1) { a; if (b) break; }  =>  do { a } while (!b);
                if ce.kind_of?(C::While) and ce.test.kind_of?(C::CExpression) and not ce.test.op and ce.test.rexpr == 1 and ce.body.kind_of?(C::Block)
                        i = ce.body.statements.last
                        if i.kind_of?(C::If) and not i.belse and i.bthen.kind_of?(C::Break)
                                ce.body.statements.pop
                                next C::DoWhile.new(i.test.negate, ce.body)
                        end
                end

                # if (a) b = 1; else b = 2;  =>  b = a ? 1 : 2
                if ce.kind_of?(C::If) and ce.belse.kind_of?(C::CExpression) and ce.belse.op == :'=' and ce.belse.lexpr.kind_of?(C::Variable) and ce.bthen.kind_of?(C::CExpression) and ce.bthen.op == :'=' and ce.bthen.lexpr == ce.belse.lexpr
                        next C::CExpression[ce.bthen.lexpr, :'=', [ce.test, :'?:', [ce.bthen.rexpr, ce.belse.rexpr]]]
                end
        }

        # TODO for (;;) {}

        walk(scope) { |ce|
                next if not ce.kind_of?(C::Block)
                st = ce.statements
                st.length.times { |n|
                        while st[n].kind_of?(C::If) and st[n+1].kind_of?(C::If) and not st[n].belse and not st[n+1].belse and (
                                        (st[n].bthen.kind_of?(C::Return) and st[n+1].bthen.kind_of?(C::Return) and st[n].bthen.value == st[n+1].bthen.value) or
                                        (st[n].bthen.kind_of?(C::Break) and st[n+1].bthen.kind_of?(C::Break)) or
                                        (st[n].bthen.kind_of?(C::Continue) and st[n+1].bthen.kind_of?(C::Continue)))
                                # if (a) return x; if (b) return x; => if (a || b) return x;
                                st[n].test = C::CExpression[st[n].test, :'||', st[n+1].test]
                                st.delete_at(n+1)
                        end
                }
        }
end
optimize_global() click to toggle source
# File metasm/decompile.rb, line 2720
def optimize_global
        # check all global vars (pointers to global data)
        tl = @c_parser.toplevel
        vars = tl.symbol.keys.find_all { |k| tl.symbol[k].kind_of?(C::Variable) and not tl.symbol[k].type.kind_of?(C::Function) and not tl.symbol[k].storage == :extern and not tl.symbol[k].storage == :static }
        countref = Hash.new(0)

        walk_ce(tl) { |ce|
                # XXX int foo; void bar() { int foo; }  =>  false negative
                countref[ce.rexpr.name] += 1 if ce.rexpr.kind_of?(C::Variable)
                countref[ce.lexpr.name] += 1 if ce.lexpr.kind_of?(C::Variable)
        }

        vars.delete_if { |v| countref[v] == 0 }
        countref.delete_if { |k, v| not vars.include?(k) }

        # by default globals are C::Arrays
        # if all references are *foo, dereference the var type
        # TODO allow foo to appear (change to &foo) (but still disallow casts/foo+12 etc)
        countderef = Hash.new(0)
        walk_ce(tl) { |ce|
                if ce.op == :* and not ce.lexpr
                        r = ce.rexpr
                elsif ce.op == :'->'
                        r = C::CExpression[ce.lexpr]
                else next
                end
                # compare type.type cause var is an Array and the cast is a Pointer
                countderef[r.rexpr.name] += 1 if r.kind_of?(C::CExpression) and not r.op and r.rexpr.kind_of?(C::Variable) and
                                sizeof(nil, r.type.type) == sizeof(nil, r.rexpr.type.type) rescue nil
        }
        vars.each { |n|
                if countref[n] == countderef[n]
                        v = tl.symbol[n]
                        target = C::CExpression[:*, [v]]
                        v.type = v.type.type
                        v.initializer = v.initializer.first if v.initializer.kind_of?(::Array)
                        walk_ce(tl) { |ce|
                                if ce.op == :'->' and C::CExpression[ce.lexpr] == C::CExpression[v]
                                        ce.op = :'.'
                                elsif ce.lexpr == target
                                        ce.lexpr = v
                                end
                                ce.rexpr = v if ce.rexpr == target
                                ce.lexpr, ce.op, ce.rexpr = nil, nil, v if ce == target
                        }
                end
        }

        # if a global var appears only in one function, make it a static variable
        tl.statements.each { |st|
                next if not st.kind_of?(C::Declaration) or not st.var.type.kind_of?(C::Function) or not scope = st.var.initializer
                localcountref = Hash.new(0)
                walk_ce(scope) { |ce|
                        localcountref[ce.rexpr.name] += 1 if ce.rexpr.kind_of?(C::Variable)
                        localcountref[ce.lexpr.name] += 1 if ce.lexpr.kind_of?(C::Variable)
                }

                vars.delete_if { |n|
                        next if scope.symbol[n]
                        next if localcountref[n] != countref[n]
                        v = tl.symbol.delete(n)
                        tl.statements.delete_if { |d| d.kind_of?(C::Declaration) and d.var.name == n }

                        if countref[n] == 1 and v.initializer.kind_of?(C::CExpression) and v.initializer.rexpr.kind_of?(String)
                                walk_ce(scope) { |ce|
                                        if ce.rexpr.kind_of?(C::Variable) and ce.rexpr.name == n
                                                if not ce.op
                                                        ce.replace v.initializer
                                                else
                                                        ce.rexpr = v.initializer
                                                end
                                        elsif ce.lexpr.kind_of?(C::Variable) and ce.lexpr.name == n
                                                ce.lexpr = v.initializer
                                        end
                                }
                        else
                                v.storage = :static
                                scope.symbol[v.name] = v
                                scope.statements.unshift C::Declaration.new(v)
                        end

                        true
                }
        }
end
optimize_vars(scope) click to toggle source

dataflow optimization condenses expressions (++x; if (x) => if (++x)) remove local var assignment (x = 1; f(x); x = 2; g(x); => f(1); g(2); etc) XXX omg

# File metasm/decompile.rb, line 2161
def optimize_vars(scope)
        return if forbid_optimize_dataflow

        g = c_to_graph(scope)

        # walks a cexpr in evaluation order (not strictly, but this is not strictly defined anyway..)
        # returns the first subexpr to read var in ce
        # returns :write if var is rewritten
        # returns nil if var not read
        # may return a cexpr var += 2
        find_next_read_ce = lambda { |ce_, var|
                walk_ce(ce_, true) { |ce|
                        case ce.op
                        when :funcall
                                break ce if ce.lexpr == var or ce.rexpr.find { |a| a == var }
                        when :'='
                                # a=a  /  a=a+1  => yield a, not :write
                                break ce if ce.rexpr == var
                                break :write if ce.lexpr == var
                        else
                                break ce if ce.lexpr == var or ce.rexpr == var
                        end
                }
        }

        # badlabels is a list of labels that may be reached without passing through the first invocation block
        find_next_read_rec = lambda { |label, idx, var, done, badlabels|
                next if done.include?(label)
                done << label if idx == 0
                list = g.exprs_var[var.name][label].to_a.find_all { |i| i >= idx }
                idx = list.shift

                idx = list.shift while idx and not ret = find_next_read_ce[g.exprs[label][idx], var]
                next ret if ret

                to = g.to_optim[label].to_a.map { |t|
                        break [:split] if badlabels.include?(t)
                        find_next_read_rec[t, 0, var, done, badlabels]
                }.compact

                tw = to - [:write]
                if to.include?(:split) or tw.length > 1
                        :split
                elsif tw.length == 1
                        tw.first
                elsif to.include?(:write)
                        :write
                end
        }
        # return the previous subexpr reading var with no fwd path to another reading (otherwise split), see loop comment for reason
        find_next_read = nil
        find_prev_read_rec = lambda { |label, idx, var, done|
                next if done.include?(label)
                done << label if idx == g.exprs[label].length-1
                list = g.exprs_var[var.name][label].to_a.find_all { |i| i <= idx }
                idx = list.pop

                idx = list.pop while idx and not ret = find_next_read_ce[g.exprs[label][idx], var]
                if ret.kind_of?(C::CExpression)
                        fwchk = find_next_read[label, idx+1, var]
                        ret = fwchk if not fwchk.kind_of?(C::CExpression)
                end
                next ret if ret

                from = g.from_optim[label].to_a.map { |f|
                        find_prev_read_rec[f, g.exprs[f].to_a.length-1, var, done]
                }.compact

                next :split if from.include?(:split)
                fw = from - [:write]
                if fw.length == 1
                        fw.first
                elsif fw.length > 1
                        :split
                elsif from.include?(:write)
                        :write
                end
        }

        # list of labels reachable without passing through label
        badlab = {}
        build_badlabel = lambda { |label|
                next if badlab[label]
                badlab[label] = []
                todo = [g.start]
                while l = todo.pop
                        next if l == label or badlab[label].include?(l)
                        badlab[label] << l
                        todo.concat g.to_optim[l].to_a
                end
        }

        # returns the next subexpr where var is read
        # returns :write if var is written before being read
        # returns :split if the codepath splits with both subpath reading or codepath merges with another
        # returns nil if var is never read
        # idx is the index of the first cexpr at g.exprs[label] to look at
        find_next_read = lambda { |label, idx, var|
                find_next_read_rec[label, idx, var, [], []]
        }
        find_prev_read = lambda { |label, idx, var|
                find_prev_read_rec[label, idx, var, []]
        }
        # same as find_next_read, but returns :split if there exist a path from g.start to the read without passing through label
        find_next_read_bl = lambda { |label, idx, var|
                build_badlabel[label]
                find_next_read_rec[label, idx, var, [], badlab[label]]
        }

        # walk each node, optimize data accesses there
        # replace no longer useful exprs with CExpr[nil, nil, nil], those are wiped later.
        g.exprs.each { |label, exprs|
                next if not g.block[label]
                i = 0
                while i < exprs.length
                        e = exprs[i]
                        i += 1

                        # TODO x = x + 1  =>  x += 1  =>  ++x       here, move all other optimizations after (in optim_code)
                        # needs also int & 0xffffffff -> int, *&var  etc (decomp_type? optim_type?)
                        if (e.op == :'++' or e.op == :'--') and v = (e.lexpr || e.rexpr) and v.kind_of?(C::Variable) and
                                        scope.symbol[v.name] and not v.type.qualifier.to_a.include?(:volatile)
                                next if !((pos = :post.to_sym) and (oe = find_next_read_bl[label, i, v]) and oe.kind_of?(C::CExpression)) and
                                        !((pos = :prev.to_sym) and (oe = find_prev_read[label, i-2, v]) and oe.kind_of?(C::CExpression))
                                next if oe.op == :& and not oe.lexpr       # no &(++eax)

                                # merge pre/postincrement into next/prev var usage
                                # find_prev_read must fwd check when it finds something, to avoid
                                #  while(x) x++; return x; to be converted to while(x++); return x;  (return wrong value)
                                case oe.op
                                when e.op
                                        # bla(i--); --i   bla(--i); --i   ++i; bla(i++)  =>  ignore
                                        next if pos == :pre or oe.lexpr
                                        # ++i; bla(++i)  =>  bla(i += 2)
                                        oe.lexpr = oe.rexpr
                                        oe.op = ((oe.op == :'++') ? :'+=' : :'-=')
                                        oe.rexpr = C::CExpression[2]

                                when :'++', :'--'  # opposite of e.op
                                        if (pos == :post and not oe.lexpr) or (pos == :pre and not oe.rexpr)
                                                # ++i; bla(--i)  =>  bla(i)
                                                # bla(i--); ++i  =>  bla(i)
                                                oe.op = nil
                                        elsif pos == :post
                                                # ++i; bla(i--)  =>  bla(i+1)
                                                oe.op = ((oe.op == :'++') ? :- : :+)
                                                oe.rexpr = C::CExpression[1]
                                        elsif pos == :pre
                                                # bla(--i); ++i  =>  bla(i-1)
                                                oe.lexpr = oe.rexpr
                                                oe.op = ((oe.op == :'++') ? :+ : :-)
                                                oe.rexpr = C::CExpression[1]
                                        end
                                when :'+=', :'-='
                                        # TODO i++; i += 4  =>  i += 5
                                        next
                                when *C::CExpression::AssignOp
                                        next      # ++i; i |= 4  =>  ignore
                                else
                                        if    pos == :post and v == oe.lexpr; oe.lexpr = C::CExpression[e.op, v]
                                        elsif pos == :post and v == oe.rexpr; oe.rexpr = C::CExpression[e.op, v]
                                        elsif pos == :prev and v == oe.rexpr; oe.rexpr = C::CExpression[v, e.op]
                                        elsif pos == :prev and v == oe.lexpr; oe.lexpr = C::CExpression[v, e.op]
                                        else raise 'foobar'       # find_dir_read failed
                                        end
                                end

                                i -= 1
                                exprs.delete_at(i)
                                g.invalidate(label)
                                e.lexpr = e.op = e.rexpr = nil


                        elsif e.op == :'=' and v = e.lexpr and v.kind_of?(C::Variable) and scope.symbol[v.name] and
                                        not v.type.qualifier.to_a.include?(:volatile) and not find_next_read_ce[e.rexpr, v]

                                # reduce trivial static assignments
                                # b = a + 1 ; a = b => a = a + 1 ; b = a
                                if ne = g.exprs[label][i] and ne.op == :'=' and ne.rexpr == e.lexpr and ne.lexpr.kind_of?(C::Variable) and find_next_read_ce[e.rexpr, ne.lexpr]
                                        e.lexpr, ne.lexpr, ne.rexpr = ne.lexpr, ne.rexpr, ne.lexpr
                                        optimize_code(e)
                                        i -= 1
                                        g.invalidate(label)
                                        next
                                end

                                # i = 4 ; f(i) => f(4)
                                if (e.rexpr.kind_of?(C::CExpression) and iv = e.rexpr.reduce(@c_parser) and iv.kind_of?(::Integer)) or
                                   (e.rexpr.kind_of?(C::CExpression) and e.rexpr.op == :& and not e.rexpr.lexpr and e.rexpr.lexpr.kind_of?(C::Variable)) or
                                   (e.rexpr.kind_of?(C::Variable) and e.rexpr.type.kind_of?(C::Array))
                                        rewritten = false
                                        readers = []
                                        discard = [e]
                                        g.exprs.each { |l, el|
                                                el.each_with_index { |ce, ci|
                                                        if ce_write(ce, v) and [label, i-1] != [l, ci]
                                                                if ce == e
                                                                        discard << ce
                                                                else
                                                                        rewritten = true
                                                                        break
                                                                end
                                                        elsif ce_read(ce, v)
                                                                if walk_ce(ce) { |_ce| break true if _ce.op == :& and not _ce.lexpr and _ce.rexpr == v }
                                                                        # i = 2 ; j = &i  =!>  j = &2
                                                                        rewritten = true
                                                                        break
                                                                end
                                                                readers << ce
                                                        end
                                                } if not rewritten
                                        }
                                        if not rewritten
                                                ce_patch(readers, v, C::CExpression[iv || e.rexpr])
                                                discard.each { |d| d.lexpr = d.op = d.rexpr = nil }
                                                next
                                        end
                                end

                                case find_next_read[label, i, v]
                                when C::CExpression
                                        # read in one place only, try to patch rexpr in there
                                        r = e.rexpr

                                        # must check for conflicts (x = y; y += 1; foo(x)  =!>  foo(y))
                                        # XXX x = a[1]; *(a+1) = 28; foo(x)...
                                        isfunc = false
                                        depend_vars = []
                                        walk_ce(C::CExpression[r]) { |ce|
                                                isfunc = true if ce.op == :func and (not ce.lexpr.kind_of?(C::Variable) or
                                                                not ce.lexpr.has_attribute('pure'))    # XXX is there a C attr for func depending only on staticvars+param ?
                                                depend_vars << ce.lexpr if ce.lexpr.kind_of?(C::Variable)
                                                depend_vars << ce.rexpr if ce.rexpr.kind_of?(C::Variable) and (ce.lexpr or ce.op != :&)  # a = &v; v = 12; func(a) => func(&v)
                                                depend_vars << ce if ce.lvalue?
                                                depend_vars.concat(ce.rexpr.grep(C::Variable)) if ce.rexpr.kind_of?(::Array)
                                        }
                                        depend_vars.uniq!

                                        # XXX x = 1; if () { x = 2; } foo(x)  =!>  foo(1)  (find_next_read will return this)
                                        #     we'll just redo a find_next_read like
                                        # XXX b = &a; a = 1; *b = 2; foo(a)  unhandled & generate bad C
                                        l_l = label
                                        l_i = i
                                        while g.exprs[l_l].to_a.each_with_index { |ce_, n_i|
                                                next if n_i < l_i
                                                # count occurences of read v in ce_
                                                cnt = 0
                                                bad = false
                                                walk_ce(ce_) { |ce|
                                                        case ce.op
                                                        when :funcall
                                                                bad = true if isfunc
                                                                ce.rexpr.each { |a| cnt += 1 if a == v }
                                                                cnt += 1 if ce.lexpr == v
                                                        when :'='
                                                                bad = true if depend_vars.include?(ce.lexpr)
                                                                cnt += 1 if ce.rexpr == v
                                                        else
                                                                bad = true if (ce.op == :'++' or ce.op == :'--') and depend_vars.include?(ce.rexpr)
                                                                bad = true if C::CExpression::AssignOp.include?(ce.op) and depend_vars.include?(ce.lexpr)
                                                                cnt += 1 if ce.lexpr == v
                                                                cnt += 1 if ce.rexpr == v
                                                        end
                                                }
                                                case cnt
                                                when 0
                                                        break if bad
                                                        next
                                                when 1   # good
                                                        break if e.complexity > 10 and ce_.complexity > 3       # try to keep the C readable
                                                        # x = 1; y = x; z = x;  =>  cannot suppress x
                                                        nr = find_next_read[l_l, n_i+1, v]
                                                        break if (nr.kind_of?(C::CExpression) or nr == :split) and not walk_ce(ce_) { |ce| break true if ce.op == :'=' and ce.lexpr == v }
                                                else break       # a = 1; b = a + a  => fail
                                                end

                                                # TODO XXX x = 1; y = x; z = x;
                                                res = walk_ce(ce_, true) { |ce|
                                                        case ce.op
                                                        when :funcall
                                                                if ce.rexpr.to_a.each_with_index { |a,i_|
                                                                        next if a != v
                                                                        ce.rexpr[i_] = r
                                                                        break :done
                                                                } == :done
                                                                        break :done
                                                                elsif ce.lexpr == v
                                                                        ce.lexpr = r
                                                                        break :done
                                                                elsif isfunc
                                                                        break :fail
                                                                end
                                                        when *C::CExpression::AssignOp
                                                                break :fail if not ce.lexpr and depend_vars.include?(ce.rexpr) # ++depend
                                                                if ce.rexpr == v
                                                                        ce.rexpr = r
                                                                        break :done
                                                                elsif ce.lexpr == v or depend_vars.include?(ce.lexpr)
                                                                        break :fail
                                                                end
                                                        else
                                                                break :fail if ce.op == :& and not ce.lexpr and ce.rexpr == v
                                                                if ce.lexpr == v
                                                                        ce.lexpr = r
                                                                        break :done
                                                                elsif ce.rexpr == v
                                                                        ce_.type = r.type if not ce_.op and ce_.rexpr == v    # return (int32)eax
                                                                        ce.rexpr = r
                                                                        break :done
                                                                end
                                                        end
                                                }
                                                case res
                                                when :done
                                                        i -= 1
                                                        exprs.delete_at(i)
                                                        g.invalidate(label)
                                                        g.invalidate(l_l) if l_l != label
                                                        e.lexpr = e.op = e.rexpr = nil
                                                        break
                                                when :fail
                                                        break
                                                end
                                        }
                                                # ignore branches that will never reuse v
                                                may_to = g.to_optim[l_l].find_all { |to| find_next_read[to, 0, v].kind_of?(C::CExpression) }
                                                if may_to.length == 1 and to = may_to.first and to != l_l and g.from_optim[to] == [l_l] and
                                                                not sideeffect(e.rexpr)        # dont do cross-block var optimization, eg 'a = f() ; if() return a' =!> 'if () return f()'
                                                        l_i = 0
                                                        l_l = to
                                                else break
                                                end
                                        end

                                when nil, :write
                                        # useless assignment (value never read later)
                                        # XXX foo = &bar; bar = 12; baz(*foo)
                                        e.replace(C::CExpression[e.rexpr])
                                        # remove sideeffectless subexprs
                                        loop do
                                                case e.op
                                                when :funcall, *C::CExpression::AssignOp
                                                else
                                                        l = (e.lexpr.kind_of?(C::CExpression) and sideeffect(e.lexpr))
                                                        r = (e.rexpr.kind_of?(C::CExpression) and sideeffect(e.rexpr))
                                                        if l and r      # could split...
                                                        elsif l
                                                                e.replace(e.lexpr)
                                                                next
                                                        elsif r
                                                                e.replace(e.rexpr)
                                                                next
                                                        else # remove the assignment altogether
                                                                i -= 1
                                                                exprs.delete_at(i)
                                                                e.lexpr = e.op = e.rexpr = nil
                                                        end
                                                end
                                                break
                                        end
                                        g.invalidate(label)
                                end
                        end
                end
        }


        # var propagation
        # find vars who are written only once, and replace all their use by their assignment value
        # XXX this may supercede some of the ugly stuff just before
        loop do
                g.invalidate
                writtenonce = {}     # var => [label, offset] of assignment
                neverwritten = {}    # var => true (eg args)
                g.exprs_var.each { |varname, h1|
                        next if not var = scope.symbol[varname]
                        neverwritten[varname] = true
                        h1.each { |label, idx_list|
                                idx_list.each { |expr_idx|
                                        e = g.exprs[label][expr_idx]
                                        if ce_write(e, var)
                                                neverwritten.delete varname
                                                if writtenonce[varname]
                                                        # written twice, abort
                                                        writtenonce.delete varname
                                                        break
                                                elsif e.op == :'=' and e.lexpr == var and not ce_write(e.rexpr, var) and not ce_read(e.rexpr, var)
                                                        # good !
                                                        writtenonce[varname] = [label, expr_idx]
                                                else
                                                        # unhandled write, abort
                                                        break
                                                end
                                        end
                                }
                        }
                }
                # XXX check cycles ?

                is_trivial_assign = lambda { |e, rec_max|
                        case e
                        when C::Variable; writtenonce[e.name] or neverwritten[e.name]
                        when ::Integer, nil; true
                        when C::CExpression
                                rec_max > 0 and
                                not sideeffect(e) and
                                (e.op == :+ or e.op == :- or e.op == nil) and
                                is_trivial_assign[e.lexpr, rec_max-1] and
                                is_trivial_assign[e.rexpr, rec_max-1]
                        end
                }
                break if not trivial_var = writtenonce.keys.find { |var|
                        l, i = writtenonce[var]
                        e = g.exprs[l][i].rexpr
                        is_trivial_assign[e, 3]
                }
                label, idx = writtenonce[trivial_var]
                assign_expr = g.exprs[label][idx]
                var = assign_expr.lexpr
                value = assign_expr.rexpr
                g.exprs_var[trivial_var].each { |l, list|
                        list.each { |i|
                                e = g.exprs[l][i]
                                if l == label and i == idx
                                        e.lexpr = e.op = e.rexpr = nil
                                else
                                        ce_patch(e, var, value)
                                        optimize_code(e)
                                end
                        }
                }
        end


        # wipe cexprs marked in the previous step
        walk(scope) { |st|
                next if not st.kind_of?(C::Block)
                st.statements.delete_if { |e| e.kind_of?(C::CExpression) and not e.lexpr and not e.op and not e.rexpr }
        }
        g.invalidate

        # reoptimize cexprs
        walk_ce(scope, true) { |ce|
                # redo some simplification that may become available after variable propagation
                # int8 & 255  =>  int8
                if ce.op == :& and ce.lexpr and ce.lexpr.type.integral? and ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and ce.rexpr.rexpr == (1 << (8*sizeof(ce.lexpr))) - 1
                        ce.replace C::CExpression[ce.lexpr]
                end

                # int *ptr; *(ptr + 4) => ptr[4]
                if ce.op == :* and not ce.lexpr and ce.rexpr.kind_of?(C::CExpression) and ce.rexpr.op == :+ and var = ce.rexpr.lexpr and var.kind_of?(C::Variable) and var.type.pointer?
                        ce.lexpr, ce.op, ce.rexpr = ce.rexpr.lexpr, :'[]', ce.rexpr.rexpr
                end

                # useless casts
                if not ce.op and ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and (ce.rexpr.rexpr.kind_of?(C::CExpression) or
                                (ce.type.pointer? and ce.rexpr.rexpr == 0 and not ce.type.pointed.untypedef.kind_of?(C::Union)))   # keep ((struct*)0)->memb
                        ce.rexpr = ce.rexpr.rexpr
                end
                if not ce.op and ce.rexpr.kind_of?(C::CExpression) and (ce.type == ce.rexpr.type or (ce.type.integral? and ce.rexpr.type.integral?))
                        ce.replace ce.rexpr
                end
                # useless casts (type)*((oeua)Ptype)
                if not ce.op and ce.rexpr.kind_of?(C::CExpression) and ce.rexpr.op == :* and not ce.rexpr.lexpr and ce.rexpr.rexpr.kind_of?(C::CExpression) and not ce.rexpr.rexpr.op and
                                p = ce.rexpr.rexpr.rexpr and p.kind_of?(C::Typed) and p.type.pointer? and ce.type == p.type.pointed
                        ce.op = ce.rexpr.op
                        ce.rexpr = ce.rexpr.rexpr.rexpr
                end

                # (char *)42 => new global var
                if not ce.op and ce.type.pointer? and ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and ce.rexpr.rexpr.kind_of?(::Integer)
                        ce.rexpr = new_global_var(ce.rexpr.rexpr, ce.type, scope) || ce.rexpr
                end

                # (a > 0) != 0
                if ce.op == :'!=' and ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and ce.rexpr.rexpr == 0 and ce.lexpr.kind_of?(C::CExpression) and
                                [:<, :<=, :>, :>=, :'==', :'!=', :'!'].include?(ce.lexpr.op)
                        ce.replace ce.lexpr
                end
                # a == 0  =>  !a
                if ce.op == :== and ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and ce.rexpr.rexpr == 0
                        ce.replace C::CExpression[:'!', ce.lexpr]
                end
                # !(int)a  =>  !a
                if ce.op == :'!' and ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and ce.rexpr.rexpr.kind_of?(C::CExpression)
                        ce.rexpr = ce.rexpr.rexpr
                end
                # (int)a < (int)b  =>  a < b TODO uint <-> int
                if [:<, :<=, :>, :>=].include?(ce.op) and ce.rexpr.kind_of?(C::CExpression) and ce.lexpr.kind_of?(C::CExpression) and not ce.rexpr.op and not ce.lexpr.op and
                        ce.rexpr.rexpr.kind_of?(C::CExpression) and ce.rexpr.rexpr.type.pointer? and ce.lexpr.rexpr.kind_of?(C::CExpression) and ce.lexpr.rexpr.type.pointer?
                        ce.rexpr = ce.rexpr.rexpr
                        ce.lexpr = ce.lexpr.rexpr
                end

                # a & 3 & 1
                while (ce.op == :& or ce.op == :|) and ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and ce.rexpr.rexpr.kind_of?(::Integer) and
                                ce.lexpr.kind_of?(C::CExpression) and ce.lexpr.op == ce.op and ce.lexpr.lexpr and
                                ce.lexpr.rexpr.kind_of?(C::CExpression) and ce.lexpr.rexpr.rexpr.kind_of?(::Integer)
                        ce.lexpr, ce.rexpr.rexpr = ce.lexpr.lexpr, ce.lexpr.rexpr.rexpr.send(ce.op, ce.rexpr.rexpr)
                end

                # x = x | 4 => x |= 4
                if ce.op == :'=' and ce.rexpr.kind_of?(C::CExpression) and [:+, :-, :*, :/, :|, :&, :^, :>>, :<<].include?(ce.rexpr.op) and ce.rexpr.lexpr == ce.lexpr
                        ce.op = (ce.rexpr.op.to_s + '=').to_sym
                        ce.rexpr = ce.rexpr.rexpr
                end

                # x = 4 | x => x |= 4
                if ce.op == :'=' and ce.rexpr.kind_of?(C::CExpression) and [:+, :*, :|, :&, :^].include?(ce.rexpr.op) and ce.rexpr.rexpr == ce.lexpr
                        ce.op = (ce.rexpr.op.to_s + '=').to_sym
                        ce.rexpr = ce.rexpr.lexpr
                end

                # x += 1 => ++x
                if (ce.op == :'+=' or ce.op == :'-=') and ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and (ce.rexpr.rexpr == 1 or ce.rexpr.rexpr == -1)
                        if ce.rexpr.rexpr == 1
                                ce.op = {:'+=' => :'++', :'-=' => :'--'}[ce.op]
                        else
                                ce.op = {:'+=' => :'--', :'-=' => :'++'}[ce.op]
                        end
                        ce.rexpr = ce.lexpr
                        ce.lexpr = nil
                end

                # --x+1 => x--
                if (ce.op == :+ or ce.op == :-) and ce.lexpr.kind_of?(C::CExpression) and ce.lexpr.op == {:+ => :'--', :- => :'++'}[ce.op] and
                                ce.lexpr.rexpr and ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and ce.rexpr.rexpr == 1
                        ce.lexpr, ce.op, ce.rexpr = ce.lexpr.rexpr, ce.lexpr.op, nil
                end

                # 1+2 => 3
                if ce.lexpr.kind_of?(C::CExpression) and not ce.lexpr.op and ce.lexpr.rexpr.kind_of?(::Integer) and
                   ce.rexpr.kind_of?(C::CExpression) and not ce.rexpr.op and ce.rexpr.rexpr.kind_of?(::Integer) and
                   [:+, :-, :*, :/, :|, :&, :^, :>, :'==', :'!=', :<, :>=, :<=].include?(ce.op)
                        ce.rexpr = ce.reduce(@c_parser)
                        ce.lexpr = ce.op = nil
                end
        }
end
redecompile(name) click to toggle source

redecompile a function, redecompiles functions calling it if its prototype changed

# File metasm/decompile.rb, line 161
def redecompile(name)
        @c_parser.toplevel.statements.delete_if { |st| st.kind_of?(C::Declaration) and st.var.name == name }
        oldvar = @c_parser.toplevel.symbol.delete name

        decompile_func(name)

        if oldvar and newvar = @c_parser.toplevel.symbol[name] and oldvar.type.kind_of?(C::Function) and newvar.type.kind_of?(C::Function)
                o, n = oldvar.type, newvar.type
                if o.type != n.type or o.args.to_a.length != n.args.to_a.length or o.args.to_a.zip(n.args.to_a).find { |oa, na| oa.type != na.type }
                        # XXX a may depend on b and c, and b may depend on c -> redecompile c twice
                        # XXX if the dcmp is unstable, may also infinite loop on mutually recursive funcs..
                        @c_parser.toplevel.statements.dup.each { |st|
                                next if not st.kind_of?(C::Declaration)
                                next if not st.var.initializer
                                next if st.var.name == name
                                next if not walk_ce(st) { |ce| break true if ce.op == :funcall and ce.lexpr.kind_of?(C::Variable) and ce.lexpr.name == name }
                                redecompile(st.var.name)
                        }
                end
        end
end
remove_labels(scope) click to toggle source

remove unused labels

# File metasm/decompile.rb, line 921
def remove_labels(scope)
        return if forbid_optimize_labels

        used = []
        walk(scope) { |ss|
                used |= [ss.target] if ss.kind_of?(C::Goto)
        }
        walk(scope) { |s|
                next if not s.kind_of?(C::Block)
                s.statements.delete_if { |l|
                        l.kind_of?(C::Label) and not used.include?(l.name)
                }
        }

        # remove implicit continue; at end of loop
        walk(scope) { |s|
                next if not s.kind_of?(C::While)
                if s.body.kind_of?(C::Block) and s.body.statements.last.kind_of?(C::Continue)
                        s.body.statements.pop
                end
        }
end
remove_unreferenced_vars(scope) click to toggle source
# File metasm/decompile.rb, line 2701
def remove_unreferenced_vars(scope)
        used = {}
        walk_ce(scope) { |ce|
                # remove unreferenced local vars
                used[ce.rexpr.name] = true if ce.rexpr.kind_of?(C::Variable)
                used[ce.lexpr.name] = true if ce.lexpr.kind_of?(C::Variable)
                ce.rexpr.each { |v| used[v.name] = true if v.kind_of?(C::Variable) } if ce.rexpr.kind_of?(::Array)
        }
        unused = scope.symbol.keys.find_all { |n| not used[n] }
        unused.each { |v| scope.symbol[v].add_attribute 'unused' }    # fastcall args need it
        scope.statements.delete_if { |sm| sm.kind_of?(C::Declaration) and unused.include?(sm.var.name) }
        scope.symbol.delete_if { |n, v| unused.include?(n) }
end
rename_variables(scope) click to toggle source

rename local variables from subfunc arg names

# File metasm/decompile.rb, line 2873
def rename_variables(scope)
        funcs = []
        cntrs = []
        cmpi = []

        walk_ce(scope) { |ce|
                funcs << ce if ce.op == :funcall
                cntrs << (ce.lexpr || ce.rexpr) if ce.op == :'++'
                cmpi << ce.lexpr if [:<, :>, :<=, :>=, :==, :'!='].include?(ce.op) and ce.rexpr.kind_of?(C::CExpression) and ce.rexpr.rexpr.kind_of?(::Integer)
        }

        rename = lambda { |var, name|
                var = var.rexpr if var.kind_of?(C::CExpression) and not var.op
                next if not var.kind_of?(C::Variable) or not scope.symbol[var.name] or not name
                next if (var.name !~ /^(var|arg)_/ and not var.storage == :register) or not scope.symbol[var.name] or name =~ /^(var|arg)_/
                s = scope.symbol_ancestors
                n = name
                i = 0
                n = name + "#{i+=1}" while s[n]
                scope.symbol[n] = scope.symbol.delete(var.name)
                var.name = n
        }

        funcs.each { |ce|
                next if not ce.lexpr.kind_of?(C::Variable) or not ce.lexpr.type.kind_of?(C::Function)
                ce.rexpr.to_a.zip(ce.lexpr.type.args.to_a).each { |a, fa| rename[a, fa.name] if fa }
        }
        funcs.each { |ce|
                next if not ce.lexpr.kind_of?(C::Variable) or not ce.lexpr.type.kind_of?(C::Function)
                ce.rexpr.to_a.zip(ce.lexpr.type.args.to_a).each { |a, fa|
                        next if not a.kind_of?(C::CExpression) or a.op != :& or a.lexpr
                        next if not fa or not fa.name
                        rename[a.rexpr, fa.name.sub(/^l?p/, '')]
                }
        }
        (cntrs & cmpi).each { |v| rename[v, 'cntr'] }
end
sideeffect(exp, scope=nil) click to toggle source

checks if an expr has sideeffects (funcall, var assignment, mem dereference, use var out of scope if specified)

# File metasm/decompile.rb, line 2000
def sideeffect(exp, scope=nil)
        case exp
        when nil, ::Numeric, ::String; false
        when ::Array; exp.any? { |_e| sideeffect _e, scope }
        when C::Variable; (scope and not scope.symbol[exp.name]) or exp.type.qualifier.to_a.include?(:volatile)
        when C::CExpression; (exp.op == :* and not exp.lexpr) or exp.op == :funcall or C::CExpression::AssignOp.include?(exp.op) or
                        sideeffect(exp.lexpr, scope) or sideeffect(exp.rexpr, scope)
        else true     # failsafe
        end
end
simplify_goto(scope, keepret = false) click to toggle source

simplify goto -> goto / goto -> return

# File metasm/decompile.rb, line 450
def simplify_goto(scope, keepret = false)
        if not keepret and scope.statements[-1].kind_of?(C::Return) and not scope.statements[-2].kind_of?(C::Label)
                scope.statements.insert(-2, C::Label.new("ret_label"))
        end

        jumpto = {}
        walk(scope) { |s|
                next if not s.kind_of?(C::Block)
                s.statements.each_with_index { |ss, i|
                        case ss
                        when C::Goto, C::Return
                                while l = s.statements[i -= 1] and l.kind_of?(C::Label)
                                        jumpto[l.name] = ss
                                end
                        end
                }
        }

        simpler = lambda { |s|
                case s
                when C::Goto
                        if jumpto[s.target]
                                r = jumpto[s.target].dup
                                r.value = r.value.deep_dup if r.kind_of?(C::Return) and r.value.kind_of?(C::CExpression)
                                r
                        end
                when C::Return
                        if not keepret and scope.statements[-1].kind_of?(C::Return) and s.value == scope.statements[-1].value and s != scope.statements[-1]
                                C::Goto.new(scope.statements[-2].name)
                        end
                end
        }

        walk(scope) { |s|
                case s
                when C::Block
                        s.statements.each_with_index { |ss, i|
                                if sp = simpler[ss]
                                        ss = s.statements[i] = sp
                                end
                        }
                when C::If
                        if sp = simpler[s.bthen]
                                s.bthen = sp
                        end
                end
        }

        # remove unreferenced labels
        remove_labels(scope)

        walk(scope) { |s|
                next if not s.kind_of?(C::Block)
                del = false
                # remove dead code  goto a; goto b; if (0) { z: bla; }  => rm goto b
                s.statements.delete_if { |st|
                        case st
                        when C::Goto, C::Return
                                olddel = del
                                del = true
                                olddel
                        else
                                del = false
                        end
                }
                # if () { goto x; } x:
                s.statements.each_with_index { |ss, i|
                        if ss.kind_of?(C::If)
                                t = ss.bthen
                                t = t.statements.first if t.kind_of?(C::Block)
                                if t.kind_of?(C::Goto) and s.statements[i+1].kind_of?(C::Label) and s.statements[i+1].name == t.target
                                        ss.bthen = C::Block.new(scope)
                                end
                        end
                }
        }

        remove_labels(scope)
end
simplify_varname_noalias(scope) click to toggle source

revert the unaliasing namechange of vars where no alias subsists

# File metasm/decompile.rb, line 1197
def simplify_varname_noalias(scope)
        names = scope.symbol.keys
        names.delete_if { |k|
                next if not b = k[/^(.*)_a\d+$/, 1]
                next if scope.symbol[k].stackoff.to_i > 0
                if not names.find { |n| n != k and (n == b or n[/^(.*)_a\d+$/, 1] == b) }
                        scope.symbol[b] = scope.symbol.delete(k)
                        scope.symbol[b].name = b
                end
        }
end
sizeof(var, type=nil) click to toggle source

forwards to @c_parser, handles cast to Array (these should not happen btw…)

# File metasm/decompile.rb, line 2978
def sizeof(var, type=nil)
        var, type = nil, var if var.kind_of?(C::Type) and not type
        type ||= var.type
        return @c_parser.typesize[:ptr] if type.kind_of?(C::Array) and not var.kind_of?(C::Variable)
        @c_parser.sizeof(var, type) rescue -1
end
stackoff_to_varname(off) click to toggle source

give a name to a stackoffset (relative to start of func) 4 => :arg_0, -8 => :var_4 etc

# File metasm/decompile.rb, line 387
def stackoff_to_varname(off)
        if off >= @c_parser.typesize[:ptr]; 'arg_%X' % ( off-@c_parser.typesize[:ptr])        #  4 => arg_0,  8 => arg_4..
        elsif off > 0; 'arg_0%X' % off
        elsif off == 0; 'retaddr'
        elsif off <= -@dasm.cpu.size/8; 'var_%X' % (-off-@dasm.cpu.size/8)    # -4 => var_0, -8 => var_4..
        else 'var_0%X' % -off
        end
end
structoffset(st, ptr, off, msz) click to toggle source

struct foo { int i; int j; struct { int k; int l; } m; }; bla+12 => &bla->m.l st is a struct, ptr is an expr pointing to a struct, off is a numeric offset from ptr, msz is the size of the pointed member (nil ignored)

# File metasm/decompile.rb, line 1632
def structoffset(st, ptr, off, msz)
        tabidx = off / sizeof(st)
        off -= tabidx * sizeof(st)
        ptr = C::CExpression[:&, [ptr, :'[]', [tabidx]]] if tabidx != 0 or ptr.type.untypedef.kind_of?(C::Array)
        return ptr if off == 0 and (not msz or        # avoid infinite recursion with eg chained list
                        (ptr.kind_of?(C::CExpression) and ((ptr.op == :& and not ptr.lexpr and s=ptr.rexpr) or (ptr.op == :'.' and s=ptr)) and
                        not s.type.untypedef.kind_of?(C::Union)))

        m_ptr = lambda { |m|
                if ptr.kind_of?(C::CExpression) and ptr.op == :& and not ptr.lexpr
                       C::CExpression[ptr.rexpr, :'.', m.name]
                else
                       C::CExpression[ptr, :'->', m.name]
                end
        }

        # recursive proc to list all named members, including in anonymous substructs
        submemb = lambda { |sm| sm.name ? sm : sm.type.kind_of?(C::Union) ? sm.type.members.to_a.map { |ssm| submemb[ssm] } : nil }
        mbs = st.members.to_a.map { |m| submemb[m] }.flatten.compact
        mo = mbs.inject({}) { |h, m| h.update m => st.offsetof(@c_parser, m.name) }

        if  sm = mbs.find { |m| mo[m] == off and (not msz or sizeof(m) == msz) } ||
                 mbs.find { |m| mo[m] <= off and mo[m]+sizeof(m) > off }
                off -= mo[sm]
                sst = sm.type.untypedef
                #return ptr if mo[sm] == 0 and sst.pointer? and sst.type.untypedef == st     # TODO fix infinite recursion on mutually recursive ptrs
                ptr = C::CExpression[:&, m_ptr[sm]]
                if sst.kind_of?(C::Union)
                        return structoffset(sst, ptr, off, msz)
                end
        end

        if off != 0
                C::CExpression[[[ptr], C::Pointer.new(C::BaseType.new(:__int8))], :+, [off]]
        else
                ptr
        end
end
unalias_var(var, scope, g = c_to_graph(scope)) click to toggle source

duplicates a var per domain value

# File metasm/decompile.rb, line 1008
def unalias_var(var, scope, g = c_to_graph(scope))
        # [label, index] of references to var (reading it, writing it, ro/wo it (eg eax = *eax => eax_0 = *eax_1))
        read = {}
        write = {}
        ro = {}
        wo = {}
        g_exprs = {}
        g.exprs_var[var.name].to_h.each { |k, v| g_exprs[k] = v.map { |i| g.exprs[k][i] } }

        # list of [l, i] for which domain is not known
        unchecked = []

        # mark all exprs of the graph
        # TODO handle var_14 __attribute__((out)) = &curvar <=> curvar write
        r = var.has_attribute_var('register')
        g_exprs.each { |label, exprs|
                exprs.each_with_index { |ce, i|
                        if ce_read(ce, var)
                                if (ce.op == :'=' and isvar(ce.lexpr, var) and not ce_write(ce.rexpr, var)) or
                                   (ce.op == :funcall and r and not ce_write(ce.lexpr, var) and not ce_write(ce.rexpr, var) and @dasm.cpu.abi_funcall[:changed].include?(r.to_sym))
                                        (ro[label] ||= []) << i
                                        (wo[label] ||= []) << i
                                        unchecked << [label, i, :up] << [label, i, :down]
                                else
                                        (read[label] ||= []) << i
                                        unchecked << [label, i]
                                end
                        elsif ce_write(ce, var)
                                (write[label] ||= []) << i
                                unchecked << [label, i]
                        end
                }
        }

        # stuff when filling the domain (flood algorithm)
        dom = dom_ro = dom_wo = todo_up = todo_down = func_top = nil

        # flood by walking the graph up from [l, i] (excluded)
        # marks stuff do walk down
        walk_up = lambda { |l,  i|
                todo_w = [[l, i-1]]
                done_w = []
                while o = todo_w.pop
                        next if done_w.include?(o)
                        done_w << o
                        l, i = o
                        loop do
                                if read[l].to_a.include?(i)
                                        # XXX not optimal (should mark only the uppest read)
                                        todo_down |= [[l, i]] if not dom.include?([l, i])
                                        dom |= [[l, i]]
                                elsif write[l].to_a.include?(i)
                                        todo_down |= [[l, i]] if not dom.include?([l, i])
                                        dom |= [[l, i]]
                                        break
                                elsif wo[l].to_a.include?(i)
                                        todo_down |= [[l, i]] if not dom_wo.include?([l, i, :down])
                                        dom_wo |= [[l, i, :down]]
                                        break
                                end
                                i -= 1
                                if i < 0
                                        g.from_optim[l].to_a.each { |ll|
                                                todo_w << [ll, g_exprs[ll].to_a.length-1]
                                        }
                                        # read unitialized
                                        func_top = true if g.from_optim[l].to_a.empty?
                                        break
                                end
                        end
                end
        }

        # flood by walking the graph down from [l, i] (excluded)
        # marks stuff to walk up
        walk_down = lambda { |l, i|
                todo_w = [[l, i+1]]
                done_w = []
                while o = todo_w.pop
                        next if done_w.include?(o)
                        done_w << o
                        l, i = o
                        loop do
                                if read[l].to_a.include?(i)
                                        todo_up |= [[l, i]] if not dom.include?([l, i])
                                        dom |= [[l, i]]
                                elsif write[l].to_a.include?(i)
                                        break
                                elsif ro[l].to_a.include?(i)
                                        todo_up |= [[l, i]] if not dom_ro.include?([l, i, :up])
                                        dom_ro |= [[l, i, :up]]
                                        break
                                end
                                i += 1
                                if i >= g_exprs[l].to_a.length
                                        g.to_optim[l].to_a.each { |ll|
                                                todo_w << [ll, 0]
                                        }
                                        break
                                end
                        end
                end
        }

        # check it out
        while o = unchecked.shift
                dom = []
                dom_ro = []
                dom_wo = []
                func_top = false

                todo_up = []
                todo_down = []

                # init
                if read[o[0]].to_a.include?(o[1])
                        todo_up << o
                        todo_down << o
                        dom << o
                elsif write[o[0]].to_a.include?(o[1])
                        todo_down << o
                        dom << o
                elsif o[2] == :up
                        todo_up << o
                        dom_ro << o
                elsif o[2] == :down
                        todo_down << o
                        dom_wo << o
                else raise
                end

                # loop
                while todo_up.first or todo_down.first
                        todo_up.each { |oo| walk_up[oo[0], oo[1]] }
                        todo_up.clear

                        todo_down.each { |oo| walk_down[oo[0], oo[1]] }
                        todo_down.clear
                end

                unchecked -= dom + dom_wo + dom_ro

                next if func_top

                # patch
                n_i = 0
                n_i += 1 while scope.symbol_ancestors[newvarname = "#{var.name}_a#{n_i}"]

                nv = var.dup
                nv.misc = var.misc ? var.misc.dup : {}
                nv.storage = :register if nv.has_attribute_var('register')
                nv.attributes = nv.attributes.dup if nv.attributes
                nv.name = newvarname
                nv.misc[:unalias_name] = newvarname
                scope.statements << C::Declaration.new(nv)
                scope.symbol[nv.name] = nv

                dom.each { |oo| ce_patch(g_exprs[oo[0]][oo[1]], var, nv) }
                dom_ro.each { |oo|
                        ce = g_exprs[oo[0]][oo[1]]
                        if ce.op == :funcall
                                ce_patch(ce, var, nv)
                        elsif ce.rexpr.kind_of?(C::CExpression)
                                ce_patch(ce.rexpr, var, nv)
                        else
                                ce.rexpr = nv
                        end
                }
                dom_wo.each { |oo|
                        ce = g_exprs[oo[0]][oo[1]]
                        if ce.op == :funcall
                        elsif ce.lexpr.kind_of?(C::CExpression)
                                ce_patch(ce.lexpr, var, nv)
                        else
                                ce.lexpr = nv
                        end
                }

                # check if the var is only used as an __out__ parameter
                if false and dom_ro.empty? and dom_wo.empty? and dom.length == 2 and # TODO
                                arg.has_attribute('out') and not arg.has_attribute('in')
                        # *(int32*)&var_10 = &var_4;
                        # set_pointed_value(*(int32*)&var_10);  =>  writeonly var_4, may start a new domain
                        nv.add_attribute('out')
                end
        end
end
unalias_vars(scope, func) click to toggle source

duplicate vars per domain value eg eax = 1; foo(eax); eax = 2; bar(eax); => eax = 1; foo(eax) eax_1 = 2; bar(eax_1);

eax = 1; if (bla) eax = 2; foo(eax);   =>  no change
# File metasm/decompile.rb, line 995
def unalias_vars(scope, func)
        g = c_to_graph(scope)

        # unalias func args first, they may include __attr__((out)) needed by the others
        funcalls = []
        walk_ce(scope) { |ce| funcalls << ce if ce.op == :funcall }
        vars = scope.symbol.values.sort_by { |v| walk_ce(funcalls) { |ce| break true if ce.rexpr == v } ? 0 : 1 }

        # find the domains of var aliases
        vars.each { |var| unalias_var(var, scope, g) }
end
walk(scope, post=false, patch=false) { |scope| ... } click to toggle source

yields each statement (recursive) replace the element by the block return value if patch is true

# File metasm/decompile.rb, line 2943
def walk(scope, post=false, patch=false, &b)
        case scope
        when ::Array
                scope.each_with_index { |s, i|
                        v = walk(s, post, patch, &b)
                        scope[i] = v if patch and v
                }
                nil
        when C::Statement
                v = yield scope if not post
                case scope
                when C::Block
                        walk(scope.statements, post, patch, &b)
                when C::If
                        vv = yield scope.test
                        scope.test = vv if patch and vv
                        walk(scope.bthen, post, patch, &b)
                        walk(scope.belse, post, patch, &b) if scope.belse
                when C::While, C::DoWhile
                        vv = yield scope.test
                        scope.test = vv if patch and vv
                        walk(scope.body, post, patch, &b)
                when C::Return
                        vv = yield scope.value
                        scope.value = vv if patch and vv
                end
                v = yield scope if post
                v
        when C::Declaration
                walk(scope.var.initializer, post, patch, &b) if scope.var.initializer
                nil
        end
end
walk_ce(ce, post=false) { |ce| ... } click to toggle source

yield each CExpr member (recursive, allows arrays, order: self(!post), lexpr, rexpr, self(post)) if given a non-CExpr, walks it until it finds a CExpr to yield

# File metasm/decompile.rb, line 2913
def walk_ce(ce, post=false, &b)
        case ce
        when C::CExpression
                yield ce if not post
                walk_ce(ce.lexpr, post, &b)
                walk_ce(ce.rexpr, post, &b)
                yield ce if post
        when ::Array
                ce.each { |ce_| walk_ce(ce_, post, &b) }
        when C::Statement
                case ce
                when C::Block; walk_ce(ce.statements, post, &b)
                when C::If
                        walk_ce(ce.test, post, &b)
                        walk_ce(ce.bthen, post, &b)
                        walk_ce(ce.belse, post, &b) if ce.belse
                when C::While, C::DoWhile
                        walk_ce(ce.test, post, &b)
                        walk_ce(ce.body, post, &b)
                when C::Return
                        walk_ce(ce.value, post, &b) if ce.value
                end
        when C::Declaration
                walk_ce(ce.var.initializer, post, &b) if ce.var.initializer
        end
        nil
end