# File metasm/cpu/st20/main.rb, line 11
# Sets up an ST20 CPU description.
# size:: word size in bits (defaults to 32), stored in @size
# The byte order is forced to little-endian and the opcode tables are
# built by init_opcodes.
def initialize(size=32)
  super()
  @size = size
  @endianness = :little
  init_opcodes
end
# File metasm/cpu/st20/decompile.rb, line 121
# Describes the function-call ABI as a hash: the return value is found in :a,
# and every symbol listed by register_symbols is reported as changed.
def abi_funcall
  abi = {}
  abi[:retval] = :a
  abi[:changed] = register_symbols
  abi
end
checks if expr is a valid return expression matching the :saveip instruction
# File metasm/cpu/st20/decode.rb, line 139
# Tells whether expr, once fully reduced, is a 2-byte Indirection whose
# target is exactly Expression[:sp].
# di is accepted but not used.
# NOTE(review): the bindings visible in this file track the workspace pointer
# as :wspace with @size/8-byte words and never mention :sp - confirm that
# :sp / length 2 is really what a return address looks like on this CPU.
def backtrace_is_function_return(expr, di=nil)
  reduced = Expression[expr].reduce_rec
  return false unless reduced.kind_of?(Indirection)
  return false unless reduced.len == 2
  reduced.target == Expression[:sp]
end
returns true if the expression is an address on the stack
# File metasm/cpu/st20/decode.rb, line 169
# Returns true when :sp appears among the externals of expr.
# NOTE(review): other methods in this file express stack/workspace accesses
# through :wspace, not :sp - confirm :sp is the intended symbol.
def backtrace_is_stack_address(expr)
  externals = Expression[expr].expr_externals
  externals.include?(:sp)
end
updates the function backtrace_binding
# File metasm/cpu/st20/decode.rb, line 145
# Refreshes f.backtrace_binding for each register in wantregs.
# For every wanted register the value is backtracked from each address of
# retaddrlist (snapshotting at faddr); the binding receives that value only
# when all return addresses agree on exactly one result, and
# Expression::Unknown otherwise.
# Nothing is touched when retaddrlist is nil/false.
# Returns the (mutated) binding hash.
def backtrace_update_function_binding(dasm, faddr, f, retaddrlist, *wantregs)
  binding = f.backtrace_binding

  if retaddrlist
    wantregs.each { |reg|
      # mark as unknown first: the binding is visible while backtracking
      binding[reg] = Expression::Unknown

      found = retaddrlist.inject([]) { |acc, retaddr|
        acc | dasm.backtrace(Expression[reg], retaddr,
                             :include_start => true, :snapshot_addr => faddr, :origin => retaddr)
      }

      binding[reg] = (found.length == 1 ? found.first : Expression::Unknown)
    }
  end

  binding
end
# File metasm/cpu/st20/decode.rb, line 66
# Post-processes a decoded instruction: for 'j', 'cj' and 'fcall' the last
# argument is a displacement, which is replaced by
# addr + di.bin_length + displacement (reduced).
# Other instructions are left untouched. Returns di.
def decode_instr_interpret(di, addr)
  insn = di.instruction
  if %w[j cj fcall].include?(insn.opname)
    delta = insn.args.last.reduce
    target = Expression[[addr, :+, di.bin_length], :+, delta].reduce
    insn.args[-1] = Expression[target]
  end
  di
end
# File metasm/cpu/st20/decode.rb, line 21
# Decodes the bytes of one instruction from edata into di, starting with the
# accumulated prefix word pfx. Returns di.
#
# bytestream structure :
#  sequence of prefixes, which build a word 4 bits at a time
#  last element = function code
#  'opr' is a special function, means use the prefix word as an opcode number from 'operate'
def decode_instr_op(edata, di, pfx=0)
  loop do
    # end of stream, or 4 bytes already consumed: emit the word built so far
    if edata.ptr >= edata.length or di.bin_length >= 4
      di.instruction.args << Expression[pfx]
      return di
    end

    byte = edata.read(1).unpack('C')[0]
    # low nibble extends the operand word, high nibble selects the function
    pfx = (pfx << 4) | (byte & 0x0f)
    di.opcode = @opcode_list[(byte & 0xf0) >> 4]
    di.instruction.opname = di.opcode.name
    di.bin_length += 1

    case di.instruction.opname
    when 'pfix'
      # plain prefix: keep accumulating with the next byte
      next
    when 'nfix'
      pfx ^= -1
      # will be displayed on EOS, and we cannot represent the whole decoded pfx with 'nfix'
      di.instruction.opname = 'pfix'
      next
    when 'opr'
      if op = @op_operate[pfx]
        # operands have no arg (they work on the implicit 3-register stack A B C)
        di.instruction.opname = op
        di.opcode = @opc_operate[op] || di.opcode
      else
        # unknown operand, keep the generic form
        di.instruction.args << Expression[pfx]
      end
    else
      di.instruction.args << Expression[pfx]
    end

    return di
  end
end
decodes the instruction at edata.ptr, mapped at virtual address addr
# File metasm/cpu/st20/decode.rb, line 13
# Decodes one instruction at the current edata position.
# addr:: address recorded in the resulting DecodedInstruction
# Returns nil when edata has no byte left, otherwise the decoded
# instruction after decode_instr_op and decode_instr_interpret.
def decode_instruction(edata, addr)
  return unless edata.ptr < edata.length

  di = DecodedInstruction.new(self)
  di.address = addr
  decode_instr_interpret(decode_instr_op(edata, di), addr)
end
# File metasm/cpu/st20/decompile.rb, line 125
# Turns the disassembled blocks of a function into C statements appended to
# func.initializer.statements.
# dcmp::     decompiler object (gives access to dasm, c_parser, decompile_cexpr...)
# myblocks:: list of [block address, list of destination addresses]; consumed (shifted) here
# deps::     { block address => registers that must be materialized for that block }
#            (presumably the result of decompile_func_finddeps - confirm against caller)
# func::     C function being built
# nextaddr:: address expected to follow the last block; a goto is emitted when
#            the single successor differs from it
def decompile_blocks(dcmp, myblocks, deps, func, nextaddr = nil)
  scope = func.initializer
  # make the function arguments resolvable by name in the body scope
  func.type.args.each { |a| scope.symbol[a.name] = a }
  stmts = scope.statements
  # myblocks is emptied by the loop below, keep a copy for the final cleanup
  blocks_toclean = myblocks.dup
  until myblocks.empty?
    b, to = myblocks.shift
    if l = dcmp.dasm.get_label_at(b)
      stmts << C::Label.new(l)
    end

    # list of assignments [[dest reg, expr assigned]]
    ops = []
    # reg binding (reg => value, values.externals = regs at block start)
    binding = {}
    # Expr => CExpr
    ce = lambda { |*e| dcmp.decompile_cexpr(Expression[Expression[*e].reduce], scope) }
    # Expr => Expr.bind(binding) => CExpr
    ceb = lambda { |*e| ce[Expression[*e].bind(binding)] }

    # dumps a CExprs that implements an assignment to a reg (uses ops[], patches op => [reg, nil])
    commit = lambda {
      deps[b].map { |k|
        # index of the last recorded assignment to k, nil if none
        [k, ops.rindex(ops.reverse.find { |r, v| r == k })]
      }.sort_by { |k, i| i.to_i }.each { |k, i|
        next if not i or not binding[k]
        e = k
        final = []
        # walk the assignments backwards, substituting until an already-committed reg is hit
        ops[0..i].reverse_each { |r, v|
          final << r if not v
          e = Expression[e].bind(r => v).reduce if not final.include? r
        }
        ops[i][1] = nil
        binding.delete k
        stmts << ce[k, :'=', e] if k != e
      }
    }

    # returns an array to use as funcall arguments
    get_func_args = lambda { |di, f|
      # XXX see remarks in #finddeps
      args_todo = f.type.args.to_a.dup
      args = []
      args_todo.each { |a_|
        if r = a_.has_attribute_var('register')
          args << Expression[r.to_sym]
        else
          # argument not tied to a register: a literal 0 is passed
          args << Expression[0]
        end
      }
      args.map { |e| ceb[e] }
    }

    # go !
    dcmp.dasm.decoded[b].block.list.each_with_index { |di, didx|
      if di.opcode.props[:setip] and not di.opcode.props[:stopexec]
        # conditional jump
        commit[]
        n = dcmp.backtrace_target(get_xrefs_x(dcmp.dasm, di).first, di.address)
        cc = ceb[:a, :'!=', 0]
        # XXX switch/indirect/multiple jmp
        stmts << C::If.new(C::CExpression[cc], C::Goto.new(n))
        # this destination is handled by the If, do not goto it again below
        to.delete dcmp.dasm.normalize(n)
        next
      end

      case di.instruction.opname
      when 'ret'
        commit[]
        ret = nil
        # return the content of :a unless the function returns void
        ret = C::CExpression[ceb[:a]] unless func.type.type.kind_of? C::BaseType and func.type.type.name == :void
        stmts << C::Return.new(ret)
      when 'fcall' # :saveip
        n = dcmp.backtrace_target(get_xrefs_x(dcmp.dasm, di).first, di.address)
        args = []
        if f = dcmp.c_parser.toplevel.symbol[n] and f.type.kind_of? C::Function and f.type.args
          args = get_func_args[di, f]
        end
        commit[]
        #next if not di.block.to_subfuncret

        if not n.kind_of? ::String or (f and not f.type.kind_of? C::Function)
          # indirect funcall
          fptr = ceb[n]
          binding.delete n
          proto = C::Function.new(C::BaseType.new(:int))
          proto = f.type if f and f.type.kind_of? C::Function
          f = C::CExpression[[fptr], C::Pointer.new(proto)]
        elsif not f
          # internal functions are predeclared, so this one is extern
          f = C::Variable.new
          f.name = n
          f.type = C::Function.new(C::BaseType.new(:int))
          if dcmp.recurse > 0
            dcmp.c_parser.toplevel.symbol[n] = f
            dcmp.c_parser.toplevel.statements << C::Declaration.new(f)
          end
        end
        commit[]
        binding.delete :a
        e = C::CExpression[f, :funcall, args]
        # store the call result in :a when a later block needs it and the callee is not void
        e = C::CExpression[ce[:a], :'=', e, f.type.type] if deps[b].include? :a and f.type.type != C::BaseType.new(:void)
        stmts << e
      when 'in', 'out'
        # rendered as a call to an intrinsic, declared on first use
        if not dcmp.c_parser.toplevel.symbol["intrinsic_#{di.instruction.opname}"]
          dcmp.c_parser.parse("void intrinsic_#{di.instruction.opname}(unsigned int len, unsigned int channel, char *buf);")
        end
        f = dcmp.c_parser.toplevel.symbol["intrinsic_#{di.instruction.opname}"]
        stmts << C::CExpression.new(f, :funcall, [ceb[:a], ceb[:b], ceb[:c]], f.type.type)
      else
        bd = get_fwdemu_binding(di)
        if di.backtrace_binding[:incomplete_binding]
          # instruction without a usable binding: emit it as inline asm
          commit[]
          stmts << C::Asm.new(di.instruction.to_s, nil, nil, nil, nil, nil)
        else
          update = {}
          bd.each { |k, v|
            if k.kind_of? ::Symbol and not deps[b].include? k
              # register not needed outside the block: only track its value
              ops << [k, v]
              update[k] = Expression[Expression[v].bind(binding).reduce]
            else
              stmts << ceb[k, :'=', v]
              stmts.pop if stmts.last.kind_of? C::Variable # [:eflag_s, :=, :unknown].reduce
            end
          }
          # applied after the loop so every value of bd is bound against the pre-instruction state
          binding.update update
        end
      end
    }
    commit[]

    case to.length
    when 0
      if not myblocks.empty? and not %w[ret jmp].include? dcmp.dasm.decoded[b].block.list.last.instruction.opname
        puts " block #{Expression[b]} has no to and don't end in ret"
      end
    when 1
      # explicit goto only when the successor is not the block emitted next
      if (myblocks.empty? ? nextaddr != to[0] : myblocks.first.first != to[0])
        stmts << C::Goto.new(dcmp.dasm.auto_label_at(to[0], 'unknown_goto'))
      end
    else
      puts " block #{Expression[b]} with multiple to"
    end
  end

  # cleanup di.bt_binding (we set :frameptr etc in those, this may confuse the dasm)
  blocks_toclean.each { |b_, to_|
    dcmp.dasm.decoded[b_].block.list.each { |di|
      di.backtrace_binding = nil
    }
  }
end
# File metasm/cpu/st20/decompile.rb, line 278
# Drops, in place, every argument of func that carries both a register(...)
# attribute and the 'unused' attribute.
# dcmp and entry are not used here.
# Returns the argument list (a fresh empty array when func has no args).
def decompile_check_abi(dcmp, entry, func)
  params = func.type.args || []
  params.reject! { |arg| arg.has_attribute_var('register') && arg.has_attribute('unused') }
  params
end
lists the variable dependencies of each block and removes useless writes; returns { blockaddr => [list of vars that are needed by a following block] }
# File metasm/cpu/st20/decompile.rb, line 32
# Computes, for each block, the registers it writes that a following block
# may read, and declares as function arguments the registers that are read
# before being written.
# dcmp::   decompiler object (dasm, c_parser, backtrace_target)
# blocks:: list of [block address, list of destination block addresses]
# func::   C function; register arguments are appended to func.type.args
# Returns { block address => [written symbols still needed afterwards] }.
def decompile_func_finddeps(dcmp, blocks, func)
  deps_r = {} ; deps_w = {} ; deps_to = {}
  deps_subfunc = {} # things read/written by subfuncs

  # find read/writes by each block
  blocks.each { |b, to|
    deps_r[b] = [] ; deps_w[b] = [] ; deps_to[b] = to
    deps_subfunc[b] = []

    blk = dcmp.dasm.decoded[b].block
    blk.list.each { |di|
      a = di.backtrace_binding.values
      w = []
      di.backtrace_binding.keys.each { |k|
        case k
        when ::Symbol; w |= [k]
        else a |= Expression[k].externals
        end
      }
      decompile_func_finddeps_di(dcmp, func, di, a, w)

      # a read only counts if the block has not written the symbol before
      deps_r[b] |= a.map { |ee| Expression[ee].externals.grep(::Symbol) }.flatten - [:unknown] - deps_w[b]
      deps_w[b] |= w.map { |ee| Expression[ee].externals.grep(::Symbol) }.flatten - [:unknown]
    }
    blk.each_to_normal { |t|
      t = dcmp.backtrace_target(t, blk.list.last.address)
      next if not t = dcmp.c_parser.toplevel.symbol[t]
      t.type = C::Function.new(C::BaseType.new(:int)) if not t.type.kind_of? C::Function
      t.type.args.to_a.each { |arg|
        # register arguments are tagged "register(<name>)" (see below), so the
        # name has to be extracted with has_attribute_var
        if reg = arg.has_attribute_var('register')
          deps_subfunc[b] |= [reg.to_sym]
        end
      }
    }
  }

  bt = blocks.transpose
  # blocks that are the destination of no other block
  roots = bt[0] - bt[1].flatten # XXX jmp 1stblock ?

  # find regs read and never written (must have been set by caller and are part of the func ABI)
  uninitialized = lambda { |b, r, done|
    if not deps_r[b]
    elsif deps_r[b].include?(r)
      true
    elsif deps_w[b].include?(r)
    else
      done << b
      (deps_to[b] - done).find { |tb| uninitialized[tb, r, done] }
    end
  }

  regargs = []
  register_symbols.each { |r|
    if roots.find { |root| uninitialized[root, r, []] }
      regargs << r
    end
  }

  # TODO honor user-defined prototype if available (eg no, really, eax is not read in this function returning al)
  regargs.sort_by { |r| r.to_s }.each { |r|
    a = C::Variable.new(r.to_s, C::BaseType.new(:int, :unsigned))
    a.add_attribute("register(#{r})")
    func.type.args << a
  }

  # remove writes from a block if no following block read the value
  dw = {}
  deps_w.each { |b, deps|
    dw[b] = deps.reject { |dep|
      ret = true
      done = []
      todo = deps_to[b].dup
      while a = todo.pop
        next if done.include? a
        done << a
        if not deps_r[a] or deps_r[a].include? dep
          # unknown block, or the value is read: the write must be kept
          ret = false
          break
        elsif not deps_w[a].include? dep
          # neither read nor overwritten here: keep following the flow
          todo.concat deps_to[a]
        end
      end
      ret
    }
  }

  dw
end
adds the di-specific registers written/accessed
# File metasm/cpu/st20/decompile.rb, line 21
# Adds to a (symbols read) the registers implicitly accessed by di:
# * 'ret' reads :a, unless the function's return type is the base type void
# * 'in' and 'out' read :a, :b and :c
# dcmp and w are not used here.
def decompile_func_finddeps_di(dcmp, func, di, a, w)
  case di.instruction.opname
  when 'ret'
    # func.type is the function type, func.type.type is its return type
    rettype = func.type.type
    returns_void = rettype.kind_of?(C::BaseType) && rettype.name == :void
    a << :a unless returns_void # standard ABI
  when 'in', 'out'
    a << :a << :b << :c
  end
end
temporarily setup dasm.address_binding so that backtracking stack-related offsets resolve in :frameptr (relative to func start)
# File metasm/cpu/st20/decompile.rb, line 13
# Yields each element of blocks while dasm.address_binding[funcstart] is
# temporarily replaced by { :wspace => :frameptr }.
# The previous binding is put back afterwards, even if the caller's block
# raises, so a failure cannot leave the override in place.
# Returns the previous binding.
def decompile_makestackvars(dasm, funcstart, blocks)
  oldfuncbd = dasm.address_binding[funcstart]
  dasm.address_binding[funcstart] = { :wspace => :frameptr }
  begin
    blocks.each { |block| yield block }
  ensure
    dasm.address_binding[funcstart] = oldfuncbd
  end
  oldfuncbd
end
# File metasm/cpu/st20/decode.rb, line 77
# Returns the backtrace binding of di: a hash whose keys are the locations
# written by the instruction (register symbols or Indirections) and whose
# values are the expressions assigned to them.
# Word-sized offsets use sz = @size/8 bytes.
# Instructions without an entry below get all three registers set to
# Unknown plus an :incomplete_binding marker.
# NOTE(review): 'ldpi' is modelled as a memory read at next_addr + a; confirm
# against the instruction set manual whether it should be the address itself.
def get_backtrace_binding(di)
  arg = di.instruction.args[0]
  sz = @size/8
  unk = Expression::Unknown
  case di.instruction.opname
  when 'j' then {}
  when 'ldlp' then { :a => Expression[:wspace, :+, [sz, :*, arg]], :b => :a, :c => :b }
  when 'ldnl' then { :a => Indirection[[:a, :+, [sz, :*, arg]], sz, di] }
  when 'ldc' then { :a => arg, :b => :a, :c => :b }
  when 'ldnlp' then { :a => Expression[:a, :+, [sz, :*, arg]] }
  when 'ldl' then { :a => Indirection[[:wspace, :+, [sz, :*, arg]], sz, di], :b => :a, :c => :b }
  when 'adc' then { :a => Expression[:a, :+, arg] }
  when 'fcall'
    {
      :a => Expression[di.next_addr],
      :wspace => Expression[:wspace, :-, [4*sz]],
      Indirection[[:wspace, :-, [4*sz]], sz, di] => di.next_addr,
      Indirection[[:wspace, :-, [3*sz]], sz, di] => :a,
      Indirection[[:wspace, :-, [2*sz]], sz, di] => :b,
      Indirection[[:wspace, :-, [1*sz]], sz, di] => :c,
    }
  # cj+(:a != 0) => a=b, b=c, c=unk ; (:a == 0) => jump, a=a, b=b, c=c
  when 'cj' then { :a => unk, :b => unk, :c => unk }
  # the workspace moves by arg words: scale by the word size like ldlp/ldl/stl
  # (the multiplier used to be a literal 4, only right for @size == 32)
  when 'ajw' then { :wspace => Expression[:wspace, :+, [sz, :*, arg]] }
  when 'eqc' then { :a => Expression[:a, :==, arg] }
  when 'stl' then { Indirection[[:wspace, :+, [sz, :*, arg]], sz, di] => :a, :a => :b, :b => :c, :c => unk }
  when 'stnl' then { Indirection[[:a, :+, [sz, :*, arg]], sz, di] => :b, :a => :c, :b => unk, :c => unk }
  when 'add' then { :a => Expression[:b, :+, :a], :b => :c, :c => unk }
  when 'sub' then { :a => Expression[:b, :-, :a], :b => :c, :c => unk }
  when 'prod' then { :a => Expression[:b, :*, :a], :b => :c, :c => unk }
  when 'xor' then { :a => Expression[:b, :^, :a], :b => :c, :c => unk }
  when 'ldpi' then { :a => Indirection[[di.next_addr, :+, :a], sz, di] }
  when 'mint' then { :a => Expression[-1 << (@size-1)], :b => :a, :c => :b }
  when 'in' then { :a => unk, :b => unk, :c => unk } # read a bytes from channel b at buffer c
  when 'out' then { :a => unk, :b => unk, :c => unk } # write a bytes to channel b from buffer c
  when 'lb' then { :a => Indirection[:a, 1, di] }
  when 'sb' then { Indirection[:a, 1, di] => Expression[:b, :&, 0xff], :a => :c, :b => unk, :c => unk }
  when 'bsub' then { :a => Expression[:a, :+, :b], :b => :c, :c => unk }
  when 'ssub' then { :a => Expression[:a, :+, [2, :*, :b]], :b => :c, :c => unk }
  when 'wsub' then { :a => Expression[:a, :+, [sz, :*, :b]], :b => :c, :c => unk }
  when 'gajw' then { :wspace => Expression[:a], :a => Expression[:wspace] }
  when 'dup' then { :b => :a, :c => :b }
  else
    puts "unhandled instruction to backtrace: #{di}" if $VERBOSE
    { :incomplete_binding => Expression[1], :a => unk, :b => unk, :c => unk }
  end
end
# File metasm/cpu/st20/decode.rb, line 125
# Returns the list of expressions di may transfer execution to.
# Empty unless the opcode is flagged :setip.
# 'j', 'cj' and 'fcall' all carry their destination as first argument (it is
# resolved to an absolute address by decode_instr_interpret), so the three of
# them report it; 'fcall' used to be left out, which hid call targets.
# dasm is not used here.
def get_xrefs_x(dasm, di)
  return [] if not di.opcode.props[:setip]

  case di.opcode.basename
  when 'j', 'cj', 'fcall'
    [Expression[di.instruction.args.first]]
  #when 'ret'
    #[Indirection[:sp, 2, di.address]]
  else
    []
  end
end
# File metasm/cpu/st20/opcodes.rb, line 11
# Builds the opcode tables:
# @op_function:: function code byte => mnemonic
# @op_operate::  operate number => mnemonic
# @opcode_list:: one Opcode per function code, ordered by code, with the
#                control-flow properties of 'cj', 'j' and 'fcall' set
# @opc_operate:: dedicated Opcode objects for operate instructions (only 'ret')
def init_opcodes
  @op_function = op_get_function
  @op_operate = op_get_operate

  # control-flow properties to raise, per mnemonic
  flow_props = {
    'cj' => [:setip],
    'j' => [:stopexec, :setip],
    'fcall' => [:saveip, :stopexec, :setip],
  }

  @opcode_list = @op_function.sort.map { |bin, name|
    opcode = Opcode.new(name, bin)
    flow_props.fetch(name, []).each { |prop| opcode.props[prop] = true }
    opcode
  }

  ret = Opcode.new('ret', 0)
  ret.props[:stopexec] = true
  ret.props[:setip] = true
  @opc_operate = {}
  @opc_operate['ret'] = ret
end
# File metasm/cpu/st20/opcodes.rb, line 27
# Returns the table of primary functions: the key is the function code as it
# appears in the high nibble of an instruction byte (0x00, 0x10, ... 0xf0),
# the value is the mnemonic.
def op_get_function
  mnemonics = %w[
    j ldlp pfix ldnl
    ldc ldnlp nfix ldl
    adc fcall cj ajw
    eqc stl stnl opr
  ]
  table = {}
  mnemonics.each_with_index { |name, nibble| table[nibble << 4] = name }
  table
end
# File metasm/cpu/st20/opcodes.rb, line 36
# Returns the table of operate instructions: operate number (0x00..0x3F) =>
# mnemonic. The mnemonics are listed in numerical order, 8 per row.
def op_get_operate
  mnemonics = %w[
    rev dup rot arot add sub mul wsub
    not and or shl shr jab timeslice breakpoint
    addc subc mac umac smul smacinit smacloop biquad
    divstep unsign saturate gt gtu order orderu ashr
    xor xbword xsword bitld bitst bitmask statusset statusclr
    statustst rmw lbinc sbinc lsinc lsxinc ssinc lwinc
    swinc ecall eret run stop signal wait enqueue
    dequeue ldtdesc ldpi gajw ldprodid io swap32 nop
  ]
  table = {}
  mnemonics.each_with_index { |name, number| table[number] = name }
  table
end
# File metasm/cpu/st20/main.rb, line 18
# Returns the symbols of the three registers :a, :b and :c, as a new array on
# each call.
def register_symbols
  %i[a b c]
end
# File metasm/cpu/st20/main.rb, line 22
# Returns the rendering of instruction i as an array of fragments: the
# opname, then - if there are arguments - a ' ' followed by the arguments
# themselves separated by ', ' strings.
def render_instruction(i)
  out = [i.opname]
  return out if i.args.empty?

  out << ' '
  i.args.each_with_index { |arg, idx|
    out << ', ' if idx > 0
    out << arg
  }
  out
end
updates an instruction's argument replacing an expression with another (eg label renamed)
# File metasm/cpu/st20/decode.rb, line 174
# Rewrites, in place, the arguments of instruction i: an argument equal to
# old becomes new as is; any other argument is rebound with old => new,
# reduced and wrapped in an Expression.
# Returns the updated argument array.
def replace_instr_arg_immediate(i, old, new)
  i.args.map! { |arg|
    if arg == old
      new
    else
      Expression[arg.bind(old => new).reduce]
    end
  }
end