each CPU defines a subclass of this one
list of unique labels generated (to recognize user-defined ones)
an ExeFormat (mostly used for unique label creation, and cpu.check_reserved_name)
map asm name -> original C name (for exports etc)
an array of assembler statements (strings)
# File metasm/compile_c.rb, line 176
# Compiles every statement of the block +blk+ in order, dispatching on the
# statement class to the matching c_* handler. The c_block_enter and
# c_block_exit hooks are called before and after the statements.
#
# blk:: an object whose +statements+ are iterated (nested Blocks recurse here)
#
# Raises RuntimeError when a statement is of a class that has no handler.
def c_block(blk)
  c_block_enter(blk)
  blk.statements.each { |stmt|
    case stmt
    when CExpression then c_cexpr(stmt)
    when Declaration then c_decl(stmt.var)
    when If          then c_ifgoto(stmt.test, stmt.bthen.target)
    when Goto        then c_goto(stmt.target)
    when Label       then c_label(stmt.name)
    when Return      then c_return(stmt.value)
    when Asm         then c_asm(stmt)
    when Block       then c_block(stmt)
    else
      # a bare 'raise' here would only report "unhandled exception";
      # name the offending statement instead
      raise "unhandled statement in block: #{stmt.inspect}"
    end
  }
  c_block_exit(blk)
end
# File metasm/compile_c.rb, line 194
# Hook called by c_block before the statements of +blk+ are compiled.
# Does nothing and returns nil here.
def c_block_enter(blk)
  nil
end
# File metasm/compile_c.rb, line 197
# Hook called by c_block after the statements of +blk+ are compiled.
# Does nothing and returns nil here.
def c_block_exit(blk)
  nil
end
Compiles a C function func to asm source, appended to the array of strings (@source).
In a first pass the stack variable offsets are computed; then each statement is compiled in turn.
# File metasm/compile_c.rb, line 153
# Compiles the function +func+ and appends the resulting lines to @source:
# an empty line, the function label(s), the prolog, the compiled body, the
# epilog and a final empty line.
def c_function(func)
  # must wait the Declaration to run the CExpr for dynamic auto offsets,
  # and must run those statements once only
  # TODO alloc a stack variable to maintain the size for each dynamic array
  # TODO offset of arguments
  # TODO nested function
  c_init_state(func)

  @source << ''
  old_name = @label_oldname[func.name]
  @source << "#{old_name}:" if old_name
  @source << "#{func.name}:"

  # hide the full @source while compiling the body into a scratch array,
  # then add prolog/epilog around it (saves 1 pass)
  outer_source = @source
  @source = []
  c_block(func.initializer)
  body_source = @source
  @source = outer_source

  c_prolog
  @source.concat body_source
  c_epilog
  @source << ''
end
Compiles a C static data definition into an asm string. Returns the new alignment value.
# File metasm/compile_c.rb, line 251
# Compiles the initialized static data definition +data+ into @source.
#
# Emits an ".align" directive when the type alignment exceeds +align+, then
# the label line(s), then the initializer through c_idata_inner.
#
# data::  the variable to dump (responds to +name+, +type+, +initializer+)
# align:: the alignment value carried over from the previous definition
#
# Returns the new alignment value: the length written modulo the type
# alignment, or the type alignment itself when that remainder is 0.
def c_idata(data, align)
  w = data.type.align(@parser)
  @source << ".align #{w}" if w > align

  old_name = @label_oldname[data.name]
  @source << "#{old_name}:" if old_name
  # dup: c_idata_inner appends to this line in place
  @source << data.name.dup

  # keep only the remainder of the written length; overwriting it with w
  # would discard the length and make the ternary below pointless
  len = c_idata_inner(data.type, data.initializer) % w
  len == 0 ? w : len
end
Dumps an anonymous variable definition, appending to the last line of source. source.last is a label name or is empty before calling here. Returns the length of the data written.
# File metasm/compile_c.rb, line 265
# Dumps an anonymous variable definition of type +type+ initialized with
# +value+, appending to the last line of @source (which is a label name or is
# empty when this is called) and pushing further lines as needed.
#
# type::  a BaseType, Struct, Union or Array type
# value:: the initializer (nil means zero / empty initializer)
#
# Returns the length of the data written.
# Raises RuntimeError on an unknown base type or a malformed initializer.
def c_idata_inner(type, value)
  case type
  when BaseType
    value ||= 0

    if type.name == :void
      @source.last << ':' if not @source.last.empty?
      return 0
    end

    # floating-point values are dumped as raw bytes; the trailing ' // ' turns
    # the textual value appended below into a comment
    float_bytes = lambda { |little_fmt, big_fmt|
      fmt = @parser.endianness == :little ? little_fmt : big_fmt
      ' db ' + [value].pack(fmt).unpack('C*').join(', ') + ' // '
    }

    directive =
      case type.name
      when :__int8     then ' db '
      when :__int16    then ' dw '
      when :__int32    then ' dd '
      when :__int64    then ' dq '
      when :ptr        then " d#{%w[x b w x d x x x q][@parser.typesize[type.name]]} "
      when :float      then float_bytes.call('e', 'g')
      when :double     then float_bytes.call('E', 'G')
      when :longdouble then float_bytes.call('E', 'G') # XXX same as :double
      else raise "unknown idata type #{type.inspect} #{value.inspect}"
      end

    @source.last << directive << c_idata_inner_cexpr(value)

    @parser.typesize[type.name]

  when Struct
    value ||= []
    @source.last << ':' if not @source.last.empty?
    # could .align here, but if there is our label name just before, it should have been .aligned too..
    raise "unknown struct initializer #{value.inspect}" if not value.kind_of? ::Array
    sz = 0
    type.members.zip(value).each { |m, v|
      if m.name and wsz = type.offsetof(@parser, m.name) and sz < wsz
        @source << "db #{wsz-sz} dup(?)"
        # the padding moves us to the member offset; without this the
        # following members would be padded again from a stale position
        sz = wsz
      end
      @source << ''
      sz += c_idata_inner(m.type, v)
    }

    sz

  when Union
    value ||= []
    @source.last << ':' if not @source.last.empty?
    len = sizeof(nil, type)
    raise "unknown union initializer #{value.inspect}" if not value.kind_of? ::Array
    # last initialized member; an empty initializer falls back to the first one
    idx = value.rindex(value.compact.last) || 0
    wlen = c_idata_inner(type.members[idx].type, value[idx])
    # pad up to the union size with one explicit 0 byte per missing byte
    @source << "db #{(['0'] * (len - wlen)).join(', ')}" if wlen < len

    len

  when Array
    value ||= []
    if value.kind_of? CExpression and not value.op and value.rexpr.kind_of? ::String
      # string literal initializer
      elen = sizeof(nil, value.type.type)
      directive =
        case elen
        when 1 then ' db '
        when 2 then ' dw '
        else raise 'bad char* type ' + value.inspect
        end
      @source.last << directive << value.rexpr.inspect

      len = type.length || (value.rexpr.length+1)
      if len > value.rexpr.length
        @source.last << (', 0' * (len - value.rexpr.length))
      end

      elen * len

    elsif value.kind_of? ::Array
      @source.last << ':' if not @source.last.empty?
      len = type.length || value.length
      value.each { |v|
        @source << ''
        c_idata_inner(type.type, v)
      }
      elen = sizeof(nil, type.type)
      pad = len - value.length
      @source << " db #{pad * elen} dup(0)" if pad > 0

      # total size written (elements + padding), not just the padding
      elen * [len, value.length].max

    else
      raise "unknown static array initializer #{value.inspect}"
    end
  end
end
# File metasm/compile_c.rb, line 358
# Renders the static initializer +expr+ as an asm expression string.
# A CExpression is first reduced with the parser; the result may be an
# Integer, another Numeric, a Variable or a CExpression.
#
# Raises RuntimeError on any initializer form that is not handled.
def c_idata_inner_cexpr(expr)
  expr = expr.reduce(@parser) if expr.kind_of?(CExpression)

  # wraps an already rendered expression in a byte mask as wide as expr
  masked = lambda { |str| "(#{str} & 0#{'ff'*sizeof(expr)}h)" }

  case expr
  when ::Integer
    if expr >= 4096
      '0x%X' % expr
    else
      expr.to_s
    end
  when ::Numeric
    expr.to_s
  when Variable
    # an array variable is referenced by name, anything else by its value
    if Array === expr.type
      expr.name
    else
      c_idata_inner_cexpr(expr.initializer)
    end
  when CExpression
    if expr.lexpr
      # binary expression
      case expr.op
      when :+, :-, :*, :/, :%, :<<, :>>, :&, :|, :^
        rendered = "(#{c_idata_inner_cexpr(expr.lexpr)}#{expr.op.to_s}#{c_idata_inner_cexpr(expr.rexpr)})"
        # db are unsigned
        rendered = masked.call(rendered) if expr.type.integral?
        rendered
      #when :'.'
      #when :'->'
      #when :'[]'
      else
        raise 'unhandled initializer expr ' + expr.inspect
      end
    else
      # unary expression
      case expr.op
      when :&
        target = expr.rexpr
        raise 'unhandled addrof in initializer ' + expr.rexpr.inspect unless Variable === target
        target.name
      #when :*
      when :+
        c_idata_inner_cexpr(expr.rexpr)
      when :-
        " -#{c_idata_inner_cexpr(expr.rexpr)}"
      when nil
        rendered = c_idata_inner_cexpr(expr.rexpr)
        rendered = masked.call(rendered) if expr.rexpr.kind_of?(CExpression)
        rendered
      else
        raise 'unhandled initializer expr ' + expr.inspect
      end
    end
  else
    raise 'unhandled initializer ' + expr.inspect
  end
end
# File metasm/compile_c.rb, line 200
# Appends the label definition "<name>:" as a new line of @source.
def c_label(name)
  @source.push("#{name}:")
end
here you can add thing like stubs for PIC code
# File metasm/compile_c.rb, line 246
# Hook called by compile once all functions have been compiled.
# Does nothing and returns nil here.
def c_program_epilog
  nil
end
Fills @state.offset (an empty hash) with automatic variable => stack offset (recursive). An offset is an ::Integer or a CExpression (dynamic array). Assumes offset 0 is a ptr-size-aligned address. TODO: registerize automatic variables.
# File metasm/compile_c.rb, line 209
# Walks the statements of +block+ and records in @state.offset the stack
# offset of every declared variable that is not a function, starting from
# +off+. Nested Blocks are processed recursively.
def c_reserve_stack(block, off = 0)
  block.statements.each do |stmt|
    case stmt
    when Declaration
      var = stmt.var
      next if var.type.kind_of?(Function)

      off = c_reserve_stack_var(var, off)
      @state.offset[var] = off
    when Block
      # off is deliberately not updated after the recursion:
      # subblocks that are not nested in each other can overlap
      c_reserve_stack(stmt, off)
    end
  end
end
# File metasm/compile_c.rb, line 226
# Computes the new stack offset for +var+.
#
# var:: the variable to reserve stack space for
# off:: either an Integer offset from stack start (:ptr-size-aligned), or
#       a CExpression [[[expr, +, 7], &, -8], +, off]
#
# Returns the new offset, in the same two possible forms.
def c_reserve_stack_var(var, off)
  arr_type = var.type
  if arr_type.kind_of?(Array) and (arr_sz = arr_type.length).kind_of?(CExpression)
    # dynamic array !
    arr_sz = CExpression.new(arr_sz, :*, sizeof(nil, arr_type.type),
                             BaseType.new(:long, :unsigned)).precompile_inner(@parser, nil)
    off = CExpression.new(arr_sz, :+, off, arr_sz.type)
    # round up to a multiple of 8: (x + 7) & ~7, and ~7 is -8
    # (a mask of -7 keeps bit 0 set and does not align anything)
    off = CExpression.new(off, :+, 7, off.type)
    off = CExpression.new(off, :&, -8, off.type)
    # the trailing '+ 0' is the static part later declarations add to
    CExpression.new(off, :+, 0, off.type)
  else
    al = var.type.align(@parser)
    sz = sizeof(var)
    case off
    when CExpression
      # only the static right-hand part of a dynamic offset is advanced
      CExpression.new(off.lexpr, :+, ((off.rexpr + sz + al - 1) / al * al), off.type)
    else
      (off + sz + al - 1) / al * al
    end
  end
end
# File metasm/compile_c.rb, line 407
# Compiles the uninitialized static data definition +data+ into @source,
# as "<name> d? ?" for the fixed-width integer base types or
# "<name> db <len> dup(?)" otherwise.
#
# data::  the variable to reserve (responds to +name+ and +type+)
# align:: the alignment value carried over from the previous definition
#
# Returns the new alignment value: the reserved length modulo +align+, or
# +align+ itself when that remainder is 0.
def c_udata(data, align)
  old_name = @label_oldname[data.name]
  @source << "#{old_name}:" if old_name

  directive =
    case data.type
    when BaseType
      len = @parser.typesize[data.type.name]
      case data.type.name
      when :__int8  then 'db ?'
      when :__int16 then 'dw ?'
      when :__int32 then 'dd ?'
      when :__int64 then 'dq ?'
      else "db #{len} dup(?)"
      end
    else
      len = sizeof(data)
      "db #{len} dup(?)"
    end
  @source << "#{data.name} #{directive}"

  # keep only the remainder of the reserved length; overwriting it with
  # align would discard the length and make the ternary below pointless
  len %= align
  len == 0 ? align : len
end
return non-nil if the variable name is unsuitable to appear as is in the asm listing eg filter out asm instruction names
# File metasm/compile_c.rb, line 431
# Tells whether the name of +var+ is unsuitable to appear as is in the asm
# listing: true when the exeformat's cpu (if any) reports the name as
# reserved, otherwise whether the name is one of the db/dw/dd/dq directives.
def check_reserved_name(var)
  if @exeformat.cpu && @exeformat.cpu.check_reserved_name(var.name)
    true
  else
    %w[db dw dd dq].include?(var.name)
  end
end
compiles the c parser toplevel to assembler statements in self.source (::Array of ::String)
starts by precompiling parser.toplevel (destructively): static symbols are converted to toplevel ones, as are nested functions. Uses an ExeFormat (the argument) to create unique label/variable names.
remove typedefs/enums CExpressions: all expr types are converted to __int8/__int16/__int32/__int64 (sign kept) (incl. ptr), + void
struct member dereference/array indexes are converted to *(ptr + off); commas are converted to 2 statements; ?: are converted to If; :|| and :&& are converted to If + assignment to temporary; immediate quotedstrings/floats are converted to references to const static toplevel; postincrements are replaced by a temporary (XXX arglist); compound statements are unnested.
Asm are kept (TODO precompile clobber types) Declarations: initializers are converted to separate assignment CExpressions Blocks are kept unless empty structure dereferences/array indexing are converted to *(ptr + offset) While/For/DoWhile/Switch are converted to If/Goto Continue/Break are converted to Goto Cases are converted to Labels during Switch conversion Label statements are removed Return: 'return <foo>;' => 'return <foo>; goto <end_of_func>;', 'return;' => 'goto <eof>;' If: 'if (a) b; else c;' => 'if (a) goto l1; { c; }; goto l2; l1: { b; } l2:'
&& and || in condition are expanded to multiple If
functions returning struct are precompiled (in Declaration/CExpression/Return)
in a second phase, unused labels are removed from functions, as noop goto (goto x; x:) dead code is removed ('goto foo; bar; baz:' => 'goto foo; baz:') (TODO)
after that, toplevel is no longer valid C (bad types, blocks moved…)
then toplevel statements are sorted (.text, .data, .rodata, .bss) and compiled into asm statements in self.source
returns the asm source in a single string
# File metasm/compile_c.rb, line 91
# Compiles the parser toplevel into assembler statements in @source and
# returns them joined with newlines.
#
# The toplevel is precompiled first, then its declarations are sorted into
# functions (.text), initialized data (.data), const initialized data
# (.rodata) and uninitialized data (.bss), and each group is compiled in
# that order. Toplevel Asm statements are copied to @source as encountered.
#
# Raises RuntimeError when an auto-generated label is already known to the
# exeformat, or when a toplevel statement is neither Asm nor Declaration.
def compile
  conflicts = @exeformat.unique_labels_cache.keys & @auto_label_list.keys
  raise "compile_c name conflict: #{conflicts.inspect}" unless conflicts.empty?
  @exeformat.unique_labels_cache.update @auto_label_list

  @parser.toplevel.precompile(self)

  # reorder statements (arrays of Variables) following exe section typical order
  funcs = []
  rwdata = []
  rodata = []
  udata = []
  @parser.toplevel.statements.each do |st|
    if st.kind_of?(Asm)
      @source << st.body
      next
    end
    raise 'non-declaration at toplevel! ' + st.inspect unless st.kind_of?(Declaration)

    var = st.var
    if var.type.kind_of?(Function)
      # no initializer == storage :extern
      funcs << var if var.initializer
      next
    end
    # extern data is defined elsewhere: nothing to emit
    next if var.storage == :extern

    unless var.initializer
      udata << var
      next
    end

    is_const = var.type.qualifier.to_a.include?(:const) ||
               (var.type.kind_of?(Array) && var.type.type.qualifier.to_a.include?(:const))
    if is_const
      rodata << var
    else
      rwdata << var
    end
  end

  unless funcs.empty?
    @exeformat.compile_setsection @source, '.text'
    funcs.each { |func| c_function(func) }
    c_program_epilog
  end

  # the alignment value is threaded through all data definitions
  align = 1
  [[rwdata, '.data'], [rodata, '.rodata']].each do |vars, section|
    next if vars.empty?

    @exeformat.compile_setsection @source, section
    vars.each { |data| align = c_idata(data, align) }
  end

  unless udata.empty?
    @exeformat.compile_setsection @source, '.bss'
    udata.each { |data| align = c_udata(data, align) }
  end

  # needed to allow asm parser to use our autogenerated label names
  @exeformat.unique_labels_cache.delete_if { |k, _v| @auto_label_list[k] }

  @source.join("\n")
end
allows 'raise self' (eg struct.offsetof)
# File metasm/compile_c.rb, line 34
# Builds a ParseError whose message is +msg+ prefixed with the expression
# currently stored in @curexpr; this is what allows 'raise self'.
def exception(msg='EOF unexpected')
  ParseError.new("near #{@curexpr}: #{msg}")
end
# File metasm/compile_c.rb, line 46
# Asks the exeformat for a new label derived from +base+, records it in
# @auto_label_list and returns it.
def new_label(base='')
  @exeformat.new_label(base).tap { |label| @auto_label_list[label] = true }
end
# File metasm/compile_c.rb, line 54
# Forwards all arguments to @parser.sizeof and returns its result.
def sizeof(*a)
  @parser.sizeof(*a)
end
# File metasm/compile_c.rb, line 52
# Returns the toplevel of the underlying parser.
def toplevel
  @parser.toplevel
end
# File metasm/compile_c.rb, line 53
# Returns the typesize table of the underlying parser.
def typesize
  @parser.typesize
end