libdwarf/dwarf.h
for tags between DT_LOPROC and DT_HIPROC, use DT_PROC[tag-DT_LOPROC]
# File metasm/exe_format/elf.rb, line 639
# Computes the GNU-style hash of a symbol name: starting from 5381, each
# byte updates the hash as hash*33 + byte, truncated to 32 bits.
# Hashing stops at the first NUL byte, if any.
def self.gnu_hash_symbol_name(name)
  hash = 5381
  name.each_byte { |byte|
    break if byte == 0
    hash = (hash*33 + byte) & 0xffff_ffff
  }
  hash
end
# File metasm/exe_format/elf.rb, line 629
# Computes the 28-bit hash of a symbol name: for each byte, shift the hash
# left by 4, add the byte, fold the bits that overflowed past bit 27 back
# into bits 4..7, then drop them. Hashing stops at the first NUL byte.
def self.hash_symbol_name(name)
  hash = 0
  name.each_byte { |byte|
    break if byte == 0
    hash = (hash << 4) + byte
    hash ^= (hash >> 24) & 0xf0
    hash &= 0x0fff_ffff
  }
  hash
end
# File metasm/exe_format/elf.rb, line 647
# Builds an empty ELF object: a fresh header, no tags, one UNDEF symbol,
# one NULL section and no segments/relocations. Endianness and bitsize come
# from +cpu+ when given, and default to little-endian 32-bit otherwise.
def initialize(cpu=nil)
  @header = Header.new
  @tag = {}

  undef_symbol = Symbol32.new
  undef_symbol.shndx = 'UNDEF'
  @symbols = [undef_symbol]

  @relocations = []

  null_section = Section.new
  null_section.type = 'NULL'
  @sections = [null_section]

  @segments = []

  @endianness = cpu ? cpu.endianness : :little
  @bitsize = cpu ? cpu.size : 32

  super(cpu)
end
Makes name an export of self.encoded at the file offset matching addr; returns the label name if successful.
# File metasm/exe_format/elf_decode.rb, line 95
# Exports a new label for +name+ at the file offset that +addr+ maps to.
# Returns the label, or nil (with a warning when $VERBOSE) if the address
# is not covered by any mapped segment.
def add_label(name, addr)
  off = addr_to_off(addr)
  if off
    label = new_label(name)
    @encoded.add_export label, off
    label
  else
    puts "W: Elf: #{name} points to unmmaped space #{'0x%08X' % addr}" if $VERBOSE
    nil
  end
end
memory address -> file offset; handles relocated LoadedELF
# File metasm/exe_format/elf_decode.rb, line 67
# Converts a memory address to a file offset. When module_address is 0,
# @load_address (initialized to 0 if unset) is subtracted from +addr+ first;
# otherwise the address is used as is.
def addr_to_fileoff(addr)
  base = if module_address == 0
    @load_address ||= 0
  else
    0
  end
  addr_to_off(addr - base)
end
transforms a virtual address to a file offset, using the addresses of the mmapped (LOAD) segments
# File metasm/exe_format/elf_decode.rb, line 60
# Maps a virtual address to a file offset using the LOAD segment whose
# [vaddr, vaddr+memsz) range contains it. Returns nil when +addr+ is nil
# or no such segment exists.
def addr_to_off(addr)
  return unless addr
  seg = @segments.find { |cur|
    cur.type == 'LOAD' and cur.vaddr <= addr and cur.vaddr + cur.memsz > addr
  }
  seg && addr - seg.vaddr + seg.offset
end
References to FUNC symbols are transformed to JMPSLOT relocations (i.e. calls through .plt). TODO: ET_REL support.
# File metasm/exe_format/elf_encode.rb, line 666 def arch_386_create_reloc(section, off, binding, rel=nil) rel ||= section.encoded.reloc[off] if rel.endianness != @endianness or not [:u32, :i32, :a32].include? rel.type puts "ELF: 386_create_reloc: ignoring reloc #{rel.target} in #{section.name}: bad reloc type" if $VERBOSE return end startaddr = label_at(@encoded, 0) r = Relocation.new r.offset = Expression[label_at(section.encoded, 0, 'sect_start'), :+, off] if Expression[rel.target, :-, startaddr].bind(binding).reduce.kind_of?(::Integer) # this location is relative to the base load address of the ELF r.type = 'RELATIVE' else et = rel.target.externals extern = et.find_all { |name| not binding[name] } if extern.length != 1 puts "ELF: 386_create_reloc: ignoring reloc #{rel.target} in #{section.name}: #{extern.inspect} unknown" if $VERBOSE return end if not sym = @symbols.find { |s| s.name == extern.first } puts "ELF: 386_create_reloc: ignoring reloc #{rel.target} in #{section.name}: undefined symbol #{extern.first}" if $VERBOSE return end r.symbol = sym rel.target = Expression[rel.target, :-, sym.name] if rel.target.bind(binding).reduce.kind_of? ::Integer r.type = '32' elsif Expression[rel.target, :+, label_at(section.encoded, 0)].bind(section.encoded.binding).reduce.kind_of? ::Integer rel.target = Expression[[rel.target, :+, label_at(section.encoded, 0)], :+, off] r.type = 'PC32' # TODO tls ? else puts "ELF: 386_create_reloc: ignoring reloc #{sym.name} + #{rel.target}: cannot find matching standard reloc type" if $VERBOSE return end end @relocations << r end
creates the .plt/.got from the @relocations
# File metasm/exe_format/elf_encode.rb, line 348
# Creates the .got.plt section (and .got / .plt on demand) from @relocations.
# Every PC32 relocation with a symbol is rewritten: non-FUNC symbols get a
# .got entry and become GLOB_DAT, other symbols get a .plt thunk and become
# JMP_SLOT. The matching EncodedData reloc target is patched to point to the
# new entry. Does nothing when @relocations is empty.
def arch_386_preencode_reloc
  return if @relocations.empty?

  # if .got.plt does not exist, the dynamic loader segfaults
  if not gotplt = @sections.find { |s| s.type == 'PROGBITS' and s.name == '.got.plt' }
    gotplt = Section.new
    gotplt.name = '.got.plt'
    gotplt.type = 'PROGBITS'
    gotplt.flags = %w[ALLOC WRITE]
    gotplt.addralign = @bitsize/8
    # _DYNAMIC is not base-relocated at runtime
    encode_add_section gotplt
  end
  # 3-word header: _DYNAMIC, 0, 0 (see the .got.plt layout described below)
  gotplt.encoded ||= (EncodedData.new('', :export => {'_PLT_GOT' => 0}) << encode_xword('_DYNAMIC') << encode_xword(0) << encode_xword(0))
  @tag['PLTGOT'] = label_at(gotplt.encoded, 0)
  plt = nil

  # assembles a source snippet in our namespace, returns its EncodedData
  shellcode = lambda { |c| Shellcode.new(@cpu).share_namespace(self).assemble(c).encoded }

  # iterate over a copy: the loop body may delete entries from @relocations
  @relocations.dup.each { |r|
    case r.type
    when 'PC32'
      next if not r.symbol

      if r.symbol.type != 'FUNC'
        # external data xref: generate a GOT entry
        # XXX reuse .got.plt ?
        if not got ||= @sections.find { |s| s.type == 'PROGBITS' and s.name == '.got' }
          got = Section.new
          got.name = '.got'
          got.type = 'PROGBITS'
          got.flags = %w[ALLOC WRITE]
          got.addralign = @bitsize/8
          got.encoded = EncodedData.new
          encode_add_section got
        end

        prevoffset = r.offset
        gotlabel = r.symbol.name + '_got_entry'
        if not got.encoded.export[gotlabel]
          # create the got thunk
          got.encoded.add_export(gotlabel, got.encoded.length)
          got.encoded << encode_xword(0)

          # transform the reloc PC32 => GLOB_DAT
          r.type = 'GLOB_DAT'
          r.offset = Expression[gotlabel]
          r.addend = 0 if @bitsize == 64
        else
          # the symbol already has a got entry: this reloc is redundant
          @relocations.delete r
        end

        # prevoffset is label_section_start + int_section_offset
        target_s = @sections.find { |s| s.encoded and s.encoded.export[prevoffset.lexpr] == 0 }
        rel = target_s.encoded.reloc[prevoffset.rexpr]
        # [foo] => [foo - reloc_addr + gotlabel]

        rel.target = Expression[[rel.target, :-, prevoffset], :+, gotlabel]
        next
      end

      # convert to .plt entry
      #
      # [.plt header]
      # plt_start:			# caller set ebx = gotplt if generate_PIC
      #  push [gotplt+4]
      #  jmp [gotplt+8]
      #
      # [.plt thunk]
      # some_func_thunk:
      #  jmp [gotplt+func_got_offset]
      # some_func_got_default:
      #  push some_func_jmpslot_offset_in_.rel.plt
      #  jmp plt_start
      #
      # [.got.plt header]
      # dd _DYNAMIC
      # dd 0				# rewritten to GOTPLT? by ld-linux
      # dd 0				# rewritten to dlresolve_inplace by ld-linux
      #
      # [.got.plt + func_got_offset]
      # dd some_func_got_default	# lazily rewritten to the real addr of some_func by jmp dlresolve_inplace
      #				# base_relocated ?

      # in the PIC case, _dlresolve imposes us to use the ebx register (which may not be saved by the calling function..)
      # also geteip trashes eax, which may interfere with regparm(3)
      base = @cpu.generate_PIC ? @bitsize == 32 ? 'ebx' : 'rip-$_+_PLT_GOT' : '_PLT_GOT'
      if not plt ||= @sections.find { |s| s.type == 'PROGBITS' and s.name == '.plt' }
        plt = Section.new
        plt.name = '.plt'
        plt.type = 'PROGBITS'
        plt.flags = %w[ALLOC EXECINSTR]
        plt.addralign = @bitsize/8
        plt.encoded = EncodedData.new
        sz = @bitsize/8
        ptqual = @bitsize == 32 ? 'dword' : 'qword'
        plt.encoded << shellcode["metasm_plt_start:\npush #{ptqual} ptr [#{base}+#{sz}]\njmp #{ptqual} ptr [#{base}+#{2*sz}]"]
        if @cpu.generate_PIC and @bitsize == 32 and not @sections.find { |s| s.encoded and s.encoded.export['metasm_intern_geteip'] }
          plt.encoded << shellcode["metasm_intern_geteip:\ncall 42f\n42: pop eax\nsub eax, 42b-metasm_intern_geteip\nret"]
        end
        encode_add_section plt
      end

      prevoffset = r.offset
      pltlabel = r.symbol.name + '_plt_thunk'
      if not plt.encoded.export[pltlabel]
        # create the plt thunk
        plt.encoded.add_export pltlabel, plt.encoded.length
        if @cpu.generate_PIC and @bitsize == 32
          plt.encoded << shellcode["call metasm_intern_geteip\nlea #{base}, [eax+_PLT_GOT-metasm_intern_geteip]"]
        end
        plt.encoded << shellcode["jmp [#{base} + #{gotplt.encoded.length}]"]
        plt.encoded.add_export r.symbol.name+'_plt_default', plt.encoded.length
        # count of JMP_SLOT relocs created so far, scaled to a byte offset on 32-bit
        reloffset = @relocations.find_all { |rr| rr.type == 'JMP_SLOT' }.length
        reloffset *= Relocation.sizeof(self) if @bitsize == 32
        plt.encoded << shellcode["push #{reloffset}\njmp metasm_plt_start"]

        # transform the reloc PC32 => JMP_SLOT
        r.type = 'JMP_SLOT'
        r.offset = Expression['_PLT_GOT', :+, gotplt.encoded.length]
        r.addend = 0 if @bitsize == 64

        gotplt.encoded << encode_xword(r.symbol.name + '_plt_default')
      else
        # the symbol already has a plt thunk: this reloc is redundant
        @relocations.delete r
      end

      # mutate the original relocation
      # XXX relies on the exact form of r.target from arch_create_reloc
      target_s = @sections.find { |s| s.encoded and s.encoded.export[prevoffset.lexpr] == 0 }
      rel = target_s.encoded.reloc[prevoffset.rexpr]
      rel.target = Expression[[[rel.target, :-, prevoffset.rexpr], :-, label_at(target_s.encoded, 0)], :+, pltlabel]

    # when 'GOTOFF', 'GOTPC'
    end
  }
  encode_check_section_size gotplt
  encode_check_section_size plt if plt
  #encode_check_section_size got if got
end
Returns the Metasm::Relocation that should be applied for reloc. self.encoded.ptr must point to the location that will be relocated (for implicit addends).
# File metasm/exe_format/elf_decode.rb, line 566 def arch_decode_segments_reloc_386(reloc) if reloc.symbol.kind_of?(Symbol) and n = reloc.symbol.name and reloc.symbol.shndx == 'UNDEF' and @sections and s = @sections.find { |s_| s_.name and s_.offset <= @encoded.ptr and s_.offset + s_.size > @encoded.ptr } @encoded.add_export(new_label("#{s.name}_#{n}"), @encoded.ptr, true) end # decode addend if needed case reloc.type when 'NONE', 'COPY', 'GLOB_DAT', 'JMP_SLOT' # no addend else addend = reloc.addend || decode_sword end case reloc.type when 'NONE' when 'RELATIVE' # base = @segments.find_all { |s| s.type == 'LOAD' }.map { |s| s.vaddr }.min & 0xffff_f000 # compiled to be loaded at seg.vaddr target = addend if o = addr_to_off(target) if not label = @encoded.inv_export[o] label = new_label("xref_#{Expression[target]}") @encoded.add_export label, o end target = label else puts "W: Elf: relocation pointing out of mmaped space #{reloc.inspect}" if $VERBOSE end when 'GLOB_DAT', 'JMP_SLOT', '32', 'PC32', 'TLS_TPOFF', 'TLS_TPOFF32' # XXX use versionned version # lazy jmp_slot ? target = reloc_target(reloc) target = Expression[target, :-, reloc.offset] if reloc.type == 'PC32' target = Expression[target, :+, addend] if addend and addend != 0 target = Expression[target, :+, 'tlsoffset'] if reloc.type == 'TLS_TPOFF' target = Expression[:-, [target, :+, 'tlsoffset']] if reloc.type == 'TLS_TPOFF32' when 'COPY' # mark the address pointed as a copy of the relocation target if not reloc.symbol.kind_of?(Symbol) or not name = reloc.symbol.name puts "W: Elf: symbol to COPY has no name: #{reloc.inspect}" if $VERBOSE name = '' end name = new_label("copy_of_#{name}") @encoded.add_export name, @encoded.ptr target = nil else puts "W: Elf: unhandled 386 reloc #{reloc.inspect}" if $VERBOSE target = nil end Metasm::Relocation.new(Expression[target], :u32, @endianness) if target end
returns the Metasm::Relocation that should be applied for reloc self.encoded.ptr must point to the location that will be relocated (for implicit addends)
# File metasm/exe_format/elf_decode.rb, line 620
# Translates one decoded MIPS ELF relocation into a Metasm::Relocation
# (:u32, in our endianness), or nil for NONE / unhandled types.
# @encoded.ptr must point to the relocated location: the instruction word is
# read from there, and provides the implicit addend when reloc.addend is not
# set.
#
# For instruction-embedded relocs ('26', 'HI16', 'LO16') the resulting target
# is the whole instruction word: the original bits outside the immediate
# field, or'ed with the relocated immediate.
def arch_decode_segments_reloc_mips(reloc)
  # label the location after the section containing it and the imported symbol
  if reloc.symbol.kind_of?(Symbol) and n = reloc.symbol.name and reloc.symbol.shndx == 'UNDEF' and @sections and s = @sections.find { |s_| s_.name and s_.offset <= @encoded.ptr and s_.offset + s_.size > @encoded.ptr }
    @encoded.add_export(new_label("#{s.name}_#{n}"), @encoded.ptr, true)
  end

  original_word = decode_word

  # decode addend if needed
  case reloc.type
  when 'NONE' # no addend
  else addend = reloc.addend || Expression.make_signed(original_word, 32)
  end

  case reloc.type
  when 'NONE'
  when '32', 'REL32'
    target = reloc_target(reloc)
    target = Expression[target, :-, reloc.offset] if reloc.type == 'REL32'
    target = Expression[target, :+, addend] if addend and addend != 0
  when '26'
    target = reloc_target(reloc)
    addend &= 0x3ff_ffff
    target = Expression[target, :+, [addend, :<<, 2]] if addend and addend != 0
    # keep the 6-bit opcode (bits 26..31) of the original instruction and
    # store the word index of the target (address >> 2) in the low 26 bits.
    # The shift must happen before the mask, or address bits 26..27 are lost.
    target = Expression[[original_word, :&, 0xfc00_0000], :|, [[target, :>>, 2], :&, 0x3ff_ffff]]
  when 'HI16'
    target = reloc_target(reloc)
    addend &= 0xffff
    target = Expression[target, :+, [addend, :<<, 16]] if addend and addend != 0
    target = Expression[[original_word, :&, 0xffff_0000], :|, [[target, :>>, 16], :&, 0xffff]]
  when 'LO16'
    target = reloc_target(reloc)
    addend &= 0xffff
    target = Expression[target, :+, addend] if addend and addend != 0
    target = Expression[[original_word, :&, 0xffff_0000], :|, [target, :&, 0xffff]]
  else
    puts "W: Elf: unhandled MIPS reloc #{reloc.inspect}" if $VERBOSE
    target = nil
  end

  Metasm::Relocation.new(Expression[target], :u32, @endianness) if target
end
# File metasm/exe_format/elf_decode.rb, line 718
# Translates one decoded SH ELF relocation into a Metasm::Relocation (:u32,
# in our endianness), or nil for NONE / unhandled types. @encoded.ptr must
# point to the relocated location: the word read from there is the implicit
# addend when reloc.addend is not set. Only GLOB_DAT and JMP_SLOT are handled.
def arch_decode_segments_reloc_sh(reloc)
  sym = reloc.symbol
  if sym.kind_of?(Symbol) and n = sym.name and sym.shndx == 'UNDEF' and @sections
    # label the location after the section containing it and the imported symbol
    here = @encoded.ptr
    sec = @sections.find { |s_| s_.name and s_.offset <= here and s_.offset + s_.size > here }
    @encoded.add_export(new_label("#{sec.name}_#{n}"), @encoded.ptr, true) if sec
  end

  original_word = decode_word

  case reloc.type
  when 'NONE'
    nil
  when 'GLOB_DAT', 'JMP_SLOT'
    addend = reloc.addend || Expression.make_signed(original_word, 32)
    target = reloc_target(reloc)
    target = Expression[target, :+, addend] if addend and addend != 0
    Metasm::Relocation.new(Expression[target], :u32, @endianness) if target
  else
    puts "W: Elf: unhandled SH reloc #{reloc.inspect}" if $VERBOSE
    nil
  end
end
returns the Metasm::Relocation that should be applied for reloc self.encoded.ptr must point to the location that will be relocated (for implicit addends)
# File metasm/exe_format/elf_decode.rb, line 665 def arch_decode_segments_reloc_x86_64(reloc) if reloc.symbol.kind_of?(Symbol) and n = reloc.symbol.name and reloc.symbol.shndx == 'UNDEF' and @sections and s = @sections.find { |s_| s_.name and s_.offset <= @encoded.ptr and s_.offset + s_.size > @encoded.ptr } @encoded.add_export(new_label("#{s.name}_#{n}"), @encoded.ptr, true) end # decode addend if needed case reloc.type when 'NONE' # no addend when '32', 'PC32'; addend = reloc.addend || decode_sword else addend = reloc.addend || decode_sxword end sz = :u64 case reloc.type when 'NONE' when 'RELATIVE' # base = @segments.find_all { |s| s.type == 'LOAD' }.map { |s| s.vaddr }.min & 0xffff_f000 # compiled to be loaded at seg.vaddr target = addend if o = addr_to_off(target) if not label = @encoded.inv_export[o] label = new_label("xref_#{Expression[target]}") @encoded.add_export label, o end target = label else puts "W: Elf: relocation pointing out of mmaped space #{reloc.inspect}" if $VERBOSE end when 'GLOB_DAT', 'JMP_SLOT', '64', 'PC64', '32', 'PC32' # XXX use versionned version # lazy jmp_slot ? target = reloc_target(reloc) target = Expression[target, :-, reloc.offset] if reloc.type == 'PC64' or reloc.type == 'PC32' target = Expression[target, :+, addend] if addend and addend != 0 sz = :u32 if reloc.type == '32' or reloc.type == 'PC32' when 'COPY' # mark the address pointed as a copy of the relocation target if not reloc.symbol.kind_of?(Symbol) or not name = reloc.symbol.name puts "W: Elf: symbol to COPY has no name: #{reloc.inspect}" if $VERBOSE name = '' end name = new_label("copy_of_#{name}") @encoded.add_export name, @encoded.ptr target = nil else puts "W: Elf: unhandled X86_64 reloc #{reloc.inspect}" if $VERBOSE target = nil end Metasm::Relocation.new(Expression[target], sz, @endianness) if target end
# File metasm/exe_format/elf_encode.rb, line 751
# Builds an ELF Relocation for the EncodedData reloc found at +off+ in
# +section+ and appends it to @relocations.
#  section:: section whose encoded.reloc[off] is processed
#  off::     offset of the reloc inside the section
#  binding:: hash of known label values, used to spot unresolved externals
#  rel::     the reloc itself; looked up from section.encoded.reloc when nil
# Returns nil without adding anything (warning when $VERBOSE) if the reloc
# cannot be expressed; note that rel.target may be rewritten in place.
def arch_mips_create_reloc(section, off, binding, rel=nil)
  rel ||= section.encoded.reloc[off]
  startaddr = label_at(@encoded, 0)
  r = Relocation.new
  r.offset = Expression[label_at(section.encoded, 0, 'sect_start'), :+, off]
  if Expression[rel.target, :-, startaddr].bind(binding).reduce.kind_of?(::Integer)
    # this location is relative to the base load address of the ELF
    r.type = 'REL32'
  else
    et = rel.target.externals
    # externals that the binding does not resolve
    extern = et.find_all { |name| not binding[name] }
    if extern.length != 1
      puts "ELF: mips_create_reloc: ignoring reloc #{rel.target} in #{section.name}: #{extern.inspect} unknown" if $VERBOSE
      return
    end
    if not sym = @symbols.find { |s| s.name == extern.first }
      puts "ELF: mips_create_reloc: ignoring reloc #{rel.target} in #{section.name}: undefined symbol #{extern.first}" if $VERBOSE
      return
    end
    r.symbol = sym
    if Expression[rel.target, :-, sym.name].bind(binding).reduce.kind_of?(::Integer)
      rel.target = Expression[rel.target, :-, sym.name]
      r.type = '32'
    elsif Expression[rel.target, :&, 0xffff0000].reduce.kind_of?(::Integer)
      # the target only depends on the symbol through its low 16 bits:
      # inspect the shape of that part to pick HI16 or LO16
      lo = Expression[rel.target, :&, 0xffff].reduce
      lo = lo.lexpr if lo.kind_of?(Expression) and lo.op == :& and lo.rexpr == 0xffff
      if lo.kind_of?(Expression) and lo.op == :>> and lo.rexpr == 16
        r.type = 'HI16'
        rel.target = Expression[rel.target, :&, 0xffff0000]
        # XXX offset ?
      elsif lo.kind_of?(String) or (lo.kind_of?(Expression) and lo.op == :+)
        # was lo.kind_of(Expression): no such method, raised NoMethodError
        r.type = 'LO16'
        rel.target = Expression[rel.target, :&, 0xffff0000]
        # XXX offset ?
      else
        puts "ELF: mips_create_reloc: ignoring reloc #{lo}: cannot find matching 16 reloc type" if $VERBOSE
        return
      end
    #elsif Expression[rel.target, :+, label_at(section.encoded, 0)].bind(section.encoded.binding).reduce.kind_of? ::Integer
    #	rel.target = Expression[[rel.target, :+, label_at(section.encoded, 0)], :+, off]
    #	r.type = 'PC32'
    else
      puts "ELF: mips_create_reloc: ignoring reloc #{sym.name} + #{rel.target}: cannot find matching standard reloc type" if $VERBOSE
      return
    end
  end
  @relocations << r
end
# File metasm/exe_format/elf_encode.rb, line 705 def arch_x86_64_create_reloc(section, off, binding, rel=nil) rel ||= section.encoded.reloc[off] if rel.endianness != @endianness or not rel.type.to_s =~ /^[aiu](32|64)$/ puts "ELF: x86_64_create_reloc: ignoring reloc #{rel.target} in #{section.name}: bad reloc type" if $VERBOSE return end startaddr = label_at(@encoded, 0) r = RelocationAddend.new r.addend = 0 r.offset = Expression[label_at(section.encoded, 0, 'sect_start'), :+, off] if Expression[rel.target, :-, startaddr].bind(binding).reduce.kind_of?(::Integer) # this location is relative to the base load address of the ELF if rel.length != 8 puts "ELF: x86_64_create_reloc: ignoring reloc #{rel.target} in #{section.name}: relative non-x64" if $VERBOSE return end r.type = 'RELATIVE' else et = rel.target.externals extern = et.find_all { |name| not binding[name] } if extern.length != 1 puts "ELF: x86_64_create_reloc: ignoring reloc #{rel.target} in #{section.name}: #{extern.inspect} unknown" if $VERBOSE return end if not sym = @symbols.find { |s| s.name == extern.first } puts "ELF: x86_64_create_reloc: ignoring reloc #{rel.target} in #{section.name}: undefined symbol #{extern.first}" if $VERBOSE return end r.symbol = sym rel.target = Expression[rel.target, :-, sym.name] if rel.target.bind(binding).reduce.kind_of? ::Integer r.type = '64' # XXX check that elsif Expression[rel.target, :+, label_at(section.encoded, 0)].bind(section.encoded.binding).reduce.kind_of? ::Integer rel.target = Expression[[rel.target, :+, label_at(section.encoded, 0)], :+, off] r.type = 'PC32' # XXX # TODO tls ? else puts "ELF: x86_64_create_reloc: ignoring reloc #{sym.name} + #{rel.target}: cannot find matching standard reloc type" if $VERBOSE return end end r.addend = Expression[rel.target] #section.encoded.reloc.delete off @relocations << r end
assembles the hash self.source to a section array
# File metasm/exe_format/elf_encode.rb, line 1353
# Assembles every pending source sequence of @source (section name =>
# sequence) into the encoded data of the section bearing that name, then
# empties the sequence. Arguments, if any, are handed to #parse first.
# Raises if a source key matches no section.
def assemble(*a)
  parse(*a) unless a.empty?
  @source.each { |secname, sequence|
    section = @sections.find { |cur| cur.name == secname }
    raise "no section named #{secname} ?" if not section
    section.encoded << assemble_sequence(sequence, @cpu)
    sequence.clear
  }
end
Creates the undef symbol list from the section.encoded.reloc and a list of known exported symbols (e.g. from libc); also populates @tag.
# File metasm/exe_format/elf_encode.rb, line 606 def automagic_symbols GNUExports rescue return # autorequire autoexports = GNUExports::EXPORT.dup @sections.each { |s| next if not s.encoded s.encoded.export.keys.each { |e| autoexports.delete e } } @sections.each { |s| next if not s.encoded s.encoded.reloc.each_value { |r| et = r.target.externals extern = et.find_all { |name| autoexports[name] } next if extern.length != 1 symname = extern.first if not @symbols.find { |sym| sym.name == symname } @tag['NEEDED'] ||= [] @tag['NEEDED'] |= [autoexports[symname]] sym = Symbol.new sym.shndx = 'UNDEF' sym.type = 'FUNC' sym.name = symname sym.bind = 'GLOBAL' @symbols << sym end } } end
# File metasm/exe_format/elf_encode.rb, line 1413 def c_set_default_entrypoint return if @header.entry if @sections.find { |s| s.encoded and s.encoded.export['_start'] } @header.entry = '_start' elsif @sections.find { |s| s.encoded and s.encoded.export['main'] } # entrypoint stack: [sp] = argc, [sp+1] = argv0, [sp+2] = argv1, [sp+argc+1] = 0, [sp+argc+2] = envp0, etc case @cpu.shortname when 'ia32'; assemble <<EOS _start: mov eax, [esp] lea ecx, [esp+4+4*eax+4] push ecx lea ecx, [esp+4+4] push ecx push eax call main push eax call _exit EOS when 'x64'; assemble <<EOS _start: mov rdi, [rsp] lea rsi, [rsp+8] lea rdx, [rsi+8*rdi+8] call main mov rdi, rax call _exit EOS else compile_c <<EOS void _exit(int); int main(int, char**, char**); void _start(void) { _exit(main(0, 0, 0)); } EOS end @header.entry = '_start' end end
checks every symbol's accessibility through the gnu_hash table
# File metasm/exe_format/elf_decode.rb, line 183 def check_symbols_gnu_hash(off = @tag['GNU_HASH'], just_get_count=false) return if not @encoded.ptr = off # when present: the symndx first symbols are not sorted (SECTION/LOCAL/FILE/etc) symtable[symndx] is sorted (1st sorted symbol) # the sorted symbols are sorted by [gnu_hash_symbol_name(symbol.name) % hash_bucket_len] hash_bucket_len = decode_word symndx = decode_word # index of first sorted symbol in symtab maskwords = decode_word # number of words in the second part of the ghash section (32 or 64 bits) shift2 = decode_word # used in the bloom filter bloomfilter = [] ; maskwords.times { bloomfilter << decode_xword } # "bloomfilter[N] has bit B cleared if there is no M (M > symndx) which satisfies (C = @header.class) # ((gnu_hash(sym[M].name) / C) % maskwords) == N && # ((gnu_hash(sym[M].name) % C) == B || # ((gnu_hash(sym[M].name) >> shift2) % C) == B" # bloomfilter may be [~0] if shift2 end hash_bucket = [] ; hash_bucket_len.times { hash_bucket << decode_word } # bucket[N] contains the lowest M for which # gnu_hash(sym[M]) % nbuckets == N # or 0 if none hsymcount = 0 part4 = [] hash_bucket.each { |hmodidx| # for each bucket, walk all the chain # we do not walk the chains in hash_bucket order here, this # is just to read all the part4 as we don't know # beforehand the number of hashed symbols next if hmodidx == 0 # no hash chain for this mod loop do fu = decode_word hsymcount += 1 part4 << fu break if fu & 1 == 1 end } # part4[N] contains # (gnu_hash(sym[N].name) & ~1) | (N == dynsymcount-1 || (gnu_hash(sym[N].name) % nbucket) != (gnu_hash(sym[N+1].name) % nbucket)) # that's the hash, with its lower bit replaced by the bool [1 if i am the last sym having my hash as hash] # we're going to decode the symbol table, and we just want to get the nr of symbols to read if just_get_count # index of highest hashed (exported) symbols ns = hsymcount+symndx # no way to get the number of non-exported symbols from what we have 
here # so we'll decode all relocs and use the largest index we see.. rels = [] if @encoded.ptr = @tag['REL'] and @tag['RELENT'] == Relocation.sizeof(self) p_end = @encoded.ptr + @tag['RELSZ'] while @encoded.ptr < p_end rels << Relocation.decode(self) end end if @encoded.ptr = @tag['RELA'] and @tag['RELAENT'] == RelocationAddend.sizeof(self) p_end = @encoded.ptr + @tag['RELASZ'] while @encoded.ptr < p_end rels << RelocationAddend.decode(self) end end if @encoded.ptr = @tag['JMPREL'] and relcls = case @tag['PLTREL'] when 'REL'; Relocation when 'RELA'; RelocationAddend end p_end = @encoded.ptr + @tag['PLTRELSZ'] while @encoded.ptr < p_end rels << relcls.decode(self) end end maxr = rels.map { |rel| rel.symbol }.grep(::Integer).max || -1 return [ns, maxr+1].max end # TODO end
Reads the dynamic symbols hash table, and checks that every global and named symbol is accessible through it. Outputs a warning if it's not and $VERBOSE is set.
# File metasm/exe_format/elf_decode.rb, line 153
# Reads the symbol hash table found at file offset +off+ and checks that
# every named, GLOBAL, defined symbol of @symbols can be reached through it.
# A warning is printed for each unreachable symbol when $VERBOSE is set.
# Returns nil when +off+ is nil.
#
# Malformed tables are tolerated: an empty bucket array (nbucket == 0) or a
# chain link pointing outside of the chain table simply makes the symbol
# unreachable, instead of raising ZeroDivisionError / TypeError.
def check_symbols_hash(off = @tag['HASH'])
  return if not @encoded.ptr = off

  hash_bucket_len = decode_word
  sym_count = decode_word

  hash_bucket = [] ; hash_bucket_len.times { hash_bucket << decode_word }
  hash_table = [] ; sym_count.times { hash_table << decode_word }

  @symbols.each { |s|
    next if not s.name or s.bind != 'GLOBAL' or s.shndx == 'UNDEF'

    found = false
    h = ELF.hash_symbol_name(s.name)
    # no bucket at all: behave as an empty chain
    idx = hash_bucket_len == 0 ? 0 : hash_bucket[h % hash_bucket_len]
    sym_count.times {	# to avoid DoS by loop
      # 0 marks the end of the chain, nil is an out-of-table link
      break if not idx or idx == 0
      if ss = @symbols[idx] and ss.name == s.name
        found = true
        break
      end
      idx = hash_table[idx]
    }
    if not found
      puts "W: Elf: Symbol #{s.name.inspect} not found in hash table" if $VERBOSE
    end
  }
end
returns a metasm CPU object corresponding to header.machine
# File metasm/exe_format/elf_decode.rb, line 929
# Instantiates the CPU class matching @header.machine.
# For MIPS, the class is MIPS64 when the header flags include '32BITMODE'
# and MIPS otherwise, and it is built with our endianness.
# Raises on any machine not listed here.
def cpu_from_headers
  machine = @header.machine
  case machine
  when 'X86_64' then X86_64.new
  when '386' then Ia32.new
  when 'MIPS'
    mips = @header.flags.include?('32BITMODE') ? MIPS64 : MIPS
    mips.new @endianness
  when 'PPC' then PPC.new
  when 'ARM' then ARM.new
  when 'AARCH64' then AArch64.new
  when 'SH' then Sh4.new
  when 'ARC_COMPACT' then ARC.new
  when 'MSP430' then MSP430.new
  else raise "unsupported cpu #{machine}"
  end
end
reads the existing segment/sections.encoded and populate @relocations from the encoded.reloc hash
# File metasm/exe_format/elf_encode.rb, line 635
# Rebuilds @relocations from the encoded.reloc hash of every section, by
# delegating each reloc to the arch_<machine>_create_reloc method.
# Relocs whose target reduces to a non-Expression once bound to our own
# labels are skipped. Does nothing (warning when $VERBOSE) when no handler
# exists for the header machine.
def create_relocations
  @relocations = []

  handler = "arch_#{@header.machine.downcase}_create_reloc"
  unless respond_to? handler
    puts "Elf: create_reloc: unhandled architecture #{@header.machine}" if $VERBOSE
    return
  end

  # create a fake binding with all our own symbols
  # not foolproof, should work in most cases
  binding = {'_DYNAMIC' => 0, '_GOT' => 0}	# XXX
  curaddr = label_at(@encoded, 0, 'elf_start')
  @sections.each { |sec|
    edata = sec.encoded
    next if not edata
    binding.update edata.binding(curaddr)
    curaddr = Expression[curaddr, :+, edata.virtsize]
  }

  @sections.each { |sec|
    next if not sec.encoded
    sec.encoded.reloc.each { |off, rel|
      reduced = rel.target.bind(binding).reduce
      # XXX segment_encode only
      send(handler, sec, off, binding) if reduced.kind_of? Expression
    }
  }
end
decodes the elf header, and depending on the elf type, decode segments or sections
# File metasm/exe_format/elf_decode.rb, line 906
# Decodes the ELF header, then the segments (DYN and EXEC files) or the
# sections (REL files). Nothing more is decoded for CORE or any other type.
def decode
  decode_header
  type = @header.type
  if %w[DYN EXEC].include?(type)
    decode_segments
  elsif 'REL' == type
    decode_sections
  end
end
basic immediates decoding functions
# File metasm/exe_format/elf_decode.rb, line 44
# Decodes an unsigned 8-bit immediate from +edata+ (self.encoded by
# default), using our endianness.
def decode_byte(edata = @encoded)
  imm_type = :u8
  edata.decode_imm(imm_type, @endianness)
end
decodes the debugging information if available only a subset of DWARF2/3 is handled right now most info taken from ratonland.org/?entry=39 & libdwarf/dwarf.h
# File metasm/exe_format/elf_decode.rb, line 823
# Decodes the content of the .debug_info section into @debug, as a list of
# DwarfDebug objects. The .debug_abbrev section is required too, .debug_str
# is used when present. Does nothing without sections or without one of the
# two required sections.
def decode_debug
  return unless @sections

  # assert presence of DWARF sections
  by_name = lambda { |secname| @sections.find { |sec| sec.name == secname } }
  info_sec = by_name['.debug_info']
  abbrev_sec = by_name['.debug_abbrev']
  str_sec = by_name['.debug_str']
  return unless info_sec and abbrev_sec

  # section -> content
  info = @encoded[info_sec.offset, info_sec.size]
  abbrev = @encoded[abbrev_sec.offset, abbrev_sec.size]
  str = str_sec ? @encoded[str_sec.offset, str_sec.size] : nil

  @debug = []

  until info.ptr >= info.length
    @debug << DwarfDebug.decode(self, info, abbrev, str)
  end
end
# File metasm/exe_format/elf_decode.rb, line 901
# Decodes the dynamic segment without its relocations.
def decode_exports
  with_relocs = false
  decode_segments_dynamic(with_relocs)
end
# File metasm/exe_format/elf_decode.rb, line 45
# Decodes an unsigned 16-bit immediate from +edata+ (self.encoded by
# default), using our endianness.
def decode_half(edata = @encoded)
  imm_type = :u16
  edata.decode_imm(imm_type, @endianness)
end
decodes the elf header, section & program header
# File metasm/exe_format/elf_decode.rb, line 106
# Decodes the ELF header found at offset +off+, then the program header and
# section header tables it references (their offsets are relative to +off+).
# Either table may be skipped through +decode_phdr+ / +decode_shdr+, and is
# skipped anyway when its header offset is 0.
# Raises InvalidExeFormat when the header advertises an unexpected size.
def decode_header(off = 0, decode_phdr=true, decode_shdr=true)
  @encoded.ptr = off
  @header.decode self
  unless Header.sizeof(self) == @header.ehsize
    raise InvalidExeFormat, "Invalid elf header size: #{@header.ehsize}"
  end

  decode_program_header(@header.phoff+off) if decode_phdr and @header.phoff != 0
  decode_section_header(@header.shoff+off) if decode_shdr and @header.shoff != 0
end
decode an ULEB128 (dwarf2): read bytes while high bit is set, littleendian
# File metasm/exe_format/elf_decode.rb, line 810
# Decodes an unsigned LEB128 integer from +ed+ (self.encoded by default):
# bytes are read one at a time, each contributes its 7 low bits (least
# significant group first), and the last byte has its high bit clear.
def decode_leb(ed = @encoded)
  value = 0
  shift = 0
  loop do
    byte = ed.read(1).unpack('C').first.to_i
    value |= (byte & 0x7f) << shift
    shift += 7
    return value if (byte & 0x80) == 0
  end
end
decodes the program header table; marks the elf entrypoint as an export of self.encoded
# File metasm/exe_format/elf_decode.rb, line 139
# Decodes the @header.phnum segments of the program header table found at
# offset +off+ into @segments, and labels the table as 'program_header'.
# A nonzero header entrypoint is labelled 'entrypoint'.
# Raises InvalidExeFormat on an unexpected program header entry size.
def decode_program_header(off = @header.phoff)
  unless Segment.sizeof(self) == @header.phentsize
    raise InvalidExeFormat, "Invalid elf program header size: #{@header.phentsize}"
  end

  @encoded.add_export new_label('program_header'), off
  @encoded.ptr = off
  @segments = []
  @header.phnum.times { @segments << Segment.decode(self) }

  add_label('entrypoint', @header.entry) unless @header.entry == 0
end
decodes the section header section names are read from shstrndx if possible
# File metasm/exe_format/elf_decode.rb, line 120
# Decodes the @header.shnum sections of the section header table found at
# offset +off+ into @sections, and labels the table as 'section_header'.
# When the section name string table (shstrndx) is available, every section
# but the first gets its name, and a 'section_<name>' label if it has an
# address. Raises InvalidExeFormat on an unexpected section entry size.
def decode_section_header(off = @header.shoff)
  unless Section.sizeof(self) == @header.shentsize
    raise InvalidExeFormat, "Invalid elf section header size: #{@header.shentsize}"
  end

  @encoded.add_export new_label('section_header'), off
  @encoded.ptr = off
  @sections = []
  @header.shnum.times { @sections << Section.decode(self) }

  # read sections name
  return if @header.shstrndx == 0
  strsec = @sections[@header.shstrndx]
  return if not strsec
  # LoadedElf may not have shstr mmaped
  return if not strsec.encoded = @encoded[strsec.offset, strsec.size]

  @sections[1..-1].each { |sec|
    sec.name = readstr(strsec.encoded.data, sec.name_p)
    add_label("section_#{sec.name}", sec.addr) if sec.name and sec.addr > 0
  }
end
decodes sections, interprets symbols/relocs, fills sections.encoded
# File metasm/exe_format/elf_decode.rb, line 879 def decode_sections @symbols.clear # the NULL symbol is explicit in the symbol table decode_sections_symbols decode_sections_relocs @sections.each { |s| case s.type when 'PROGBITS', 'NOBITS' when 'TODO' # TODO end } @sections.find_all { |s| s.type == 'PROGBITS' or s.type == 'NOBITS' }.each { |s| if s.flags.include? 'ALLOC' if s.type == 'NOBITS' s.encoded = EncodedData.new '', :virtsize => s.size else s.encoded = @encoded[s.offset, s.size] || EncodedData.new s.encoded.virtsize = s.size end end } end
decode REL/RELA sections
# File metasm/exe_format/elf_decode.rb, line 447 def decode_sections_relocs @relocations ||= [] @sections.to_a.each { |sec| case sec.type when 'REL'; relcls = Relocation when 'RELA'; relcls = RelocationAddend else next end startidx = @relocations.length @encoded.ptr = sec.offset while @encoded.ptr < sec.offset + sec.size @relocations << relcls.decode(self) end # create edata relocs tsec = @sections[sec.info] relocproc = "arch_decode_segments_reloc_#{@header.machine.to_s.downcase}" next if not respond_to? relocproc new_label('pcrel') @relocations[startidx..-1].each { |r| o = @encoded.ptr = tsec.offset + r.offset r = r.dup l = new_label('pcrel') r.offset = Expression[l] if rel = send(relocproc, r) @encoded.reloc[o] = rel end } } end
decode SYMTAB sections
# File metasm/exe_format/elf_decode.rb, line 420 def decode_sections_symbols @symbols ||= [] @sections.to_a.each { |sec| next if sec.type != 'SYMTAB' next if not strtab = @sections[sec.link] strtab = @encoded[strtab.offset, strtab.size].data @encoded.ptr = sec.offset syms = [] raise 'Invalid symbol table' if sec.size > @encoded.length (sec.size / Symbol.sizeof(self)).times { syms << Symbol.decode(self, strtab) } alreadysegs = true if @header.type == 'DYN' or @header.type == 'EXEC' alreadysyms = @symbols.inject({}) { |h, s| h.update s.name => true } if alreadysegs syms.each { |s| if alreadysegs # if we already decoded the symbols from the DYNAMIC segment, # ignore dups and imports from this section next if s.shndx == 'UNDEF' next if alreadysyms[s.name] alreadysyms[s.name] = true end @symbols << s decode_symbol_export(s) } } end
decodes the dynamic segment, fills segments.encoded
# File metasm/exe_format/elf_decode.rb, line 857 def decode_segments decode_segments_dynamic decode_sections_symbols #decode_debug # too many info, decode on demand @segments.each { |s| case s.type when 'LOAD', 'INTERP' sz = s.filesz pagepad = (-(s.offset + sz)) % 4096 s.encoded = @encoded[s.offset, sz] || EncodedData.new if s.type == 'LOAD' and sz > 0 and not s.flags.include?('W') # align loaded data to the next page boundary for readonly mmap # but discard the labels/relocs etc s.encoded << @encoded[s.offset+sz, pagepad].data rescue nil s.encoded.virtsize = sz+pagepad end s.encoded.virtsize = s.memsz if s.memsz > s.encoded.virtsize end } end
decodes the ELF dynamic tags, interpret them, and decodes symbols and relocs
# File metasm/exe_format/elf_decode.rb, line 845
# Decodes the content of the DYNAMIC segment: the tags (labelled
# 'dynamic_tags'), their interpretation, the symbols and, unless
# +decode_relocs+ is false, the relocations.
# Does nothing when the file has no DYNAMIC segment.
def decode_segments_dynamic(decode_relocs=true)
  dynamic = @segments.find { |seg| seg.type == 'DYNAMIC' }
  return unless dynamic

  @encoded.ptr = add_label('dynamic_tags', dynamic.vaddr)
  decode_tags
  decode_segments_tags_interpret
  decode_segments_symbols

  if decode_relocs
    decode_segments_relocs
    decode_segments_relocs_interpret
  end
end
decode relocation tables (REL, RELA, JMPREL) from @tags
# File metasm/exe_format/elf_decode.rb, line 479 def decode_segments_relocs @relocations.clear if @encoded.ptr = @tag['REL'] raise "E: ELF: unsupported rel entry size #{@tag['RELENT']}" if @tag['RELENT'] != Relocation.sizeof(self) p_end = @encoded.ptr + @tag['RELSZ'] while @encoded.ptr < p_end @relocations << Relocation.decode(self) end end if @encoded.ptr = @tag['RELA'] raise "E: ELF: unsupported rela entry size #{@tag['RELAENT'].inspect}" if @tag['RELAENT'] != RelocationAddend.sizeof(self) p_end = @encoded.ptr + @tag['RELASZ'] while @encoded.ptr < p_end @relocations << RelocationAddend.decode(self) end end if @encoded.ptr = @tag['JMPREL'] case reltype = @tag['PLTREL'] when 'REL'; relcls = Relocation when 'RELA'; relcls = RelocationAddend else raise "E: ELF: unsupported plt relocation type #{reltype}" end p_end = @encoded.ptr + @tag['PLTRELSZ'] while @encoded.ptr < p_end @relocations << relcls.decode(self) end end end
use relocations as self.encoded.reloc
# File metasm/exe_format/elf_decode.rb, line 511
# Converts every decoded relocation into an entry of @encoded.reloc through
# the machine-specific arch_decode_segments_reloc_<machine> method.
# Returns early (with a warning when $VERBOSE) if that method is missing.
def decode_segments_relocs_interpret
  relocproc = "arch_decode_segments_reloc_#{@header.machine.to_s.downcase}"
  unless respond_to?(relocproc)
    puts "W: Elf: relocs for arch #{@header.machine} unsupported" if $VERBOSE
    return
  end

  @relocations.each { |reloc|
    next if reloc.offset == 0
    fileoff = addr_to_off(reloc.offset)
    if not fileoff
      puts "W: Elf: relocation in unmmaped space (#{reloc.inspect})" if $VERBOSE
    elsif @encoded.reloc[fileoff]
      puts "W: Elf: not rerelocating address #{'%08X' % reloc.offset}" if $VERBOSE
    else
      @encoded.ptr = fileoff
      decoded = send(relocproc, reloc)
      @encoded.reloc[fileoff] = decoded if decoded
    end
  }

  if @header.machine == 'MIPS' and @tag['PLTGOT'] and @tag['GOTSYM'] and @tag['LOCAL_GOTNO']
    puts "emulating mips PLT-like relocs" if $VERBOSE
    wordsz = @bitsize/8
    dyntab = label_addr(@tag['PLTGOT']) - (@tag['GOTSYM'] - @tag['LOCAL_GOTNO']) * wordsz
    dyntab_off = addr_to_off(dyntab)
    # one word-sized relocation per named symbol from index GOTSYM onwards
    @symbols.each_with_index { |sym, idx|
      next if idx < @tag['GOTSYM'] or not sym.name
      @encoded.reloc[dyntab_off + wordsz*idx] =
        Metasm::Relocation.new(Expression[sym.name], "u#@bitsize".to_sym, @endianness)
    }
  end
end
read the symbol table, and mark all symbols found as exports of self.encoded. Table locations are found in self.tag. XXX symbol count is found from the hash table, this may not work with GNU_HASH-only binaries
# File metasm/exe_format/elf_decode.rb, line 391
# Reads the dynamic symbol table into @symbols and exports each symbol.
# Requires the STRTAB, STRSZ, SYMTAB and HASH-or-GNU_HASH tags; returns nil
# without touching anything when one is missing. Raises on an unexpected
# SYMENT value.
def decode_segments_symbols
  has_hash = (@tag['HASH'] or @tag['GNU_HASH'])
  return unless @tag['STRTAB'] and @tag['STRSZ'] and @tag['SYMTAB'] and has_hash

  raise "E: ELF: unsupported symbol entry size: #{@tag['SYMENT']}" if @tag['SYMENT'] != Symbol.sizeof(self)

  # find number of symbols
  sym_count =
    if @tag['HASH']
      @encoded.ptr = @tag['HASH']	# assume tag already interpreted (would need addr_to_off otherwise)
      decode_word	# first word is skipped, the second one is the symbol count
      decode_word
    else
      check_symbols_gnu_hash(@tag['GNU_HASH'], true)
    end

  strtab = @encoded[@tag['STRTAB'], @tag['STRSZ']].data.to_str

  @encoded.ptr = @tag['SYMTAB']
  @symbols.clear
  sym_count.times {
    sym = Symbol.decode(self, strtab)
    @symbols << sym
    decode_symbol_export(sym)
  }

  check_symbols_hash if $VERBOSE
  check_symbols_gnu_hash if $VERBOSE
end
# File metasm/exe_format/elf_decode.rb, line 47
# Decodes a signed 32-bit immediate from edata (defaults to @encoded)
# using the file's endianness.
def decode_sword(edata = @encoded)
  byte_order = @endianness
  edata.decode_imm(:i32, byte_order)
end
# File metasm/exe_format/elf_decode.rb, line 49
# Decodes a signed immediate from edata (defaults to @encoded): 32 bits wide
# when @bitsize is 32, 64 bits wide otherwise.
def decode_sxword(edata= @encoded)
  imm_type =
    if @bitsize == 32
      :i32
    else
      :i64
    end
  edata.decode_imm(imm_type, @endianness)
end
marks a symbol as @encoded.export (from s.value, using segments or sections)
# File metasm/exe_format/elf_decode.rb, line 366
# Registers symbol s as an export of @encoded at the file offset matching
# s.value. Only named, defined NOTYPE/OBJECT/FUNC symbols are considered.
# If the name is already exported at another offset, '_' is appended until
# the name is free (or already points to the same offset).
def decode_symbol_export(s)
  return unless s.name and s.shndx != 'UNDEF' and %w[NOTYPE OBJECT FUNC].include?(s.type)

  if @header.type == 'REL' and s.shndx.kind_of?(::Integer) and sec = @sections[s.shndx]
    # relocatable object: the value is relative to the owning section
    off = sec.offset + s.value
  else
    off = addr_to_off(s.value)
    if not off
      # allow to point to end of segment
      seg = @segments.find { |cand| cand.type == 'LOAD' and cand.vaddr + cand.memsz == s.value }	# check end
      if not seg
        puts "W: Elf: symbol points to unmmaped space (#{s.inspect})" if $VERBOSE and s.shndx != 'ABS'
        return
      end
      # LoadedELF would have returned an addr_to_off = addr
      off = s.value - seg.vaddr + seg.offset
    end
  end

  name = s.name
  while @encoded.export[name] and @encoded.export[name] != off
    puts "W: Elf: symbol #{name} already seen at #{'%X' % @encoded.export[name]} - now at #{'%X' % off}) (may be a different version definition)" if $VERBOSE
    name += '_'	# do not modify inplace
  end
  @encoded.add_export name, off
end
# File metasm/exe_format/elf_decode.rb, line 46
# Decodes an unsigned 32-bit immediate from edata (defaults to @encoded)
# using the file's endianness.
def decode_word(edata = @encoded)
  byte_order = @endianness
  edata.decode_imm(:u32, byte_order)
end
# File metasm/exe_format/elf_decode.rb, line 48
# Decodes an unsigned immediate from edata (defaults to @encoded): 32 bits
# wide when @bitsize is 32, 64 bits wide otherwise.
def decode_xword(edata = @encoded)
  imm_type =
    if @bitsize == 32
      :u32
    else
      :u64
    end
  edata.decode_imm(imm_type, @endianness)
end
# File metasm/exe_format/elf_decode.rb, line 955
# Returns the header text for the section at addr: an "ELF segment"
# comment with the sorted flags when a segment starts exactly at addr,
# the superclass rendering otherwise.
def dump_section_header(addr, edata)
  seg = @segments.find { |cand| cand.vaddr == addr }
  return super(addr, edata) unless seg

  "\n// ELF segment at #{Expression[addr]}, flags = #{seg.flags.sort.join(', ')}"
end
# File metasm/exe_format/elf_decode.rb, line 915
# Yields (encoded data, vaddr) for every LOAD segment. For 'REL' files it
# additionally yields (encoded data, label) for every section that has
# data, creating a label at offset 0 when the existing one does not match
# the sanitized section name.
def each_section
  @segments.each { |seg|
    next unless seg.type == 'LOAD'
    yield seg.encoded, seg.vaddr
  }

  if @header.type == 'REL'
    @sections.each { |sec|
      next if not sec.encoded
      label = sec.encoded.inv_export[0]
      unless label and label == sec.name.tr('^a-zA-Z0-9_', '_')
        label = new_label(sec.name)
        sec.encoded.add_export label, 0
      end
      yield sec.encoded, label
    }
  end
end
create the relocations from the sections.encoded.reloc, create the dynamic sections, put sections/phdr in PT_LOAD segments, link. TODO support mapped PHDR, obey section-specified base address, handle NOBITS
encode ET_REL
# File metasm/exe_format/elf_encode.rb, line 869
# Encodes the ELF. type may be :bin, :lib, :obj or a literal header type
# string; it is only used when @header.type is not already set. Likewise
# @header.machine is derived from the cpu short name only when unset.
# Dispatches to encode_rel for 'REL' files, encode_elf otherwise.
def encode(type='DYN')
  type_aliases = {:bin => 'EXEC', :lib => 'DYN', :obj => 'REL'}
  @header.type ||= type_aliases.fetch(type, type)

  @header.machine ||= {
    'x64'     => 'X86_64',
    'ia32'    => '386',
    'mips'    => 'MIPS',
    'powerpc' => 'PPC',
    'arm'     => 'ARM'
  }[@cpu.shortname]

  @header.type == 'REL' ? encode_rel : encode_elf
end
sorted insert of a new section to self.sections according to its permission (for segment merging)
# File metasm/exe_format/elf_encode.rb, line 135
# Inserts section s into @sections, keeping them grouped by rank:
# NULL, fixed-address, then r / rx / rw / non-ALLOC. The new section goes
# right before the first section of strictly higher rank (or at the end),
# and @header.shstrndx is bumped when the insertion shifts it.
def encode_add_section s
  # order: r rx rw noalloc
  rank = lambda { |sec|
    flags = sec.flags
    if sec.type == 'NULL'
      -2
    elsif sec.addr
      -1
    elsif not flags.include?('ALLOC')
      3
    elsif flags.include?('WRITE')
      2
    elsif flags.include?('EXECINSTR')
      1
    else
      0
    end
  }

  new_rank = rank[s]
  successor = @sections.find { |sec| rank[sec] > new_rank }	# find section with rank superior
  insert_at = successor ? @sections.index(successor) : -1	# if none, last

  shifts_shstr = (@header.shstrndx.to_i != 0 and insert_at != -1 and @header.shstrndx >= insert_at)
  @header.shstrndx += 1 if shifts_shstr

  @sections.insert(insert_at, s)	# insert section
end
# File metasm/exe_format/elf_encode.rb, line 102
# Encodes w as an unsigned 8-bit value in the file's endianness.
# The caller backtrace is passed along only when $DEBUG is set.
def encode_byte(w)
  trace = (caller if $DEBUG)
  Expression[w].encode(:u8, @endianness, trace)
end
checks a section's data has not grown beyond s.size, if so undefs addr/offset
# File metasm/exe_format/elf_encode.rb, line 112
# Checks that the data of a preexisting section still matches its recorded
# size (s.size); sections without a recorded size are left alone.
#
# When the sizes differ, the section can no longer be patched in place:
# its addr and offset are cleared so that it gets laid out again, and
# s.size is updated to the current data size.
#
# The original test was `virtsize < size`, which only caught sections that
# had shrunk, although the documented purpose (and the warning text) is to
# catch sections that have grown. Both cases are handled now; the shrink
# case behaves as before.
#
# Returns the new size when the section was invalidated, nil otherwise.
def encode_check_section_size(s)
  return unless s.size

  cursize = s.encoded.virtsize
  return if cursize == s.size

  if $VERBOSE
    change = cursize > s.size ? 'grown' : 'shrunk'
    puts "W: Elf: preexisting section #{s} has #{change}, relocating"
  end
  s.addr = s.offset = nil
  s.size = cursize
end
# File metasm/exe_format/elf_encode.rb, line 886
# Encodes a non-relocatable ELF (anything but header type 'REL') and
# returns the resulting raw data (@encoded.data).
#
# Steps, in order:
# 1. build the dynamic sections (skipped for an 'EXEC' without INTERP segment)
# 2. map ALLOC sections into LOAD segments
# 3. make sure INTERP is mapped, the last LOAD is writable, and add the
#    DYNAMIC / PHDR segments
# 4. encode the section table and program headers
# 5. lay the segments out, recording label values in a local binding hash
# 6. append the section table and the non-ALLOC sections, then fixup
def encode_elf
  @encoded = EncodedData.new
  if @header.type != 'EXEC' or @segments.find { |i| i.type == 'INTERP' }
    # create a .dynamic section unless we are an ET_EXEC with .nointerp
    automagic_symbols
    create_relocations
    encode_segments_dynamic
  end

  # without an entrypoint the INTERP segment is dropped
  @segments.delete_if { |s| s.type == 'INTERP' } if not @header.entry

  encode_make_segments_from_sections

  loadsegs = @segments.find_all { |seg_| seg_.type == 'LOAD' }

  # ensure PT_INTERP is mapped if present
  if interp = @segments.find { |i| i.type == 'INTERP' }
    # look for a LOAD segment without fixed memsz, whose flags include the
    # interp flags, and that can grow without overlapping a later segment
    if not seg = loadsegs.find { |seg_| not seg_.memsz and interp.flags & seg_.flags == interp.flags and
        not loadsegs[loadsegs.index(seg_)+1..-1].find { |sseg|
          o = Expression[sseg.vaddr, :-, [seg_.vaddr, :+, seg_.encoded.length+interp.encoded.length]].reduce
          o.kind_of? ::Integer and o < 0
        }
      }
      seg = Segment.new
      seg.type = 'LOAD'
      seg.flags = interp.flags.dup
      seg.align = 0x1000
      seg.encoded = EncodedData.new
      seg.offset = new_label('segment_offset')
      seg.vaddr = new_label('segment_address')
      loadsegs << seg
      @segments << seg
    end
    # the interp data is appended to the chosen segment, and the INTERP
    # descriptor then only references it
    interp.vaddr = Expression[seg.vaddr, :+, seg.encoded.length]
    interp.offset = Expression[seg.offset, :+, seg.encoded.length]
    seg.encoded << interp.encoded
    interp.encoded = nil
  end

  # ensure last PT_LOAD is writeable (used for bss)
  seg = loadsegs.last
  if not seg or not seg.flags.include? 'W'
    seg = Segment.new
    seg.type = 'LOAD'
    seg.flags = ['R', 'W']
    seg.encoded = EncodedData.new
    loadsegs << seg
    @segments << seg
  end

  # add dynamic segment
  if ds = @sections.find { |sec| sec.type == 'DYNAMIC' } and ds.encoded.length > 1
    ds.set_default_values self
    seg = Segment.new
    seg.type = 'DYNAMIC'
    seg.flags = ['R', 'W']
    seg.offset = ds.offset
    seg.vaddr = ds.addr
    seg.memsz = seg.filesz = ds.size
    @segments << seg
  end

  # use variables in the first segment descriptor, to allow fixup later
  # (when we'll be able to include the program header)
  if first_seg = loadsegs.first
    first_seg_oaddr = first_seg.vaddr	# section's vaddr depend on oaddr
    first_seg_off = first_seg.offset
    first_seg.vaddr = new_label('segvaddr')
    first_seg.offset = new_label('segoff')
    first_seg.memsz = new_label('segmemsz')
    first_seg.filesz = new_label('segfilsz')
  end

  # create a PHDR segment unless one already exists
  if first_seg and not @segments.find { |seg_| seg_.type == 'PHDR' }
    phdr = Segment.new
    phdr.type = 'PHDR'
    phdr.flags = first_seg.flags
    phdr.offset = new_label('segoff')
    phdr.vaddr = new_label('segvaddr')
    phdr.filesz = phdr.memsz = new_label('segmemsz')
    @segments.unshift phdr
  end

  # encode section&program headers
  if @header.shnum != 0
    st = @sections.inject(EncodedData.new) { |edata, s| edata << s.encode(self) }
  else
    # shnum == 0: no section table is emitted (st stays nil)
    @header.shoff = @header.shnum = @header.shstrndx = 0
  end
  pt = @segments.inject(EncodedData.new) { |edata, s| edata << s.encode(self) }

  # binding: values of the placeholder labels, applied by the final fixup!
  binding = {}
  @encoded << @header.encode(self)
  @encoded.align 8
  binding[@header.phoff] = @encoded.length
  if phdr
    binding[phdr.offset] = @encoded.length
    pt.add_export phdr.vaddr, 0
    binding[phdr.memsz] = pt.length
  end
  @encoded << pt
  @encoded.align 8

  if first_seg
    # put headers into the 1st mmaped segment
    if first_seg_oaddr.kind_of? ::Integer
      # pad headers to align the 1st segment's data
      @encoded.virtsize += (first_seg_oaddr - @encoded.virtsize) & 0xfff
      addr = first_seg_oaddr - @encoded.length
    else
      # no fixed address: default base is 0x08048000 for 'EXEC', 0 otherwise
      addr = ((@header.type == 'EXEC') ? 0x08048000 : 0)
      binding[first_seg_oaddr] = addr + @encoded.length
    end
    binding[first_seg_off] = @encoded.length if not first_seg_off.kind_of? ::Integer
    # the headers encoded so far become the start of the first segment,
    # and @encoded restarts empty for the layout loop below
    first_seg.encoded = @encoded << first_seg.encoded
    @encoded = EncodedData.new
    binding[first_seg.memsz] = first_seg.encoded.virtsize if not first_seg.memsz.kind_of? ::Integer
    binding[first_seg.filesz] = first_seg.encoded.rawsize if not first_seg.filesz.kind_of? ::Integer
  end

  # lay out every segment carrying data; addr tracks the next free address
  @segments.each { |seg_|
    next if not seg_.encoded
    if seg_.vaddr.kind_of? ::Integer
      raise "cannot put segment at address #{Expression[seg_.vaddr]} (now at #{Expression[addr]})" if seg_.vaddr < addr
      addr = seg_.vaddr
    else
      binding[seg_.vaddr] = addr
    end
    # ensure seg_.vaddr & page_size == seg_.offset & page_size
    @encoded.virtsize += (addr - @encoded.virtsize) & 0xfff
    binding.update seg_.encoded.binding(addr)
    binding[seg_.offset] = @encoded.length
    seg_.encoded.align 8
    # only the raw (file-backed) part of the segment goes into the file
    @encoded << seg_.encoded[0, seg_.encoded.rawsize]
    addr += seg_.encoded.length

    # page break for memory permission enforcement
    if @segments[@segments.index(seg_)+1..-1].find { |seg__| seg__.encoded and seg__.vaddr.kind_of? ::Integer }
      addr += 0x1000 - (addr & 0xfff) if addr & 0xfff != 0	# minimize memory size
    else
      addr += 0x1000	# minimize file size
    end
  }

  # section table goes after the segments (st is nil when shnum == 0)
  binding[@header.shoff] = @encoded.length if st
  @encoded << st
  @encoded.align 8

  @sections.each { |sec|
    next if not sec.encoded or sec.flags.include? 'ALLOC'	# already in a segment.encoded
    binding[sec.offset] = @encoded.length
    binding.update sec.encoded.binding
    @encoded << sec.encoded
    @encoded.align 8
  }

  @encoded.fixup! binding
  @encoded.data
end
# File metasm/exe_format/elf_encode.rb, line 1362
# Writes the file through the superclass, then marks it executable (0755)
# when the header has a non-zero entrypoint. Returns the superclass result.
def encode_file(path, *a)
  result = super(path, *a)
  has_entrypoint = (@header.entry and @header.entry != 0)
  File.chmod(0755, path) if has_entrypoint
  result
end
encodes the GNU_HASH table TODO
# File metasm/exe_format/elf_encode.rb, line 153 def encode_gnu_hash return if true sortedsyms = @symbols.find_all { |s| s.bind == 'GLOBAL' and s.shndx != 'UNDEF' and s.name } bucket = Array.new(42) if not gnu_hash = @sections.find { |s| s.type == 'GNU_HASH' } gnu_hash = Section.new gnu_hash.name = '.gnu.hash' gnu_hash.type = 'GNU_HASH' gnu_hash.flags = ['ALLOC'] gnu_hash.entsize = gnu_hash.addralign = 4 encode_add_section gnu_hash end gnu_hash.encoded = EncodedData.new # "bloomfilter[N] has bit B cleared if there is no M (M > symndx) which satisfies (C = @header.class) # ((gnu_hash(sym[M].name) / C) % maskwords) == N && # ((gnu_hash(sym[M].name) % C) == B || # ((gnu_hash(sym[M].name) >> shift2) % C) == B" # bloomfilter may be [~0] bloomfilter = [] # bucket[N] contains the lowest M for which # gnu_hash(sym[M]) % nbuckets == N # or 0 if none bucket = [] gnu_hash.encoded << encode_word(bucket.length) << encode_word(@symbols.length - sortedsyms.length) << encode_word(bloomfilter.length) << encode_word(shift2) bloomfilter.each { |bf| gnu_hash.encoded << encode_xword(bf) } bucket.each { |bk| gnu_hash.encoded << encode_word(bk) } sortedsyms.each { |s| # (gnu_hash(sym[N].name) & ~1) | (N == dynsymcount-1 || (gnu_hash(sym[N].name) % nbucket) != (gnu_hash(sym[N+1].name) % nbucket)) # that's the hash, with its lower bit replaced by the bool [1 if i am the last sym having my hash as hash] val = 28 gnu_hash.encoded << encode_word(val) } @tag['GNU_HASH'] = label_at(gnu_hash.encoded, 0) encode_check_section_size gnu_hash gnu_hash end
# File metasm/exe_format/elf_encode.rb, line 103
# Encodes w as an unsigned 16-bit value in the file's endianness.
# The caller backtrace is passed along only when $DEBUG is set.
def encode_half(w)
  trace = (caller if $DEBUG)
  Expression[w].encode(:u16, @endianness, trace)
end
encodes the symbol dynamic hash table in the .hash section, updates the HASH tag
# File metasm/exe_format/elf_encode.rb, line 203
# Builds the symbol hash table in the HASH section (created as '.hash'
# when missing) and points @tag['HASH'] at it. Returns the section, or nil
# when there is at most one symbol.
def encode_hash
  return if @symbols.length <= 1

  hash = @sections.find { |sec| sec.type == 'HASH' }
  if not hash
    hash = Section.new
    hash.name = '.hash'
    hash.type = 'HASH'
    hash.flags = ['ALLOC']
    hash.entsize = hash.addralign = 4
    encode_add_section hash
  end
  hash.encoded = EncodedData.new

  # to find a symbol from its name :
  # 1: idx = hash(name)
  # 2: idx = bucket[idx % bucket.size]
  # 3: if idx == 0: return notfound
  # 4: if dynsym[idx].name == name: return found
  # 5: idx = chain[idx] ; goto 3
  nsyms = @symbols.length
  bucket = Array.new(nsyms/4+1, 0)
  chain = Array.new(nsyms, 0)
  @symbols.each_with_index { |sym, idx|
    next if sym.bind == 'LOCAL' or not sym.name or sym.shndx == 'UNDEF'
    slot = ELF.hash_symbol_name(sym.name) % bucket.length
    # push the symbol at the head of its bucket's chain
    chain[idx] = bucket[slot]
    bucket[slot] = idx
  }

  hash.encoded << encode_word(bucket.length) << encode_word(chain.length)
  (bucket + chain).each { |word| hash.encoded << encode_word(word) }

  @tag['HASH'] = label_at(hash.encoded, 0)

  encode_check_section_size hash

  hash
end
put every ALLOC section in a segment, creating segments if needed. Sections already located inside an existing segment are patched in place instead
# File metasm/exe_format/elf_encode.rb, line 817 def encode_make_segments_from_sections # fixed addresses first seclist = @sections.find_all { |sec| sec.addr.kind_of? Integer }.sort_by { |sec| sec.addr } | @sections seclist.each { |sec| next if not sec.flags.to_a.include? 'ALLOC' # check if we fit in an existing segment loadsegs = @segments.find_all { |seg_| seg_.type == 'LOAD' } if sec.addr.kind_of?(::Integer) and seg = loadsegs.find { |seg_| seg_.vaddr.kind_of?(::Integer) and seg_.vaddr <= sec.addr and seg_.vaddr + seg_.memsz >= sec.addr + sec.size } # sections is already inside a segment: we're reencoding an ELF, just patch the section in the segment seg.encoded[sec.addr - seg.vaddr, sec.size] = sec.encoded if sec.encoded next end if not seg = loadsegs.find { |seg_| sec.flags.to_a.include?('WRITE') == seg_.flags.to_a.include?('W') and #sec.flags.to_a.include?('EXECINSTR') == seg_.flags.to_a.include?('X') and not seg_.memsz and not loadsegs[loadsegs.index(seg_)+1..-1].find { |sseg| # check if another segment would overlap if we add the sec to seg_ o = Expression[sseg.vaddr, :-, [seg_.vaddr, :+, seg_.encoded.length+sec.encoded.length]].reduce o.kind_of? ::Integer and o < 0 } } # nope, create a new one seg = Segment.new seg.type = 'LOAD' seg.flags = ['R'] seg.flags << 'W' if sec.flags.include? 'WRITE' seg.align = 0x1000 seg.encoded = EncodedData.new seg.offset = new_label('segment_offset') seg.vaddr = sec.addr || new_label('segment_address') @segments << seg end seg.flags |= ['X'] if sec.flags.include? 'EXECINSTR' seg.encoded.align sec.addralign if sec.addralign sec.addr = Expression[seg.vaddr, :+, seg.encoded.length] sec.offset = Expression[seg.offset, :+, seg.encoded.length] seg.encoded << sec.encoded } end
# File metasm/exe_format/elf_encode.rb, line 1046
# Encodes a relocatable object ('REL'): no program headers, entrypoint 0,
# every section at address 0. Layout is header, section table, then each
# section's data, all aligned on 8 bytes. Returns the raw data.
def encode_rel
  @encoded = EncodedData.new
  automagic_symbols
  create_relocations

  # a relocatable file has no program header table nor entrypoint
  @header.phentsize = 0
  @header.phnum = 0
  @header.phoff = 0
  @header.entry = 0

  @sections.each { |sec| sec.addr = 0 }
  section_table = EncodedData.new
  @sections.each { |sec| section_table = section_table << sec.encode(self) }

  binding = {}
  @encoded << @header.encode(self)
  @encoded.align 8

  binding[@header.shoff] = @encoded.length
  @encoded << section_table
  @encoded.align 8

  @sections.each { |sec|
    data = sec.encoded
    next if not data
    binding[sec.offset] = @encoded.length
    # resolve section-internal references before appending
    data.fixup data.binding
    @encoded << data
    @encoded.align 8
  }

  @encoded.fixup! binding
  @encoded.data
end
reorders self.symbols according to their gnu_hash
# File metasm/exe_format/elf_encode.rb, line 121
# Sorts @symbols (keeping the first entry in place): non-GLOBAL symbols
# first, then undefined or unnamed GLOBAL ones, then the remaining GLOBAL
# symbols ordered by gnu hash modulo the bucket count.
def encode_reorder_symbols
  gnu_hash_bucket_length = 42	# TODO
  sort_key = lambda { |sym|
    next -2 if sym.bind != 'GLOBAL'
    next -1 if sym.shndx == 'UNDEF' or not sym.name
    ELF.gnu_hash_symbol_name(sym.name) % gnu_hash_bucket_length
  }
  tail = @symbols[1..-1]
  @symbols[1..-1] = tail.sort_by { |sym| sort_key[sym] }
end
encodes the .dynamic section, creates .hash/.gnu.hash/.rel/.rela/.dynsym/.strtab/.init,*_array as needed
# File metasm/exe_format/elf_encode.rb, line 491
# Encodes the .dynamic section from @tag, creating the sections it depends
# on (.dynstr, *_array, hash, relocation and symbol tables) along the way.
#
# Works in two passes over @tag:
# - first pass: emits the string-valued tags (which need .dynstr) and
#   prepares the *_ARRAY sections and their base relocations
# - second pass, after symbols/relocs are encoded: emits every other tag
# The NULL tag is always emitted last.
def encode_segments_dynamic
  # find or create the allocated string table; its first byte is a NUL
  if not strtab = @sections.find { |s| s.type == 'STRTAB' and s.flags.include? 'ALLOC' }
    strtab = Section.new
    strtab.name = '.dynstr'
    strtab.addralign = 1
    strtab.type = 'STRTAB'
    strtab.flags = ['ALLOC']
    encode_add_section strtab
  end
  strtab.encoded = EncodedData.new << 0
  @tag['STRTAB'] = label_at(strtab.encoded, 0)

  if not dynamic = @sections.find { |s| s.type == 'DYNAMIC' }
    dynamic = Section.new
    dynamic.name = '.dynamic'
    dynamic.type = 'DYNAMIC'
    dynamic.flags = %w[WRITE ALLOC]		# XXX why write ?
    dynamic.addralign = dynamic.entsize = @bitsize / 8 * 2
    dynamic.link = strtab
    encode_add_section dynamic
  end
  dynamic.encoded = EncodedData.new('', :export => {'_DYNAMIC' => 0})

  # appends one (tag, value) pair to the dynamic section
  encode_tag = lambda { |k, v|
    dynamic.encoded <<
    encode_sxword(int_from_hash(k, DYNAMIC_TAG)) <<
    encode_xword(v)
  }

  # find or create string in strtab
  # returns the string offset in strtab, 0 for nil or empty strings
  add_str = lambda { |n|
    if n and n != '' and not ret = strtab.encoded.data.index(n + 0.chr)
      ret = strtab.encoded.virtsize
      n.force_encoding('BINARY') if n.respond_to?(:force_encoding)
      strtab.encoded << n << 0
    end
    ret || 0
  }
  @tag.keys.each { |k|
    case k
    when 'NEEDED'; @tag[k].each { |n| encode_tag[k, add_str[n]] }
    when 'SONAME', 'RPATH', 'RUNPATH'; encode_tag[k, add_str[@tag[k]]]
    when 'INIT_ARRAY', 'FINI_ARRAY', 'PREINIT_ARRAY'	# build section containing the array
      if not ar = @sections.find { |s| s.name == '.' + k.downcase }
        ar = Section.new
        ar.name = '.' + k.downcase
        ar.type = k
        ar.addralign = ar.entsize = @bitsize/8
        ar.flags = %w[WRITE ALLOC]
        ar.encoded = EncodedData.new
        encode_add_section ar	# insert before encoding syms/relocs (which need section indexes)
      end

      # fill these later, but create the base relocs now
      arch_create_reloc_func = "arch_#{@header.machine.downcase}_create_reloc"
      next if not respond_to?(arch_create_reloc_func)
      curaddr = label_at(@encoded, 0, 'elf_start')
      # temporary binding mapping every section label relative to elf_start
      fkbind = {}
      @sections.each { |s|
        next if not s.encoded
        fkbind.update s.encoded.binding(Expression[curaddr, :+, 1])
      }
      @relocations ||= []
      off = ar.encoded.length
      @tag[k].each { |a|
        rel = Metasm::Relocation.new(Expression[a], "u#@bitsize".to_sym, @endianness)
        send(arch_create_reloc_func, ar, off, fkbind, rel)
        off += @bitsize/8
      }
    end
  }

  encode_reorder_symbols
  encode_gnu_hash
  encode_hash
  encode_segments_relocs
  dynsym = encode_segments_symbols(strtab)
  # hash and relocation sections link to the dynamic symbol table
  @sections.find_all { |s| %w[HASH GNU_HASH REL RELA].include? s.type }.each { |s| s.link = dynsym }

  encode_check_section_size strtab

  # rm unused tag (shrink .nointerp binaries by allowing to skip the section entirely)
  @tag.delete('STRTAB') if strtab.encoded.length == 1

  # XXX any order needed ?
  @tag.keys.each { |k|
    case k
    when Integer	# unknown tags = array of values
      @tag[k].each { |n| encode_tag[k, n] }
    when 'PLTREL';     encode_tag[k, int_from_hash(@tag[k], DYNAMIC_TAG)]
    when 'FLAGS';      encode_tag[k, bits_from_hash(@tag[k], DYNAMIC_FLAGS)]
    when 'FLAGS_1';    encode_tag[k, bits_from_hash(@tag[k], DYNAMIC_FLAGS_1)]
    when 'FEATURES_1'; encode_tag[k, bits_from_hash(@tag[k], DYNAMIC_FEATURES_1)]
    when 'NULL'	# keep last
    when 'STRTAB'
      encode_tag[k, @tag[k]]
      encode_tag['STRSZ', strtab.encoded.size]
    when 'INIT_ARRAY', 'FINI_ARRAY', 'PREINIT_ARRAY'	# build section containing the array
      ar = @sections.find { |s| s.name == '.' + k.downcase }
      @tag[k].each { |p| ar.encoded << encode_addr(p) }
      encode_check_section_size ar
      encode_tag[k, label_at(ar.encoded, 0)]
      encode_tag[k + 'SZ', ar.encoded.virtsize]
    when 'NEEDED', 'SONAME', 'RPATH', 'RUNPATH'	# already handled
    else
      encode_tag[k, @tag[k]]
    end
  }
  encode_tag['NULL', @tag['NULL'] || 0] unless @tag.empty?

  encode_check_section_size dynamic
end
encodes the relocation tables needs a complete self.symbols array
# File metasm/exe_format/elf_encode.rb, line 272
# Encodes @relocations into up to three tables and sets the matching tags:
# - JMP_SLOT relocations -> .rel.plt or .rela.plt (JMPREL/PLTRELSZ/PLTREL)
# - other relocations without addend -> .rel.dyn (REL/RELENT/RELSZ)
# - other relocations with addend -> .rela.dyn (RELA/RELAENT/RELASZ)
# Does nothing when there is no relocation.
def encode_segments_relocs
  return if not @relocations or @relocations.empty?

  # optional machine-specific hook run before encoding
  arch_preencode_reloc_func = "arch_#{@header.machine.downcase}_preencode_reloc"
  send arch_preencode_reloc_func if respond_to? arch_preencode_reloc_func

  list = @relocations.find_all { |r| r.type == 'JMP_SLOT' }
  # NOTE(review): `@relocations.empty?` looks unreachable here unless the
  # preencode hook empties the array, given the early return above
  if not list.empty? or @relocations.empty?
    # a single relocation with an addend switches the whole table to RELA
    if list.find { |r| r.addend }
      stype = 'RELA'
      sname = '.rela.plt'
    else
      stype = 'REL'
      sname = '.rel.plt'
    end

    if not relplt = @sections.find { |s| s.type == stype and s.name == sname }
      relplt = Section.new
      relplt.name = sname
      relplt.flags = ['ALLOC']
      encode_add_section relplt
    end
    relplt.encoded = EncodedData.new('', :export => {'_REL_PLT' => 0})
    list.each { |r| relplt.encoded << r.encode(self) }
    @tag['JMPREL'] = label_at(relplt.encoded, 0)
    @tag['PLTRELSZ'] = relplt.encoded.virtsize
    @tag['PLTREL'] = relplt.type = stype
    @tag[stype + 'ENT']  = relplt.entsize = relplt.addralign = (stype == 'REL' ? Relocation.sizeof(self) : RelocationAddend.sizeof(self))
    encode_check_section_size relplt
  end

  list = @relocations.find_all { |r| r.type != 'JMP_SLOT' and not r.addend }
  if not list.empty?
    # set TEXTREL when a relocation targets a non-writable section
    if not @tag['TEXTREL'] and @sections.find { |s_|
      s_.encoded and e = s_.encoded.inv_export[0] and not s_.flags.include? 'WRITE' and
      list.find { |r| Expression[r.offset, :-, e].reduce.kind_of? ::Integer }
      # TODO need to check with r.offset.bind(elf_binding)
    }
      @tag['TEXTREL'] = 0
    end
    if not rel = @sections.find { |s_| s_.type == 'REL' and s_.name == '.rel.dyn' }
      rel = Section.new
      rel.name = '.rel.dyn'
      rel.type = 'REL'
      rel.flags = ['ALLOC']
      rel.entsize = rel.addralign = Relocation.sizeof(self)
      encode_add_section rel
    end
    rel.encoded = EncodedData.new
    list.each { |r| rel.encoded << r.encode(self) }
    @tag['REL'] = label_at(rel.encoded, 0)
    @tag['RELENT'] = Relocation.sizeof(self)
    @tag['RELSZ'] = rel.encoded.virtsize
    encode_check_section_size rel
  end

  list = @relocations.find_all { |r| r.type != 'JMP_SLOT' and r.addend }
  if not list.empty?
    if not rela = @sections.find { |s_| s_.type == 'RELA' and s_.name == '.rela.dyn' }
      rela = Section.new
      rela.name = '.rela.dyn'
      rela.type = 'RELA'
      rela.flags = ['ALLOC']
      rela.entsize = rela.addralign = RelocationAddend.sizeof(self)
      encode_add_section rela
    end
    rela.encoded = EncodedData.new
    list.each { |r| rela.encoded << r.encode(self) }
    @tag['RELA'] = label_at(rela.encoded, 0)
    @tag['RELAENT'] = RelocationAddend.sizeof(self)
    @tag['RELASZ'] = rela.encoded.virtsize
    encode_check_section_size rela
  end
end
encodes the symbol table should have a stable self.sections array (only append allowed after this step)
# File metasm/exe_format/elf_encode.rb, line 245
# Encodes @symbols into the DYNSYM section (created as '.dynsym' when
# missing), using strtab for the names, and sets the SYMTAB/SYMENT tags.
# Returns the section, or nil when there is at most one symbol.
def encode_segments_symbols(strtab)
  return if @symbols.length <= 1

  dynsym = @sections.find { |sec| sec.type == 'DYNSYM' }
  if not dynsym
    dynsym = Section.new
    dynsym.name = '.dynsym'
    dynsym.type = 'DYNSYM'
    dynsym.entsize = Symbol.sizeof(self)
    dynsym.addralign = 4
    dynsym.flags = ['ALLOC']
    # info = number of LOCAL symbols after the first entry, plus one
    dynsym.info = @symbols[1..-1].count { |sym| sym.bind == 'LOCAL' } + 1
    dynsym.link = strtab
    encode_add_section dynsym
  end

  dynsym.encoded = EncodedData.new
  # needs all section indexes, as will be in the final section header
  @symbols.each { |sym|
    dynsym.encoded << sym.encode(self, strtab.encoded)
  }

  @tag['SYMTAB'] = label_at(dynsym.encoded, 0)
  @tag['SYMENT'] = Symbol.sizeof(self)

  encode_check_section_size dynsym

  dynsym
end
# File metasm/exe_format/elf_encode.rb, line 105
# Encodes w as a signed 32-bit value in the file's endianness.
# The caller backtrace is passed along only when $DEBUG is set.
def encode_sword(w)
  trace = (caller if $DEBUG)
  Expression[w].encode(:i32, @endianness, trace)
end
# File metasm/exe_format/elf_encode.rb, line 107
# Encodes w as a signed value in the file's endianness: 32 bits wide when
# @bitsize is 32, 64 bits wide otherwise.
# The caller backtrace is passed along only when $DEBUG is set.
def encode_sxword(w)
  imm_type =
    if @bitsize == 32
      :i32
    else
      :i64
    end
  trace = (caller if $DEBUG)
  Expression[w].encode(imm_type, @endianness, trace)
end
# File metasm/exe_format/elf_encode.rb, line 104
# Encodes w as an unsigned 32-bit value in the file's endianness.
# The caller backtrace is passed along only when $DEBUG is set.
def encode_word(w)
  trace = (caller if $DEBUG)
  Expression[w].encode(:u32, @endianness, trace)
end
# File metasm/exe_format/elf_encode.rb, line 106
# Encodes w as an unsigned value in the file's endianness: 32 bits wide
# when @bitsize is 32, 64 bits wide otherwise.
# The caller backtrace is passed along only when $DEBUG is set.
def encode_xword(w)
  imm_type =
    if @bitsize == 32
      :u32
    else
      :u64
    end
  trace = (caller if $DEBUG)
  Expression[w].encode(imm_type, @endianness, trace)
end
file offset -> memory address handles relocated LoadedELF
# File metasm/exe_format/elf_decode.rb, line 75
# Translates a file offset into a memory address using the LOAD segment
# whose file range contains it. When module_address is 0, @load_address
# (initialized to 0 if unset) is added. Returns nil when no LOAD segment
# covers the offset.
def fileoff_to_addr(foff)
  seg = @segments.find { |cand|
    cand.type == 'LOAD' and cand.offset <= foff and cand.offset + cand.filesz > foff
  }
  return unless seg

  base = module_address
  base = (base == 0 ? (@load_address ||= 0) : 0)
  seg.vaddr + base + foff - seg.offset
end
returns an array including the ELF entrypoint (if not null) and the FUNC symbols addresses TODO include init/init_array
# File metasm/exe_format/elf_decode.rb, line 946
# Returns an array holding the header entrypoint (unless it equals 0)
# followed by the value of every defined FUNC symbol.
def get_default_entrypoints
  entry = @header.entry
  entrypoints = (entry != 0 ? [entry] : [])
  return entrypoints unless @symbols

  funcs = @symbols.find_all { |sym| sym.shndx != 'UNDEF' and sym.type == 'FUNC' }
  entrypoints.concat(funcs.map { |sym| sym.value })
end
returns a disassembler with a special decodedfunction for dlsym, __libc_start_main, and a default function (i386 only)
# File metasm/exe_format/elf_decode.rb, line 963
# Returns the disassembler built by the superclass, tuned for ELF:
# - address 0, when mapped, is marked decoded and bound to the cpu default
#   function (fixes call [constructor] => 0)
# - ia32/x64: C prototypes for dlsym, __libc_start_main and the usual
#   noreturn libc functions are registered; dlsym gets a backtrace callback
#   that reads the requested symbol name and binds it to the first cpu
#   register symbol
# - mips: $t9 is bound to the address of the entrypoint and of every
#   defined FUNC symbol
# - sh4: __stack_chk_fail/abort/exit are registered as noreturn
#
# Fix: the dlsym callback searched the terminating NUL with the garbled
# (and syntactically invalid) `fn.index(\0)`; it now uses `0.chr`.
def init_disassembler
  d = super()
  d.backtrace_maxblocks_data = 4
  if d.get_section_at(0)
    # fixes call [constructor] => 0
    d.decoded[0] = true
    d.function[0] = @cpu.disassembler_default_func
  end
  case @cpu.shortname
  when 'ia32', 'x64'
    # parse the prototypes with a fresh C parser, restored afterwards
    old_cp = d.c_parser
    d.c_parser = nil
    d.parse_c <<EOC
void *dlsym(int, char *);	// has special callback
// gcc's entrypoint, need pointers to reach main exe code (last callback)
void __libc_start_main(void(*)(), int, int, void(*)(), void(*)()) __attribute__((noreturn));
// standard noreturn, optimized by gcc
void __attribute__((noreturn)) exit(int);
void _exit __attribute__((noreturn))(int);
void abort(void) __attribute__((noreturn));
void __stack_chk_fail __attribute__((noreturn))(void);
EOC
    d.function[Expression['dlsym']] = dls = @cpu.decode_c_function_prototype(d.c_parser, 'dlsym')
    d.function[Expression['__libc_start_main']] = @cpu.decode_c_function_prototype(d.c_parser, '__libc_start_main')
    d.function[Expression['exit']] = @cpu.decode_c_function_prototype(d.c_parser, 'exit')
    d.function[Expression['_exit']] = @cpu.decode_c_function_prototype(d.c_parser, '_exit')
    d.function[Expression['abort']] = @cpu.decode_c_function_prototype(d.c_parser, 'abort')
    d.function[Expression['__stack_chk_fail']] = @cpu.decode_c_function_prototype(d.c_parser, '__stack_chk_fail')
    d.c_parser = old_cp
    dls.btbind_callback = lambda { |dasm, bind, funcaddr, calladdr, expr, origin, maxdepth|
      sz = @cpu.size/8
      raise 'dlsym call error' if not dasm.decoded[calladdr]
      # second argument of dlsym: the symbol name pointer
      if @cpu.shortname == 'x64'
        arg2 = :rsi
      else
        arg2 = Indirection.new(Expression[:esp, :+, 2*sz], sz, calladdr)
      end
      fnaddr = dasm.backtrace(arg2, calladdr, :include_start => true, :maxdepth => maxdepth)
      # a single resolved pointer to a NUL-terminated string longer than a machine word
      if fnaddr.kind_of? ::Array and fnaddr.length == 1 and s = dasm.get_section_at(fnaddr.first) and fn = s[0].read(64) and i = fn.index(0.chr) and i > sz	# try to avoid ordinals
        bind = bind.merge @cpu.register_symbols[0] => Expression[fn[0, i]]
      end
      bind
    }
    df = d.function[:default] = @cpu.disassembler_default_func
    df.backtrace_binding[@cpu.register_symbols[4]] = Expression[@cpu.register_symbols[4], :+, @cpu.size/8]
    df.btbind_callback = nil
  when 'mips'
    (d.address_binding[@header.entry] ||= {})[:$t9] ||= Expression[@header.entry]
    @symbols.each { |s|
      next if s.shndx == 'UNDEF' or s.type != 'FUNC'
      (d.address_binding[s.value] ||= {})[:$t9] ||= Expression[s.value]
    }
    d.function[:default] = @cpu.disassembler_default_func
  when 'sh4'
    noret = DecodedFunction.new
    noret.noreturn = true
    %w[__stack_chk_fail abort exit].each { |fn|
      d.function[Expression[fn]] = noret
    }
    d.function[:default] = @cpu.disassembler_default_func
  end
  d
end
resets the fields of the elf headers that should be recalculated, eg phdr offset
# File metasm/exe_format/elf_encode.rb, line 801
# Clears the header fields that have to be recomputed (shoff, shnum, phoff,
# phnum, shstrndx), the name_p/offset of every section and the offset of
# every segment. Returns self.
def invalidate_header
  [:shnum=, :shoff=, :phnum=, :phoff=, :shstrndx=].each { |setter|
    @header.send(setter, nil)
  }

  @sections.to_a.each { |sec|
    sec.name_p = nil
    sec.offset = nil
  }
  @segments.to_a.each { |seg| seg.offset = nil }

  self
end
return the address of a label
# File metasm/exe_format/elf_decode.rb, line 84
# Returns the address of a label: Integers are returned as is; otherwise
# the label is looked up in the exports of each segment's data, then in the
# exports of @encoded (mapped back through the segment whose file range
# contains the offset). Returns nil when the label cannot be resolved.
def label_addr(name)
  return name if name.kind_of? Integer

  owner = @segments.find { |seg| seg.encoded and seg.encoded.export[name] }
  return owner.vaddr + owner.encoded.export[name] if owner

  fileoff = @encoded.export[name]
  return unless fileoff

  holder = @segments.find { |seg| seg.offset <= fileoff and seg.offset + seg.filesz > fileoff }
  holder.vaddr + fileoff - holder.offset if holder
end
# File metasm/exe_format/elf_decode.rb, line 1044
# Returns the lowest vaddr among the LOAD segments, 0 when there is none.
def module_address
  load_addrs = []
  @segments.each { |seg|
    load_addrs << seg.vaddr if seg.type == 'LOAD' and seg.vaddr
  }
  load_addrs.min || 0
end
# File metasm/exe_format/elf_decode.rb, line 1040
# Returns the SONAME tag value when the tags are available.
def module_name
  tags = @tag
  tags && tags['SONAME']
end
# File metasm/exe_format/elf_decode.rb, line 1048
# Returns the distance between module_address and the highest end address
# (vaddr + memsz) of the LOAD segments; 0 when there is no LOAD segment.
def module_size
  load_ends = []
  @segments.to_a.reverse_each { |seg|
    next unless seg.type == 'LOAD'
    load_ends << seg.vaddr + seg.memsz
  }
  top = load_ends.compact.max
  return 0 if not top

  top - module_address
end
# File metasm/exe_format/elf_decode.rb, line 1053
# Returns the module symbols as [name, offset from module_address, size]
# arrays. An ['entrypoint', offset] pair comes first when the entry is
# non-zero or the file is an 'EXEC'. LOCAL/WEAK symbol names are prefixed
# with 'local_'/'weak_'; unnamed and undefined symbols are skipped.
def module_symbols
  base = module_address
  list = []
  if @header.entry != 0 or @header.type == 'EXEC'
    list << ['entrypoint', @header.entry-base]
  end

  @symbols.each { |sym|
    next unless sym.name and sym.shndx != 'UNDEF'
    prefix =
      if %w[LOCAL WEAK].include?(sym.bind)
        sym.bind.downcase + '_'
      else
        ''
      end
    list << [prefix+sym.name, sym.value-base, sym.size]
  }
  list
end
# File metasm/exe_format/elf_encode.rb, line 1076
# Prepares the ELF for source parsing:
# - installs a placeholder @cursource (unless one is set) whose << switches
#   to the .text section on first use and forwards the data there
# - replaces any INTERP segment with one holding the default interpreter
#   path (64-bit variant when @bitsize is 64)
# - initializes @source, then defers to the superclass
def parse_init
  # allow the user to specify a section, falls back to .text if none specified
  unless defined?(@cursource) and @cursource
    @cursource = Object.new
    class << @cursource
      attr_accessor :elf
      def <<(*a)
        t = Preprocessor::Token.new(nil)
        t.raw = '.text'
        elf.parse_parser_instruction t
        elf.cursource.send(:<<, *a)
      end
    end
    @cursource.elf = self
  end

  @segments.delete_if { |old| old.type == 'INTERP' }
  interp = Segment.new
  interp.type = 'INTERP'
  interp.encoded = EncodedData.new << (@bitsize == 64 ? DEFAULT_INTERP64 : DEFAULT_INTERP) << 0
  interp.flags = ['R']
  interp_len = interp.encoded.length
  interp.filesz = interp_len
  interp.memsz = interp_len
  @segments.unshift interp

  @source ||= {}
  super()
end
handles elf meta-instructions
syntax:
.section "<name>" [<perms>] [base=<base>] change current section (where normal instruction/data are put) perms = list of 'w' 'x' 'alloc', may be prefixed by 'no' 'r' ignored defaults to 'alloc' shortcuts: .text .data .rodata .bss base: immediate expression representing the section base address .entrypoint [<label>] defines the program entrypoint to the specified label / current location .global "<name>" [<label>] [<label_end>] [type=<FUNC|OBJECT|...>] [plt=<plt_label_name>] [undef] .weak ... .local ... builds a symbol with specified type/scope/size, type defaults to 'func' if plt_label_name is specified, the compiler will build an entry in the plt for this symbol, with this label (PIC & on-demand resolution) XXX plt ignored (automagic) .symbol [global|weak|local] "<name>" ... see .global/.weak/.local .needed "<libpath>" marks the elf as requiring the specified library (DT_NEEDED) .soname "<soname>" defines the current elf DT_SONAME (exported library name) .interp "<interpreter_path>" .nointerp defines the required ELF interpreter defaults to '/lib/ld.so' 'nil'/'none' remove the interpreter specification .pt_gnu_stack rw|rwx defines the PT_GNU_STACK flag (default: unspecified, => rwx) .init/.fini [<label>] defines the DT_INIT/DT_FINI dynamic tags, same semantic as .entrypoint .init_array/.fini_array/.preinit_array <label> [, <label>]* append to the DT_*_ARRAYs
# File metasm/exe_format/elf_encode.rb, line 1139
# Handles the ELF-specific assembler directives.
#
# +instr+ is the directive token: its +raw+ text selects the directive
# (case-insensitively) and it is also used to raise syntax errors.
# The directive arguments are read from @lexer.
# Directives handled here: .text/.data/.rodata/.bss, .section, .entrypoint,
# .global/.weak/.local/.symbol, .needed, .soname, .interp/.nointerp,
# .pt_gnu_stack, .init/.fini, .init_array/.fini_array/.preinit_array.
# Any other directive is forwarded to the parent implementation.
def parse_parser_instruction(instr)
  # reads the next token and returns its value (or raw text);
  # raises unless the token is a bare or quoted string
  readstr = lambda {
    @lexer.skip_space
    t = nil
    raise instr, "string expected, found #{t.raw.inspect if t}" if not t = @lexer.readtok or (t.type != :string and t.type != :quoted)
    t.value || t.raw
  }
  # raises if the next token exists and is not an end-of-line
  check_eol = lambda {
    @lexer.skip_space
    t = nil
    raise instr, "eol expected, found #{t.raw.inspect if t}" if t = @lexer.nexttok and t.type != :eol
  }

  case instr.raw.downcase
  when '.text', '.data', '.rodata', '.bss'
    sname = instr.raw.downcase
    # create the well-known section with its default flags on first use
    if not @sections.find { |s| s.name == sname }
      s = Section.new
      s.name = sname
      s.type = 'PROGBITS'
      s.encoded = EncodedData.new
      s.flags = case sname
                when '.text'; %w[ALLOC EXECINSTR]
                when '.data', '.bss'; %w[ALLOC WRITE]
                when '.rodata'; %w[ALLOC]
                end
      s.addralign = 8
      encode_add_section s
    end
    @cursource = @source[sname] ||= []
    check_eol[] if instr.backtrace # special case for magic @cursource

  when '.section'
    # .section <section name|"section name"> [(no)wxalloc] [base=<expr>]
    sname = readstr[]
    if not s = @sections.find { |s_| s_.name == sname }
      s = Section.new
      s.type = 'PROGBITS'
      s.name = sname
      s.encoded = EncodedData.new
      s.flags = ['ALLOC']
      @sections << s
    end
    loop do
      @lexer.skip_space
      break if not tok = @lexer.nexttok or tok.type != :string
      case @lexer.readtok.raw.downcase
      when /^(no)?r?(w)?(x)?(alloc)?$/
        # permission list: a leading 'no' removes the listed flags, otherwise they are added
        ar = []
        ar << 'WRITE' if $2
        ar << 'EXECINSTR' if $3
        ar << 'ALLOC' if $4
        if $1; s.flags -= ar
        else s.flags |= ar
        end
      when 'base'
        @lexer.skip_space
        # the '=' between 'base' and the expression is optional
        @lexer.readtok if tok = @lexer.nexttok and tok.type == :punct and tok.raw == '='
        raise instr, 'bad section base' if not s.addr = Expression.parse(@lexer).reduce or not s.addr.kind_of? ::Integer
      else raise instr, 'unknown specifier'
      end
    end
    @cursource = @source[sname] ||= []
    check_eol[]

  when '.entrypoint'
    # ".entrypoint <somelabel/expression>" or ".entrypoint" (here)
    @lexer.skip_space
    if tok = @lexer.nexttok and tok.type == :string
      raise instr if not entrypoint = Expression.parse(@lexer)
    else
      entrypoint = new_label('entrypoint')
      @cursource << Label.new(entrypoint, instr.backtrace.dup)
    end
    @header.entry = entrypoint
    check_eol[]

  when '.global', '.weak', '.local', '.symbol'
    if instr.raw == '.symbol'
      bind = readstr[]
    else
      bind = instr.raw[1..-1]
    end

    s = Symbol.new
    s.name = readstr[]
    s.type = 'FUNC'
    s.bind = bind.upcase
    # define s.section ? should check the section exporting s.target, but it may not be defined now

    # parse pseudo instruction arguments
    loop do
      @lexer.skip_space
      ntok = @lexer.readtok
      if not ntok or ntok.type == :eol
        @lexer.unreadtok ntok
        break
      end
      raise instr, "syntax error: string expected, found #{ntok.raw.inspect}" if ntok.type != :string
      case ntok.raw
      when 'undef'
        s.shndx = 'UNDEF'
      when 'plt'
        @lexer.skip_space
        ntok = @lexer.readtok
        raise "syntax error: = expected, found #{ntok.raw.inspect if ntok}" if not ntok or ntok.type != :punct or ntok.raw != '='
        @lexer.skip_space
        ntok = @lexer.readtok
        raise "syntax error: label expected, found #{ntok.raw.inspect if ntok}" if not ntok or ntok.type != :string
        s.thunk = ntok.raw
      when 'type'
        @lexer.skip_space
        ntok = @lexer.readtok
        raise "syntax error: = expected, found #{ntok.raw.inspect if ntok}" if not ntok or ntok.type != :punct or ntok.raw != '='
        @lexer.skip_space
        ntok = @lexer.readtok
        raise "syntax error: symbol type expected, found #{ntok.raw.inspect if ntok}" if not ntok or ntok.type != :string or not SYMBOL_TYPE.index(ntok.raw)
        s.type = ntok.raw
      when 'size'
        @lexer.skip_space
        ntok = @lexer.readtok
        raise "syntax error: = expected, found #{ntok.raw.inspect if ntok}" if not ntok or ntok.type != :punct or ntok.raw != '='
        @lexer.skip_space
        ntok = @lexer.readtok
        raise "syntax error: symbol size expected, found #{ntok.raw.inspect if ntok}" if not ntok or ntok.type != :string or not ntok.raw =~ /^\d+$/
        s.size = ntok.raw.to_i
      else
        # positional arguments: first the value label, then the end label (size = end - value)
        if not s.value
          s.value = ntok.raw
        elsif not s.size
          s.size = Expression[ntok.raw, :-, s.value]
        else
          raise instr, "syntax error: eol expected, found #{ntok.raw.inspect}"
        end
      end
    end
    # with no explicit value, a symbol that is neither undef nor a plt thunk points to the label bearing its own name
    s.value ||= s.name if not s.shndx and not s.thunk
    s.shndx ||= 1 if s.value
    @symbols << s

  when '.needed'
    # a required library
    (@tag['NEEDED'] ||= []) << readstr[]
    check_eol[]

  when '.soname'
    # exported library name
    @tag['SONAME'] = readstr[]
    check_eol[]
    @segments.delete_if { |s_| s_.type == 'INTERP' }
    @header.type = 'DYN'

  when '.interp', '.nointerp'
    # required ELF interpreter
    interp = ((instr.raw == '.nointerp') ? 'nil' : readstr[])

    @segments.delete_if { |s_| s_.type == 'INTERP' }
    case interp.downcase
    when 'nil', 'no', 'none'
      @header.shnum = 0
    else
      seg = Segment.new
      seg.type = 'INTERP'
      seg.encoded = EncodedData.new << interp << 0
      seg.flags = ['R']
      seg.memsz = seg.filesz = seg.encoded.length
      @segments.unshift seg
    end
    check_eol[]

  when '.pt_gnu_stack'
    # PT_GNU_STACK marking
    mode = readstr[]

    @segments.delete_if { |s_| s_.type == 'GNU_STACK' }
    s = Segment.new
    s.type = 'GNU_STACK'
    case mode
    when /^rw$/; s.flags = %w[R W]
    when /^rwx$/; s.flags = %w[R W X]
    else raise instr, "syntax error: expected rw|rwx, found #{mode.inspect}"
    end
    @segments << s

  when '.init', '.fini'
    # dynamic tag initialization
    @lexer.skip_space
    if tok = @lexer.nexttok and tok.type == :string
      raise instr, 'syntax error' if not init = Expression.parse(@lexer)
    else
      init = new_label(instr.raw[1..-1])
      @cursource << Label.new(init, instr.backtrace.dup)
    end
    @tag[instr.raw[1..-1].upcase] = init
    check_eol[]

  when '.init_array', '.fini_array', '.preinit_array'
    t = @tag[instr.raw[1..-1].upcase] ||= []
    loop do
      raise instr, 'syntax error' if not e = Expression.parse(@lexer)
      t << e
      @lexer.skip_space
      ntok = @lexer.nexttok
      break if not ntok or ntok.type == :eol
      # entries must be separated by a ',' punctuation token
      raise instr, "syntax error, ',' expected, found #{ntok.raw.inspect}" if ntok.type != :punct or ntok.raw != ','
      @lexer.readtok
    end

  else super(instr)
  end
end
handles C attributes: export, export_as(foo), import, import_from(libc.so.6), init/constructor, fini/destructor, entrypoint
# File metasm/exe_format/elf_encode.rb, line 1380
# Walks the toplevel symbols of the C parser +cp+ and, for each C::Variable,
# translates its attributes into ELF metadata:
#   export / export_as(name)   -> GLOBAL symbol whose value is the variable name
#   import / import_from(lib)  -> GLOBAL UNDEF symbol (+ lib appended to the NEEDED tag)
#   init / constructor         -> variable name appended to the INIT_ARRAY tag
#   fini / destructor          -> variable name appended to the FINI_ARRAY tag
#   entrypoint                 -> variable name becomes the header entry
def read_c_attrs(cp)
  cp.toplevel.symbol.each_value do |var|
    next unless var.kind_of? C::Variable

    # export_as is only queried when the plain 'export' attribute is absent
    if var.has_attribute 'export' or alias_name = var.has_attribute_var('export_as')
      exported = Symbol.new
      exported.name = alias_name || var.name
      exported.type = var.type.kind_of?(C::Function) ? 'FUNC' : 'NOTYPE'
      exported.bind = 'GLOBAL'
      exported.shndx = 1
      exported.value = var.name
      @symbols << exported
    end

    # import_from is only queried when the plain 'import' attribute is absent
    if var.has_attribute 'import' or libname = var.has_attribute_var('import_from')
      # record the library once
      if libname && !@tag['NEEDED'].to_a.include?(libname)
        (@tag['NEEDED'] ||= []) << libname
      end
      imported = Symbol.new
      imported.name = var.name
      imported.type = var.type.kind_of?(C::Function) ? 'FUNC' : 'NOTYPE'
      imported.bind = 'GLOBAL'
      imported.shndx = 'UNDEF'
      @symbols << imported
    end

    (@tag['INIT_ARRAY'] ||= []) << var.name if var.has_attribute('init') or var.has_attribute('constructor')
    (@tag['FINI_ARRAY'] ||= []) << var.name if var.has_attribute('fini') or var.has_attribute('destructor')
    @header.entry = var.name if var.has_attribute 'entrypoint'
  end
end
# File metasm/exe_format/elf_decode.rb, line 53
# Returns the NUL-terminated string found in +str+ at offset +off+
# (terminator excluded).
# Returns nil when +off+ is not strictly positive, when no NUL byte is found
# at or after +off+, or when looking up the terminator raises.
def readstr(str, off)
  return unless off > 0
  # the lookup may raise: LoadedElf with arbitrary pointer...
  nul = str.index(?\0, off) rescue false
  str[off...nul] if nul
end
Returns the target of a relocation, using reloc.symbol. May create new labels if the relocation targets a section.
# File metasm/exe_format/elf_decode.rb, line 548
# Resolves what the relocation +reloc+ points to, based on reloc.symbol:
# * not a Symbol, or a Symbol without a name: 0
# * Symbol of type 'SECTION': the label exported at the section offset in
#   @encoded (a new label named after the section is created and exported
#   if there was none)
# * any other Symbol: the symbol name
def reloc_target(reloc)
  sym = reloc.symbol
  return 0 unless sym.kind_of?(Symbol)
  return sym.name || 0 unless sym.type == 'SECTION'

  sect = @sections[sym.shndx]
  label = @encoded.inv_export[sect.offset]
  unless label
    # no label at the section start yet: create one
    label = new_label(sect.name)
    @encoded.add_export(label, sect.offset)
  end
  label
end
returns an array of [name, addr, length, info]
# File metasm/exe_format/elf_decode.rb, line 1028
# Lists the memory areas of the ELF as [name, addr, length, flags] arrays,
# where flags is the comma-joined flag list.
# Uses @sections (skipping the first entry) when it is set, otherwise falls
# back to @segments, whose entries get a nil name.
def section_info
  unless @sections
    return @segments.map { |seg| [nil, seg.vaddr, seg.memsz, seg.flags.join(',')] }
  end

  @sections[1..-1].map { |sect| [sect.name, sect.addr, sect.size, sect.flags.join(',')] }
end
# File metasm/exe_format/elf.rb, line 666
# Short identifier of this executable format.
def shortname
  'elf'
end
# File metasm/exe_format/elf.rb, line 668
# Size of a 'byte' field: always 1.
def sizeof_byte
  1
end
# File metasm/exe_format/elf.rb, line 669
# Size of a 'half' field: always 2.
def sizeof_half
  2
end
# File metasm/exe_format/elf.rb, line 671
# Size of an 'sword' field: always 4.
def sizeof_sword
  4
end
# File metasm/exe_format/elf.rb, line 670
# Size of a 'word' field: always 4.
def sizeof_word
  4
end
# File metasm/exe_format/elf.rb, line 672
# Size of an 'xword' field: 4 when @bitsize is 32, 8 otherwise.
def sizeof_xword
  return 4 if @bitsize == 32

  8
end
set the data model
# File metasm/exe_format/elf_encode.rb, line 1374
# Configures the C parser +cp+: runs the parent tuning first, then calls
# cp.lp64 when @cpu.size is 64.
def tune_cparser(cp)
  super(cp)
  return unless @cpu.size == 64

  cp.lp64
end
defines __ELF__
# File metasm/exe_format/elf_encode.rb, line 1369
# Configures the preprocessor +l+ by weakly defining the __ELF__ macro with value 1.
# Returns whatever l.define_weak returns.
def tune_prepro(l)
  macro_name = '__ELF__'
  l.define_weak(macro_name, 1)
end