ModRM represents indirections in x86 (eg dword ptr [eax+4*ebx+12h])
Valid combinations for a modrm: ints are reg indexes, symbols are immediates, except :sib.
# File metasm/cpu/ia32/decode.rb, line 13
# Decodes a modrm operand from its first byte; a sib byte and/or a
# displacement are pulled from edata when the encoding calls for them.
# edata:: byte source, must respond to get_byte and decode_imm
# byte:: the modrm byte itself
# endianness:: forwarded to decode_imm
# adsz, opsz:: address size and operand size
# seg:: segment override, handed unchanged to the constructor
# regclass:: class instantiated when the byte designates a register (mod == 3)
# h:: options: :mrmvex turns the index into a SimdReg of that size, :argsz replaces opsz
# Returns a regclass instance when mod == 3, otherwise the result of new().
def self.decode(edata, byte, endianness, adsz, opsz, seg=nil, regclass=Reg, h = {})
  mod = (byte >> 6) & 3
  rm = byte & 7
  return regclass.new(rm, opsz) if mod == 3

  # index registers become SimdReg when the :mrmvex option is given
  mkindex = lambda { |num| h[:mrmvex] ? SimdReg.new(num, h[:mrmvex]) : Reg.new(num, adsz) }

  scale = index = base = disp = nil

  # Sum lists the components of this encoding: Integers are register numbers,
  # :sib requests the sib byte, :i8/:i16/:i32 a displacement of that type
  Sum[adsz][mod][rm].each { |part|
    case part
    when Integer
      # the first register found is the base, the next one an unscaled index
      if base
        scale = 1
        index = mkindex[part]
      else
        base = Reg.new(part, adsz)
      end

    when :sib
      sib = edata.get_byte.to_i

      # index field == 4 means there is no index register
      idx = (sib >> 3) & 7
      unless idx == 4
        scale = 1 << ((sib >> 6) & 3)
        index = mkindex[idx]
      end

      # base field == 5 with mod == 0 means a displacement replaces the base register
      low = sib & 7
      if low == 5 and mod == 0
        disp = Expression[edata.decode_imm("i#{adsz}".to_sym, endianness)]
      else
        base = Reg.new(low, adsz)
      end

    when :i8, :i16, :i32
      disp = Expression[edata.decode_imm(part, endianness)]
    end
  }

  reduced = disp && disp.reduce
  if reduced.kind_of?(Integer) and reduced < 0 and (reduced < -0x10_0000 or !(base || index))
    # probably a base address -> unsigned
    disp = Expression[disp.reduce & ((1 << (adsz || 32)) - 1)]
  end

  opsz = h[:argsz] || opsz
  new(adsz, opsz, scale, index, base, disp, seg)
end
Returns the byte representing the register encoded as modrm. Works with Reg/SimdReg.
# File metasm/cpu/ia32/encode.rb, line 16
# Builds the modrm byte that designates a register directly: the two top
# bits are set (0xc0), mregval is shifted into place above the low three
# bits, and reg.val supplies the low bits.
# reg:: anything responding to val
# mregval:: value for the middle field, 0 by default
def self.encode_reg(reg, mregval = 0)
  middle_field = mregval << 3
  0xc0 | middle_field | reg.val
end
creates a new ModRM with the specified attributes:
adsz (16/32), sz (8/16/32: byte ptr, word ptr, dword ptr)
s, i, b, imm (scale, index register, base register, immediate displacement)
seg: segment selector override
# File metasm/cpu/ia32/main.rb, line 176
# Stores the attributes of the modrm.
# adsz and sz are always recorded. Every other attribute is only assigned
# when it is set, and the scale is only kept alongside an index: s is
# dropped when i is nil/false.
def initialize(adsz, sz, s, i, b, imm, seg = nil)
  @adsz = adsz
  @sz = sz
  if i
    @s = s
    @i = i
  end
  @b = b if b
  @imm = imm if imm
  @seg = seg if seg
end
May return a SegReg. Must be called before the SegReg parser (which could match only the seg part of a modrm).
# File metasm/cpu/ia32/parse.rb, line 16 def self.parse(lexer, otok, cpu) tok = otok # read operand size specifier if tok and tok.type == :string and tok.raw =~ /^(?:byte|[dqo]?word|_(\d+)bits)$/ ptsz = if $1 $1.to_i else case tok.raw when 'byte'; 8 when 'word'; 16 when 'dword'; 32 when 'qword'; 64 when 'oword'; 128 else raise otok, 'mrm: bad ptr size' end end lexer.skip_space if tok = lexer.readtok and tok.type == :string and tok.raw == 'ptr' lexer.skip_space tok = lexer.readtok end end # read segment selector if tok and tok.type == :string and seg = SegReg.s_to_i[tok.raw] lexer.skip_space seg = SegReg.new(seg) if not ntok = lexer.readtok or ntok.type != :punct or ntok.raw != ':' raise otok, 'invalid modrm' if ptsz lexer.unreadtok ntok return seg end lexer.skip_space tok = lexer.readtok end # ensure we have a modrm if not tok or tok.type != :punct or tok.raw != '[' raise otok, 'invalid modrm' if ptsz or seg return end lexer.skip_space_eol # support fasm syntax [fs:eax] for segment selector if tok = lexer.readtok and tok.type == :string and not seg and seg = SegReg.s_to_i[tok.raw] raise otok, 'invalid modrm' if not ntok = lexer.readtok or ntok.type != :punct or ntok.raw != ':' seg = SegReg.new(seg) lexer.skip_space_eol else lexer.unreadtok tok end # read modrm content as generic expression content = Expression.parse(lexer) lexer.skip_space_eol raise(otok, 'bad modrm') if not content or not ntok = lexer.readtok or ntok.type != :punct or ntok.raw != ']' # converts matching externals to Regs in an expression regify = lambda { |o| case o when Expression o.lexpr = regify[o.lexpr] o.rexpr = regify[o.rexpr] o when String cpu.str_to_reg(o) || o else o end } s = i = b = imm = nil # assigns the Regs in the expression to base or index field of the modrm walker = lambda { |o| case o when nil when Reg if b raise otok, 'mrm: too many regs' if i i = o s = 1 else b = o end when SimdReg raise otok, 'mrm: too many regs' if i i = o s = 1 when Expression if o.op == :* and 
(o.rexpr.kind_of?(Reg) or o.lexpr.kind_of?(Reg)) # scaled index raise otok, 'mrm: too many indexes' if i s = o.lexpr i = o.rexpr s, i = i, s if s.kind_of? Reg raise otok, "mrm: bad scale #{s}" unless [1, 2, 4, 8].include?(s) elsif o.op == :+ # recurse walker[o.lexpr] walker[o.rexpr] else # found (a part of) the immediate imm = Expression[imm, :+, o] end else # found (a part of) the immediate imm = Expression[imm, :+, o] end } # do it walker[regify[content.reduce]] # ensure found immediate is really an immediate raise otok, 'mrm: reg in imm' if imm.kind_of?(Expression) and not imm.externals.grep(Reg).empty? raise otok, 'mrm: bad reg size' if b.kind_of?(Reg) and i.kind_of?(Reg) and b.sz != i.sz # find default address size adsz = b ? b.sz : i ? i.sz : nil # ptsz may be nil now, will be fixed up later (in parse_instr_fixup) to match another instruction argument's size new adsz, ptsz, s, i, b, imm, seg end
# File metasm/cpu/ia32/main.rb, line 196 def ==(o) self.class == o.class and s == o.s and i == o.i and b == o.b and imm == o.imm and seg == o.seg and adsz == o.adsz and sz == o.sz end
# File metasm/cpu/ia32/render.rb, line 55
# Returns a hash mapping action labels to lambdas that modify this object:
# 'set targetsz':: stores its argument in @sz
# 'set seg':: wraps its argument in a SegReg and stores it in @seg
# The segment action previously instantiated +Seg+, a constant that is not
# used anywhere else in the ia32 code shown here (segment registers are
# SegReg objects), so invoking it raised a NameError.
# NOTE(review): the argument is handed to SegReg.new as is, like the parser
# does with a register index -- confirm what the caller of this action passes.
def context
  {'set targetsz' => lambda { |s| @sz = s },
   'set seg' => lambda { |s| @seg = SegReg.new s }}
end
The argument is an integer representing the 'reg' field of the mrm. The caller is responsible for setting the adsz.
Returns an array, 1 element per possible immediate size (for un-reduce()able Expression).
# File metasm/cpu/ia32/encode.rb, line 24
# Encodes the modrm, dispatching on the address size to encode16/encode32.
# reg:: value of the 'reg' field, either a number or an Argument (its val is used)
# endianness:: forwarded to the size-specific encoder
# Returns whatever the selected encoder returns, or nil when @adsz is
# neither 16 nor 32.
def encode(reg = 0, endianness = :little)
  regfield = reg.kind_of?(Argument) ? reg.val : reg

  case @adsz
  when 16 then encode16(regfield, endianness)
  when 32 then encode32(regfield, endianness)
  end
end
# File metasm/cpu/ia32/render.rb, line 32
# Returns the name of a pointer size given in bits: 'byte', 'word', 'dword',
# 'qword' or 'oword' for 8/16/32/64/128, "_<sz>bits" for anything else.
# The table is rebuilt on every call so that each caller receives its own
# String object.
def qualifier(sz)
  names = {
    8   => 'byte',
    16  => 'word',
    32  => 'dword',
    64  => 'qword',
    128 => 'oword'
  }
  names[sz] || "_#{sz}bits"
end
# File metasm/cpu/ia32/render.rb, line 43
# Returns the list of elements making up the textual form of the modrm:
# an optional "<size> ptr " qualifier, an optional segment followed by ':',
# then '[', the address expression and ']'.
def render
  out = []

  if @sz
    # the qualifier is left out when a Reg argument of the owning instruction
    # already has the same size
    same_size_reg = instruction && @instruction.args.find { |a| a.kind_of? Reg and a.sz == @sz }
    out << (qualifier(@sz) << ' ptr ') unless same_size_reg
  end

  out.push(@seg, ':') if seg

  # address expression: base + imm + scale*index (the scale is omitted when it is 1)
  addr = nil
  addr = Expression[addr, :+, @b] if b
  addr = Expression[addr, :+, @imm] if imm
  if s
    scaled_index = (@s == 1) ? @i : [@s, :*, @i]
    addr = Expression[addr, :+, scaled_index]
  end

  out.push('[', addr, ']')
end
Returns the symbolic representation of the ModRM (ie an Indirection). Segment selectors are represented as eg “segment_base_fs”; they are not present when same as the implicit one (ds:edx, ss:esp).
# File metasm/cpu/ia32/main.rb, line 187
# Returns an Indirection describing the memory location designated by the modrm.
# di:: optional object whose address is recorded in the Indirection; it is also
#      passed to the symbolic method of the base and index registers
# The pointer is base + scale*index + imm. A "segment_base_<seg>" term is
# added when the segment differs from the implicit one: 2 when the base
# register number is 4 or 5, 3 otherwise.
# NOTE(review): presumably 2/3 are ss/ds and 4/5 esp/ebp -- confirm against SegReg/Reg.
def symbolic(di=nil)
  addr = nil
  addr = Expression[addr, :+, @b.symbolic(di)] if b
  addr = Expression[addr, :+, [@s, :*, @i.symbolic(di)]] if i
  addr = Expression[addr, :+, @imm] if imm

  if seg
    implicit_seg = (b && (@b.val == 4 || @b.val == 5)) ? 2 : 3
    addr = Expression["segment_base_#@seg", :+, addr] if seg.val != implicit_seg
  end

  # the length of the Indirection is the operand size converted from bits to bytes
  Indirection[addr.reduce, @sz/8, (di ? di.address : nil)]
end
# File metasm/cpu/ia32/encode.rb, line 33 def encode16(reg, endianness) if not b # imm only return [EncodedData.new << (6 | (reg << 3)) << @imm.encode(:u16, endianness)] end imm = @imm.reduce if self.imm imm = nil if imm == 0 ret = EncodedData.new ret << case [@b.val, (@i.val if i)] when [3, 6], [6, 3]; 0 when [3, 7], [7, 3]; 1 when [5, 6], [6, 5]; 2 when [5, 7], [7, 5]; 3 when [6, nil]; 4 when [7, nil]; 5 when [5, nil] imm ||= 0 6 when [3, nil]; 7 else raise InvalidModRM, 'invalid modrm16' end # add bits in the first octet of ret.data (1.9 compatibility layer) or_bits = lambda { |v| # rape me if ret.data[0].kind_of? Integer ret.data[0] |= v else ret.data[0] = (ret.data[0].unpack('C').first | v).chr end } or_bits[reg << 3] if imm case Expression.in_range?(imm, :i8) when true or_bits[1 << 6] [ret << Expression.encode_imm(imm, :i8, endianness)] when false or_bits[2 << 6] [ret << Expression.encode_imm(imm, :a16, endianness)] when nil rets = ret.dup or_bits[1<<6] ret << @imm.encode(:i8, endianness) ret, rets = rets, ret # or_bits uses ret or_bits[2<<6] ret << @imm.encode(:a16, endianness) [ret, rets] end else [ret] end end
# File metasm/cpu/ia32/encode.rb, line 90 def encode32(reg, endianness) # 0 => [ [0 ], [1 ], [2 ], [3 ], [:sib ], [:i32 ], [6 ], [7 ] ], \ # 1 => [ [0, :i8 ], [1, :i8 ], [2, :i8 ], [3, :i8 ], [:sib, :i8 ], [5, :i8 ], [6, :i8 ], [7, :i8 ] ], \ # 2 => [ [0, :i32], [1, :i32], [2, :i32], [3, :i32], [:sib, :i32], [5, :i32], [6, :i32], [7, :i32] ] # # b => 0 1 2 3 4 5+i|i 6 7 # i => 0 1 2 3 nil 5 6 7 ret = EncodedData.new << (reg << 3) # add bits in the first octet of ret.data (1.9 compatibility layer) or_bits = lambda { |v| # rape me if ret.data[0].kind_of? Integer ret.data[0] |= v else ret.data[0] = (ret.data[0].unpack('C').first | v).chr end } if not self.b and not self.i or_bits[5] [ret << @imm.encode(:a32, endianness)] elsif not self.b and self.s != 1 # sib with no b raise EncodeError, "Invalid ModRM #{self}" if @i.val == 4 or_bits[4] s = {8=>3, 4=>2, 2=>1}[@s] imm = self.imm || Expression[0] fu = (s << 6) | (@i.val << 3) | 5 fu = fu.chr if s >= 2 # rb1.9 encoding fix [ret << fu << imm.encode(:a32, endianness)] else imm = @imm.reduce if self.imm imm = nil if imm == 0 if not self.i or (not self.b and self.s == 1) # no sib byte (except for [esp]) b = self.b || self.i or_bits[b.val] ret << 0x24 if b.val == 4 else # sib or_bits[4] i, b = @i, @b b, i = i, b if @s == 1 and (i.val == 4 or b.val == 5) raise EncodeError, "Invalid ModRM #{self}" if i.val == 4 s = {8=>3, 4=>2, 2=>1, 1=>0}[@s] fu = (s << 6) | (i.val << 3) | b.val fu = fu.chr if s >= 2 # rb1.9 encoding fix ret << fu end imm ||= 0 if b.val == 5 if imm case Expression.in_range?(imm, :i8) when true or_bits[1<<6] [ret << Expression.encode_imm(imm, :i8, endianness)] when false or_bits[2<<6] [ret << Expression.encode_imm(imm, :a32, endianness)] when nil rets = ret.dup or_bits[1<<6] ret << @imm.encode(:i8, endianness) rets, ret = ret, rets # or_bits[] modifies ret directly or_bits[2<<6] ret << @imm.encode(:a32, endianness) [ret, rets] end else [ret] end end end