/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Alexander V. Astapchuk
*/
/**
* @file
* @brief Main decoding (disassembling) routines implementation.
*/
#include "dec_base.h"
#include "enc_prvt.h"
#include <stdio.h>
//#include "open/common.h"
/**
 * @brief Tells whether the byte at @p bytes is treated as an instruction prefix.
 *
 * Reported as prefixes:
 *  - 0xF0 (LOCK);
 *  - 0xF2 / 0xF3 (REPNZ / REPZ), unless the next byte is 0x0F, in which case
 *    the byte may be a part of a SIMD opcode;
 *  - 0x2E, 0x36, 0x3E, 0x26, 0x64, 0x65 (segment overrides / branch hints);
 *  - 0x67 (address-size prefix).
 *
 * The operand-size byte 0x66 is deliberately NOT reported as a prefix: it is
 * currently considered to be a part of the opcode.
 *
 * @param bytes start of the instruction. bytes[1] is read only when bytes[0]
 *              is 0xF2 or 0xF3.
 * @return true if bytes[0] is treated as a prefix, false otherwise.
 */
bool DecoderBase::is_prefix(const unsigned char * bytes)
{
    switch (bytes[0]) {
    case 0xF0:  // LOCK
        return true;

    case 0xF2:  // REPNZ
    case 0xF3:  // REPZ
        // .... but may be a part of SIMD opcode when followed by 0x0F
        return bytes[1] != 0x0F;

    // branch hints, segment prefixes
    case 0x2E:
    case 0x36:
    case 0x3E:
    case 0x26:
    case 0x64:
    case 0x65:  // was missing: 0x3E used to be tested twice instead
        return true;

    case 0x66:  // operand-size prefix
        //XXX - currently considered as part of opcode, with or without
        // a following 0x0F
        return false;

    case 0x67:  // address size prefix
        return true;

    default:
        return false;
    }
}
// Returns prefix count from 0 to 4, or ((unsigned int)-1) on error
unsigned int DecoderBase::fill_prefs(const unsigned char * bytes, Inst * pinst)
{
const unsigned char * my_bytes = bytes;
while( 1 )
{
unsigned char by1 = *my_bytes;
unsigned char by2 = *(my_bytes + 1);
Inst::PrefGroups where;
switch( by1 )
{
case InstPrefix_REPNE:
case InstPrefix_REP:
{
if( 0x0F == by2)
{
return pinst->prefc;
}
}
case InstPrefix_LOCK:
{
where = Inst::Group1;
break;
}
case InstPrefix_CS:
case InstPrefix_SS:
case InstPrefix_DS:
case InstPrefix_ES:
case InstPrefix_FS:
case InstPrefix_GS:
// case InstPrefix_HintTaken: the same as CS override
// case InstPrefix_HintNotTaken: the same as DS override
{
where = Inst::Group2;
break;
}
case InstPrefix_OpndSize:
{
//NOTE: prefix does not work for JMP Sz16, the opcode is 0x66 0xe9
// here 0x66 will be treated as prefix, try_mn will try to match the code starting at 0xe9
// it will match JMP Sz32 ...
//HACK: assume it is the last prefix, return any way
if( 0x0F == by2)
{
return pinst->prefc;
}
return pinst->prefc;
where = Inst::Group3;
break;
}
case InstPrefix_AddrSize:
{
where = Inst::Group4;
break;
}
default:
{
return pinst->prefc;
}
}
// Assertions are not allowed here.
// Error situations should result in returning error status
if (InstPrefix_Null != pinst->pref[where]) //only one prefix in each group
return (unsigned int)-1;
pinst->pref[where] = (InstPrefix)by1;
if (pinst->prefc >= 4) //no more than 4 prefixes
return (unsigned int)-1;
pinst->prefc++;
++my_bytes;
}
}
unsigned DecoderBase::decode(const void * addr, Inst * pinst)
{
Inst tmp;
//assert( *(unsigned char*)addr != 0x66);
const unsigned char * bytes = (unsigned char*)addr;
// Load up to 4 prefixes
// for each Mnemonic
unsigned int pref_count = fill_prefs(bytes, &tmp);
if (pref_count == (unsigned int)-1) // Wrong prefix sequence, or >4 prefixes
return 0; // Error
bytes += pref_count;
// for each opcodedesc
// if (raw_len == 0) memcmp(, raw_len)
// else check the mixed state which is one of the following:
// /digit /i /rw /rd /rb
bool found = false;
const unsigned char * saveBytes = bytes;
for (unsigned mn=1; mn<Mnemonic_Count; mn++) {
bytes = saveBytes;
found=try_mn((Mnemonic)mn, &bytes, &tmp);
if (found) {
tmp.mn = (Mnemonic)mn;
break;
}
}
if (!found) {
// Unknown opcode
return 0;
}
tmp.size = (unsigned)(bytes-(const unsigned char*)addr);
if (pinst) {
*pinst = tmp;
}
return tmp.size;
}
// EXTEND_REG(reg, flag): register index, optionally extended by a REX bit.
// Without _EM64T_ it is just (reg). With _EM64T_ it expects a pointer named
// 'rex' in the enclosing scope and yields reg + 8 when that pointer is
// non-NULL and its 'flag' field is non-zero; otherwise reg.
#ifndef _EM64T_
#define EXTEND_REG(reg, flag) (reg)
#else
#define EXTEND_REG(reg, flag) \
    ((NULL != rex && 0 != rex->flag) ? (reg + 8) : reg)
#endif
/**
 * @brief Decodes the instruction part described by one auxiliary opcode
 *        descriptor and appends the resulting operand(s) to @p pinst.
 *
 * @p aux is split into a kind (OpcodeByteKind_KindMask) and an opcode value
 * (OpcodeByteKind_OpcodeMask). Depending on the kind, the bytes at *pbuf are
 * consumed as a ModRM-encoded operand pair (/r), a ModRM operand with a
 * fixed reg field (/digit), a register encoded in the opcode byte
 * (+rb/+rw/+rd/+i), or an immediate / code offset of 1, 2, 4 (or, with
 * _EM64T_, 8) bytes.
 *
 * The rex parameter exists only in _EM64T_ builds, where EXTEND_REG reads
 * it; in other builds EXTEND_REG ignores its flag argument.
 *
 * @param odesc opcode description being matched.
 * @param aux   auxiliary descriptor (odesc.aux0 or odesc.aux1 in try_mn).
 * @param pbuf  in/out cursor into the instruction bytes; advanced past the
 *              bytes consumed on success.
 * @param pinst instruction being filled; operands[] and argc are updated.
 * @return true when the bytes fit the descriptor, false otherwise.
 */
bool DecoderBase::decode_aux(const EncoderBase::OpcodeDesc& odesc, unsigned aux,
    const unsigned char ** pbuf, Inst * pinst
#ifdef _EM64T_
    , const Rex UNREF *rex
#endif
    )
{
    const OpcodeByteKind kind = (OpcodeByteKind)(aux & OpcodeByteKind_KindMask);
    const unsigned opcodeBits = (aux & OpcodeByteKind_OpcodeMask);
    const unsigned firstByte = **pbuf;
    // Operand slot and descriptor at the argc value seen on entry.
    EncoderBase::Operand& target = pinst->operands[pinst->argc];
    const EncoderBase::OpndDesc& targetDesc = odesc.opnds[pinst->argc];

    switch (kind) {
    case OpcodeByteKind_SlashR:
    {
        // Bound to the ModRM byte itself, so it remains usable after
        // decodeModRM() has advanced *pbuf.
        const ModRM& modrm = *(ModRM*)*pbuf;
        // Memory-capable operand first: decode it before the register
        // operand; otherwise the register operand goes first.
        const bool memComesFirst = (targetDesc.kind & OpndKind_Mem) ? true : false;

        if (memComesFirst) {
            decodeModRM(odesc, pbuf, pinst
#ifdef _EM64T_
                , rex
#endif
                );
            ++pinst->argc;
        }

        // Register operand taken from modrm.reg, described by the
        // descriptor at the current argc.
        const EncoderBase::OpndDesc& regDesc = odesc.opnds[pinst->argc];
        const OpndKind regKind =
            ((regDesc.kind & OpndKind_XMMReg) || regDesc.size==OpndSize_64)
                ? OpndKind_XMMReg : OpndKind_GPReg;
        const RegName regName =
            getRegName(regKind, regDesc.size, EXTEND_REG(modrm.reg, r));
        pinst->operands[pinst->argc] = EncoderBase::Operand(regName);
        ++pinst->argc;

        if (!memComesFirst) {
            decodeModRM(odesc, pbuf, pinst
#ifdef _EM64T_
                , rex
#endif
                );
            ++pinst->argc;
        }
        return true;
    }

    case OpcodeByteKind_rb:
    case OpcodeByteKind_rw:
    case OpcodeByteKind_rd:
    {
        // The register index is the distance between the actual byte and
        // the base opcode value; only 0..7 is accepted.
        const unsigned regIndex = firstByte - opcodeBits;
        if (regIndex > 7) {
            return false;
        }
        OpndSize regSize;
        if (OpcodeByteKind_rb == kind) {
            regSize = OpndSize_8;
        }
        else if (OpcodeByteKind_rw == kind) {
            regSize = OpndSize_16;
        }
        else {
            // OpcodeByteKind_rd
            regSize = OpndSize_32;
        }
        target = EncoderBase::Operand( getRegName(OpndKind_GPReg, regSize, regIndex) );
        ++pinst->argc;
        ++*pbuf;
        return true;
    }

    case OpcodeByteKind_cb:
    case OpcodeByteKind_ib:
    {
        // 1-byte code offset / immediate
        char value = *(char*)*pbuf;
        target = EncoderBase::Operand(value);
        ++pinst->argc;
        *pbuf += 1;
        return true;
    }

    case OpcodeByteKind_cw:
        // cw: not an error, but not expected in current env (Android x86)
    case OpcodeByteKind_iw:
    {
        // 2-byte code offset / immediate
        short value = *(short*)*pbuf;
        target = EncoderBase::Operand(value);
        ++pinst->argc;
        *pbuf += 2;
        return true;
    }

    case OpcodeByteKind_cd:
    case OpcodeByteKind_id:
    {
        // 4-byte code offset / immediate
        int value = *(int*)*pbuf;
        target = EncoderBase::Operand(value);
        ++pinst->argc;
        *pbuf += 4;
        return true;
    }

#ifdef _EM64T_
    case OpcodeByteKind_io:
    {
        // 8-byte immediate
        long long int value = *(long long int*)*pbuf;
        target = EncoderBase::Operand(OpndSize_64, value);
        ++pinst->argc;
        *pbuf += 8;
        return true;
    }
#endif

    case OpcodeByteKind_SlashNum:
    {
        // The reg field of ModRM must equal the digit from the descriptor.
        const ModRM& modrm = *(ModRM*)*pbuf;
        if (modrm.reg != opcodeBits) {
            return false;
        }
        decodeModRM(odesc, pbuf, pinst
#ifdef _EM64T_
            , rex
#endif
            );
        ++pinst->argc;
        return true;
    }

    case OpcodeByteKind_plus_i:
    {
        // Only the byte is validated and skipped; no operand is recorded.
        const unsigned regIndex = firstByte - opcodeBits;
        if (regIndex > 7) {
            return false;
        }
        ++*pbuf;
        return true;
    }

    case OpcodeByteKind_ZeroOpcodeByte: // cant be here
        return false;

    default:
        // unknown kind ? how comes ?
        return false;
    }
    return false;
}
/**
 * @brief Tries to match the bytes at *pbuf against every opcode description
 *        of mnemonic @p mn.
 *
 * Each description from EncoderBase::opcodes[mn] (up to the entry marked
 * 'last') is tried in turn, always starting from the position *pbuf had on
 * entry and from the operand count pinst->argc had on entry. A description
 * matches when its fixed opcode bytes compare equal and its auxiliary
 * descriptors (aux0, then aux1 if present) are accepted by decode_aux().
 *
 * Restoring argc per attempt matters because decode_aux() advances argc as
 * it stores operands: without it, an attempt that decodes aux0 and is then
 * rejected on aux1 would leave a stale operand count for the next attempts.
 *
 * @param mn    mnemonic whose opcode descriptions are tried.
 * @param pbuf  in/out cursor; on success points just past the matched
 *              bytes. On failure its value is whatever the last attempt
 *              left, so the caller has to reset it.
 * @param pinst instruction being filled; on success odesc points to the
 *              matching description and operands/argc hold the decoded
 *              operands. On failure argc is restored to its entry value.
 * @return true if some description matched, false otherwise.
 */
bool DecoderBase::try_mn(Mnemonic mn, const unsigned char ** pbuf, Inst * pinst) {
    const unsigned char * const save_pbuf = *pbuf;
    const unsigned save_argc = pinst->argc;
    EncoderBase::OpcodeDesc * opcodes = EncoderBase::opcodes[mn];

    for (unsigned i=0; !opcodes[i].last; i++) {
        const EncoderBase::OpcodeDesc& odesc = opcodes[i];
        const char *opcode_ptr = odesc.opcode;
        int opcode_len = odesc.opcode_len;
#ifdef _EM64T_
        Rex *prex = NULL;
        Rex rex;
#endif
        // Every attempt starts from the same bytes and operand count.
        *pbuf = save_pbuf;
        pinst->argc = save_argc;
#ifdef _EM64T_
        // Match REX prefixes
        unsigned char rex_byte = (*pbuf)[0];
        if ((rex_byte & 0xf0) == 0x40)
        {
            if ((rex_byte & 0x08) != 0)
            {
                // Have REX.W
                if (opcode_len > 0 && opcode_ptr[0] == 0x48)
                {
                    // Have REX.W in opcode. All mnemonics that allow
                    // REX.W have to have specified it in opcode,
                    // otherwise it is not allowed
                    rex = *(Rex *)*pbuf;
                    prex = &rex;
                    (*pbuf)++;
                    opcode_ptr++;
                    opcode_len--;
                }
            }
            else
            {
                // No REX.W, so it doesn't have to be in opcode. We
                // have REX.B, REX.X, REX.R or their combination, but
                // not in opcode, they may extend any part of the
                // instruction
                rex = *(Rex *)*pbuf;
                prex = &rex;
                (*pbuf)++;
            }
        }
#endif
        // Fixed opcode bytes must match exactly.
        if (opcode_len != 0) {
            if (memcmp(*pbuf, opcode_ptr, opcode_len)) {
                continue;
            }
            *pbuf += opcode_len;
        }

        if (odesc.aux0 == 0) {
            // Can't have empty opcode
            assert(opcode_len != 0);
        }
        else {
            if (!decode_aux(odesc, odesc.aux0, pbuf, pinst
#ifdef _EM64T_
                            , prex
#endif
                            )) {
                continue;
            }
            if (odesc.aux1 != 0) {
                if (!decode_aux(odesc, odesc.aux1, pbuf, pinst
#ifdef _EM64T_
                                , prex
#endif
                                )) {
                    continue;
                }
            }
        }
        pinst->odesc = &opcodes[i];
        return true;
    }

    // Nothing matched: do not leak operands counted by a rejected attempt.
    pinst->argc = save_argc;
    return false;
}
/**
 * @brief Decodes the ModRM byte at *pbuf, plus the SIB byte and displacement
 *        when present, into the operand pinst->operands[pinst->argc].
 *
 * With mod == 3 the operand is a register selected by modrm.rm. Otherwise a
 * memory operand is built from base, index, scale and displacement; base
 * and index registers are always taken as 32-bit GP registers. pinst->argc
 * itself is not modified here.
 *
 * The 8-bit displacement is read as a signed byte so that it is
 * sign-extended regardless of whether plain char is signed on the host.
 *
 * @param odesc opcode description; odesc.opnds[pinst->argc] supplies the
 *              operand kind and size.
 * @param pbuf  in/out cursor; advanced past ModRM, SIB and displacement.
 * @param pinst instruction whose current operand slot is written.
 * @return always true.
 */
bool DecoderBase::decodeModRM(const EncoderBase::OpcodeDesc& odesc,
    const unsigned char ** pbuf, Inst * pinst
#ifdef _EM64T_
    , const Rex *rex
#endif
    )
{
    EncoderBase::Operand& opnd = pinst->operands[pinst->argc];
    const EncoderBase::OpndDesc& opndDesc = odesc.opnds[pinst->argc];

    //XXX debug ///assert(0x66 != *(*pbuf-2));
    const ModRM& modrm = *(ModRM*)*pbuf;
    *pbuf += 1;

    // On x86_64 all mnemonics that allow REX.W have REX.W in opcode.
    // Therefore REX.W is simply ignored, and opndDesc.size is used
    if (modrm.mod == 3) {
        // we have only modrm. no sib, no disp.
        // Android x86: Use XMMReg for 64b operand.
        OpndKind okind = ((opndDesc.kind & OpndKind_XMMReg) || opndDesc.size == OpndSize_64) ? OpndKind_XMMReg : OpndKind_GPReg;
        RegName reg = getRegName(okind, opndDesc.size, EXTEND_REG(modrm.rm, b));
        opnd = EncoderBase::Operand(reg);
        return true;
    }

    RegName base = RegName_Null;
    RegName index = RegName_Null;
    int disp = 0;
    unsigned scale = 0;

    //Android x86: m16, m32, m64: mean a byte[word|doubleword] operand in memory
    //base and index should be 32 bits!!!

    // Only bound here; its fields are read solely when a SIB byte is present.
    const SIB& sib = *(SIB*)*pbuf;
    const bool hasSib = (modrm.rm == 4);

    if (hasSib) {
        *pbuf += 1;
        // scale = sib.scale == 0 ? 0 : (1<<sib.scale);
        scale = (1<<sib.scale);
        if (sib.index != 4) {
            index = getRegName(OpndKind_GPReg, OpndSize_32, EXTEND_REG(sib.index, x)); //Android x86: OpndDesc.size
        }
        // else (sib.index == 4) => no index; %esp can't be sib.index
        if (sib.base != 5 || modrm.mod != 0) {
            base = getRegName(OpndKind_GPReg, OpndSize_32, EXTEND_REG(sib.base, b)); //Android x86: OpndDesc.size
        }
        // else (sib.base == 5 && modrm.mod == 0) => no base
    }
    else if (modrm.mod != 0 || modrm.rm != 5) {
        base = getRegName(OpndKind_GPReg, OpndSize_32, EXTEND_REG(modrm.rm, b)); //Android x86: OpndDesc.size
    }
    // else mod=0 && rm == 5 => only disp32

    //update disp and pbuf
    if (modrm.mod == 2) {
        // have disp32
        disp = *(int*)*pbuf;
        *pbuf += 4;
    }
    else if (modrm.mod == 1) {
        // have disp8, sign-extended
        disp = *(const signed char*)*pbuf;
        *pbuf += 1;
    }
    else {
        assert(modrm.mod == 0);
        // disp32 w/o sib (rm == 5), or disp32 with SIB whose base is 5
        if (modrm.rm == 5 || (hasSib && sib.base == 5)) {
            disp = *(int*)*pbuf;
            *pbuf += 4;
        }
    }
    opnd = EncoderBase::Operand(opndDesc.size, base, index, scale, disp);
    return true;
}