// dec_base.cpp - Android社区 - https://www.androidos.net.cn/

/*
 *  Licensed to the Apache Software Foundation (ASF) under one or more
 *  contributor license agreements.  See the NOTICE file distributed with
 *  this work for additional information regarding copyright ownership.
 *  The ASF licenses this file to You under the Apache License, Version 2.0
 *  (the "License"); you may not use this file except in compliance with
 *  the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */
/**
 * @author Alexander V. Astapchuk
 */

/**
 * @file
 * @brief Main decoding (disassembling) routines implementation.
 */

#include "dec_base.h"
#include "enc_prvt.h"
#include <stdio.h>
#include <string.h>
//#include "open/common.h"

/**
 * Tells whether the byte at @c bytes is treated as an instruction prefix.
 *
 * The byte following the candidate is inspected only for 0xF2/0xF3, where
 * a trailing 0x0F escape byte means the pair belongs to a SIMD opcode.
 *
 * @param bytes pointer to the candidate byte; for 0xF2/0xF3 the next byte
 *              must be readable as well
 * @return true if the byte is a prefix, false otherwise. Note that the
 *         operand-size byte 0x66 is deliberately reported as NOT a prefix,
 *         because it is currently considered a part of the opcode.
 */
bool DecoderBase::is_prefix(const unsigned char * bytes)
{
    switch (bytes[0]) {
    case 0xF0:  // LOCK
        return true;
    case 0xF2:  // REPNZ
    case 0xF3:  // REPZ
        // .... but may be a part of SIMD opcode
        return bytes[1] != 0x0F;
    case 0x2E:  // segment overrides / branch hints
    case 0x36:
    case 0x3E:
    case 0x26:
    case 0x64:
    case 0x65:  // was a second, duplicated 0x3E test - GS override was missed
        return true;
    case 0x66:  // operand-size prefix
        //XXX - currently considered as part of opcode, with or without
        // a following 0x0F
        return false;
    case 0x67:  // address size prefix
        return true;
    default:
        return false;
    }
}

/**
 * Collects the prefixes that precede an instruction into @c pinst.
 *
 * Each recognized prefix byte is stored into pinst->pref[] at the slot of
 * its group and pinst->prefc is incremented. Scanning stops at the first
 * byte that is not taken as a prefix.
 *
 * @param bytes start of the instruction
 * @param pinst instruction to fill; its prefc/pref[] are updated in place,
 *              and the returned count is pinst->prefc itself
 * @return prefix count from 0 to 4, or ((unsigned int)-1) on error (two
 *         prefixes of the same group, or more than 4 prefixes)
 */
unsigned int DecoderBase::fill_prefs(const unsigned char * bytes, Inst * pinst)
{
    const unsigned char * my_bytes = bytes;

    while( 1 )
    {
        const unsigned char by1 = *my_bytes;
        Inst::PrefGroups where;

        switch( by1 )
        {
        case InstPrefix_REPNE:
        case InstPrefix_REP:
        {
            // Followed by the 0x0F escape byte this is a part of an opcode,
            // not a prefix. The next byte is read only here, so that the
            // scan never looks past the first opcode byte.
            if( 0x0F == *(my_bytes + 1) )
            {
                return pinst->prefc;
            }
            where = Inst::Group1;
            break;
        }
        case InstPrefix_LOCK:
        {
            where = Inst::Group1;
            break;
        }
        case InstPrefix_CS:
        case InstPrefix_SS:
        case InstPrefix_DS:
        case InstPrefix_ES:
        case InstPrefix_FS:
        case InstPrefix_GS:
//      case InstPrefix_HintTaken: the same as CS override
//      case InstPrefix_HintNotTaken: the same as DS override
        {
            where = Inst::Group2;
            break;
        }
        case InstPrefix_OpndSize:
        {
//NOTE:   prefix does not work for JMP Sz16, the opcode is 0x66 0xe9
//        if 0x66 were treated as prefix, try_mn would try to match the code
//        starting at 0xe9 and it would match JMP Sz32 ...
//HACK:   0x66 is never consumed as a prefix: it is left in place as the
//        first opcode byte, whatever byte follows it.
            return pinst->prefc;
        }
        case InstPrefix_AddrSize:
        {
            where = Inst::Group4;
            break;
        }
        default:
        {
            return pinst->prefc;
        }
        }
        // Assertions are not allowed here.
        // Error situations should result in returning error status
        if (InstPrefix_Null != pinst->pref[where]) //only one prefix in each group
            return (unsigned int)-1;

        pinst->pref[where] = (InstPrefix)by1;

        if (pinst->prefc >= 4) //no more than 4 prefixes
            return (unsigned int)-1;

        pinst->prefc++;
        ++my_bytes;
    }
}



unsigned DecoderBase::decode(const void * addr, Inst * pinst)
{
    Inst tmp;

    //assert( *(unsigned char*)addr != 0x66);

    const unsigned char * bytes = (unsigned char*)addr;

    // Load up to 4 prefixes
    // for each Mnemonic
    unsigned int pref_count = fill_prefs(bytes, &tmp);

    if (pref_count == (unsigned int)-1) // Wrong prefix sequence, or >4 prefixes
        return 0; // Error

    bytes += pref_count;

    //  for each opcodedesc
    //      if (raw_len == 0) memcmp(, raw_len)
    //  else check the mixed state which is one of the following:
    //      /digit /i /rw /rd /rb

    bool found = false;
    const unsigned char * saveBytes = bytes;
    for (unsigned mn=1; mn<Mnemonic_Count; mn++) {
        bytes = saveBytes;
        found=try_mn((Mnemonic)mn, &bytes, &tmp);
        if (found) {
            tmp.mn = (Mnemonic)mn;
            break;
        }
    }
    if (!found) {
        // Unknown opcode
        return 0;
    }
    tmp.size = (unsigned)(bytes-(const unsigned char*)addr);
    if (pinst) {
        *pinst = tmp;
    }
    return tmp.size;
}

// EXTEND_REG(reg, flag) - applies a REX extension bit to a register index.
// On _EM64T_ builds the expansion refers to a variable named 'rex' that must
// be visible at the point of use: the result is reg + 8 when rex is non-NULL
// and rex->flag is non-zero, and reg otherwise. On other builds the macro
// yields reg unchanged and neither 'rex' nor 'flag' is evaluated.
// The 'reg' argument is parenthesized so that any expression may be passed.
#ifdef _EM64T_
#define EXTEND_REG(reg, flag)                        \
    ((NULL == rex || 0 == rex->flag) ? (reg) : ((reg) + 8))
#else
#define EXTEND_REG(reg, flag) (reg)
#endif

/**
 * Decodes one auxiliary element (aux0 or aux1) of an opcode description.
 *
 * The kind of the element is taken from the OpcodeByteKind_KindMask bits of
 * @c aux and its value from the OpcodeByteKind_OpcodeMask bits. Depending
 * on the kind, a ModRM-based operand pair, a register encoded in the opcode
 * byte, a relative offset or an immediate is read from *pbuf and stored into
 * the next free operand(s) of @c pinst.
 *
 * Multi-byte offsets and immediates are fetched with memcpy(): the
 * instruction stream gives no alignment guarantee, and reading it through a
 * cast short* / int* pointer is not a valid access.
 *
 * @param odesc opcode description currently being matched
 * @param aux   the auxiliary element to decode
 * @param pbuf  in/out cursor in the instruction stream; advanced past the
 *              consumed bytes when the element matches
 * @param pinst instruction being filled; operands[] and argc are updated
 * @param rex   (_EM64T_ builds only) REX prefix of the instruction, or NULL;
 *              it is consumed through EXTEND_REG and passed to decodeModRM
 * @return true if the element matched and was decoded, false otherwise
 */
bool DecoderBase::decode_aux(const EncoderBase::OpcodeDesc& odesc, unsigned aux,
    const unsigned char ** pbuf, Inst * pinst
#ifdef _EM64T_
    , const Rex UNREF *rex
#endif
    )
{
    OpcodeByteKind kind = (OpcodeByteKind)(aux & OpcodeByteKind_KindMask);
    unsigned byte = (aux & OpcodeByteKind_OpcodeMask);
    unsigned data_byte = **pbuf;
    // Both references are bound to the operand slot that is current on entry
    EncoderBase::Operand& opnd = pinst->operands[pinst->argc];
    const EncoderBase::OpndDesc& opndDesc = odesc.opnds[pinst->argc];

    switch (kind) {
    case OpcodeByteKind_SlashR:
        {
            RegName reg;
            OpndKind okind;
            // modrm refers to the ModRM byte itself, so it stays valid after
            // decodeModRM() has advanced *pbuf
            const ModRM& modrm = *(ModRM*)*pbuf;
            if (opndDesc.kind & OpndKind_Mem) { // 1st operand is memory
                decodeModRM(odesc, pbuf, pinst
#ifdef _EM64T_
                            , rex
#endif
                            );
                ++pinst->argc;
                const EncoderBase::OpndDesc& opndDesc2 = odesc.opnds[pinst->argc];
                okind = ((opndDesc2.kind & OpndKind_XMMReg) || opndDesc2.size==OpndSize_64) ? OpndKind_XMMReg : OpndKind_GPReg;
                EncoderBase::Operand& regOpnd = pinst->operands[pinst->argc];
                reg = getRegName(okind, opndDesc2.size, EXTEND_REG(modrm.reg, r));
                regOpnd = EncoderBase::Operand(reg);
            } else {                            // 2nd operand is memory
                okind = ((opndDesc.kind & OpndKind_XMMReg) || opndDesc.size==OpndSize_64) ? OpndKind_XMMReg : OpndKind_GPReg;
                EncoderBase::Operand& regOpnd = pinst->operands[pinst->argc];
                reg = getRegName(okind, opndDesc.size, EXTEND_REG(modrm.reg, r));
                regOpnd = EncoderBase::Operand(reg);
                ++pinst->argc;
                decodeModRM(odesc, pbuf, pinst
#ifdef _EM64T_
                            , rex
#endif
                            );
            }
            ++pinst->argc;
        }
        return true;
    case OpcodeByteKind_rb:
    case OpcodeByteKind_rw:
    case OpcodeByteKind_rd:
        {
            // The register index is the difference between the actual
            // opcode byte and the base opcode value of the description.
            // The subtraction is unsigned, so a byte below the base wraps
            // around and is rejected by the range check too.
            unsigned regid = data_byte - byte;
            if (regid>7) {
                return false;
            }
            OpndSize opnd_size = OpndSize_32;   // OpcodeByteKind_rd
            if (kind == OpcodeByteKind_rb) {
                opnd_size = OpndSize_8;
            } else if (kind == OpcodeByteKind_rw) {
                opnd_size = OpndSize_16;
            }
            opnd = EncoderBase::Operand( getRegName(OpndKind_GPReg, opnd_size, regid) );

            ++pinst->argc;
            ++*pbuf;
            return true;
        }
    case OpcodeByteKind_cb:
        {
        // 'char' is kept on purpose: the type selects the Operand constructor
        char offset = *(const char*)*pbuf;
        *pbuf += 1;
        opnd = EncoderBase::Operand(offset);
        ++pinst->argc;
        //pinst->direct_addr = (void*)(pinst->offset + *pbuf);
        }
        return true;
    case OpcodeByteKind_cw:
        // not an error, but not expected in current env
        // Android x86
        {
        short offset;
        memcpy(&offset, *pbuf, sizeof(offset));
        *pbuf += 2;
        opnd = EncoderBase::Operand(offset);
        ++pinst->argc;
        }
        return true;
        //return false;
    case OpcodeByteKind_cd:
        {
        int offset;
        memcpy(&offset, *pbuf, sizeof(offset));
        *pbuf += 4;
        opnd = EncoderBase::Operand(offset);
        ++pinst->argc;
        }
        return true;
    case OpcodeByteKind_SlashNum:
        {
        // The reg field of ModRM must carry the digit from the description
        const ModRM& modrm = *(ModRM*)*pbuf;
        if (modrm.reg != byte) {
            return false;
        }
        decodeModRM(odesc, pbuf, pinst
#ifdef _EM64T_
                        , rex
#endif
                        );
        ++pinst->argc;
        }
        return true;
    case OpcodeByteKind_ib:
        {
        char ival = *(const char*)*pbuf;
        opnd = EncoderBase::Operand(ival);
        ++pinst->argc;
        *pbuf += 1;
        }
        return true;
    case OpcodeByteKind_iw:
        {
        short ival;
        memcpy(&ival, *pbuf, sizeof(ival));
        opnd = EncoderBase::Operand(ival);
        ++pinst->argc;
        *pbuf += 2;
        }
        return true;
    case OpcodeByteKind_id:
        {
        int ival;
        memcpy(&ival, *pbuf, sizeof(ival));
        opnd = EncoderBase::Operand(ival);
        ++pinst->argc;
        *pbuf += 4;
        }
        return true;
#ifdef _EM64T_
    case OpcodeByteKind_io:
        {
        long long int ival;
        memcpy(&ival, *pbuf, sizeof(ival));
        opnd = EncoderBase::Operand(OpndSize_64, ival);
        ++pinst->argc;
        *pbuf += 8;
        }
        return true;
#endif
    case OpcodeByteKind_plus_i:
        {
            // Only the opcode byte is validated and skipped here;
            // no operand is recorded for this kind.
            unsigned regid = data_byte - byte;
            if (regid>7) {
                return false;
            }
            ++*pbuf;
            return true;
        }
    case OpcodeByteKind_ZeroOpcodeByte: // cant be here
        return false;
    default:
        // unknown kind ? how comes ?
        break;
    }
    return false;
}

/**
 * Tries to match the bytes at *pbuf against every opcode description of
 * the given mnemonic, in table order, and decodes the first one that fits.
 *
 * Every candidate starts from the same state: both the stream cursor and
 * pinst->argc are reset to their values on entry. Without the argc reset, a
 * candidate that failed after having consumed an operand (aux0 decoded,
 * aux1 rejected) would leave later candidates indexing operands from a
 * stale position.
 *
 * @param mn    mnemonic whose opcode descriptions are tried
 * @param pbuf  in/out cursor; on success it points right after the decoded
 *              instruction. On failure it is left wherever the last
 *              candidate stopped, so callers must reset it themselves.
 * @param pinst instruction being filled; on success its operands, argc and
 *              odesc describe the match, on failure argc is restored
 * @return true if one of the descriptions matched, false otherwise
 */
bool DecoderBase::try_mn(Mnemonic mn, const unsigned char ** pbuf, Inst * pinst) {
    const unsigned char * const save_pbuf = *pbuf;
    const unsigned save_argc = pinst->argc;
    EncoderBase::OpcodeDesc * opcodes = EncoderBase::opcodes[mn];

    for (unsigned i=0; !opcodes[i].last; i++) {
        const EncoderBase::OpcodeDesc& odesc = opcodes[i];
        const char *opcode_ptr = odesc.opcode;
        int opcode_len = odesc.opcode_len;
#ifdef _EM64T_
        Rex *prex = NULL;
        Rex rex;
#endif

        // Start every candidate from scratch
        *pbuf = save_pbuf;
        pinst->argc = save_argc;
#ifdef _EM64T_
        // Match REX prefixes
        unsigned char rex_byte = (*pbuf)[0];
        if ((rex_byte & 0xf0) == 0x40)
        {
            if ((rex_byte & 0x08) != 0)
            {
                // Have REX.W
                if (opcode_len > 0 && opcode_ptr[0] == 0x48)
                {
                    // Have REX.W in opcode. All mnemonics that allow
                    // REX.W have to have specified it in opcode,
                    // otherwise it is not allowed
                    rex = *(Rex *)*pbuf;
                    prex = &rex;
                    (*pbuf)++;
                    opcode_ptr++;
                    opcode_len--;
                }
            }
            else
            {
                // No REX.W, so it doesn't have to be in opcode. We
                // have REX.B, REX.X, REX.R or their combination, but
                // not in opcode, they may extend any part of the
                // instruction
                rex = *(Rex *)*pbuf;
                prex = &rex;
                (*pbuf)++;
            }
        }
#endif
        // Fixed part of the opcode must match byte for byte
        if (opcode_len != 0) {
            if (memcmp(*pbuf, opcode_ptr, opcode_len)) {
                continue;
            }
            *pbuf += opcode_len;
        }
        if (odesc.aux0 != 0) {

            if (!decode_aux(odesc, odesc.aux0, pbuf, pinst
#ifdef _EM64T_
                            , prex
#endif
                            )) {
                continue;
            }
            if (odesc.aux1 != 0) {
                if (!decode_aux(odesc, odesc.aux1, pbuf, pinst
#ifdef _EM64T_
                            , prex
#endif
                            )) {
                    continue;
                }
            }
        }
        else {
            // Can't have empty opcode
            assert(opcode_len != 0);
        }
        pinst->odesc = &opcodes[i];
        return true;
    }
    // Nothing matched: do not leak operands counted by a failed candidate
    pinst->argc = save_argc;
    return false;
}

/**
 * Decodes the ModRM byte at *pbuf, together with the SIB byte and the
 * displacement that may follow it, into the current operand of @c pinst
 * (pinst->operands[pinst->argc]). pinst->argc itself is not changed.
 *
 * With mod == 3 the operand is a register; otherwise it is a memory operand
 * built from base, index, scale and displacement.
 *
 * The 32-bit displacement is fetched with memcpy(), as the instruction
 * stream gives no alignment guarantee. The 8-bit displacement is read as
 * 'signed char', so it is sign-extended whether or not plain 'char' is
 * signed on the host.
 *
 * @param odesc opcode description; supplies kind and size of the operand
 * @param pbuf  in/out cursor; advanced past ModRM, SIB and displacement
 * @param pinst instruction whose current operand is filled
 * @param rex   (_EM64T_ builds only) REX prefix or NULL, used via EXTEND_REG
 * @return always true
 */
bool DecoderBase::decodeModRM(const EncoderBase::OpcodeDesc& odesc,
    const unsigned char ** pbuf, Inst * pinst
#ifdef _EM64T_
    , const Rex *rex
#endif
    )
{
    EncoderBase::Operand& opnd = pinst->operands[pinst->argc];
    const EncoderBase::OpndDesc& opndDesc = odesc.opnds[pinst->argc];

    //XXX debug ///assert(0x66 != *(*pbuf-2));
    const ModRM& modrm = *(ModRM*)*pbuf;
    *pbuf += 1;

    // On x86_64 all mnemonics that allow REX.W have REX.W in opcode.
    // Therefore REX.W is simply ignored, and opndDesc.size is used

    if (modrm.mod == 3) {
        // we have only modrm. no sib, no disp.
        // Android x86: Use XMMReg for 64b operand.
        OpndKind okind = ((opndDesc.kind & OpndKind_XMMReg) || opndDesc.size == OpndSize_64) ? OpndKind_XMMReg : OpndKind_GPReg;
        RegName reg = getRegName(okind, opndDesc.size, EXTEND_REG(modrm.rm, b));
        opnd = EncoderBase::Operand(reg);
        return true;
    }

    RegName base = RegName_Null;
    RegName index = RegName_Null;
    int disp = 0;
    unsigned scale = 0;

    //Android x86: m16, m32, m64: mean a byte[word|doubleword] operand in memory
    //base and index should be 32 bits!!!

    // rm == 4 means that a SIB byte follows. 'sib' is only bound here;
    // it is never read unless has_sib is true.
    const bool has_sib = (modrm.rm == 4);
    const SIB& sib = *(SIB*)*pbuf;
    if (has_sib) {
        *pbuf += 1;
        // scale = sib.scale == 0 ? 0 : (1<<sib.scale);
        scale = (1<<sib.scale);
        if (sib.index != 4) {
            index = getRegName(OpndKind_GPReg, OpndSize_32, EXTEND_REG(sib.index, x)); //Android x86: OpndDesc.size
        }
        // else (sib.index == 4) => no index; %esp can't be sib.index

        if (sib.base != 5 || modrm.mod != 0) {
            base = getRegName(OpndKind_GPReg, OpndSize_32, EXTEND_REG(sib.base, b)); //Android x86: OpndDesc.size
        }
        // else (sib.base == 5 && modrm.mod == 0) => no base
    }
    else if (modrm.mod != 0 || modrm.rm != 5) {
        base = getRegName(OpndKind_GPReg, OpndSize_32, EXTEND_REG(modrm.rm, b)); //Android x86: OpndDesc.size
    }
    // else mod == 0 && rm == 5 => only disp32

    //update disp and pbuf
    if (modrm.mod == 1) {
        // have disp8
        disp = *reinterpret_cast<const signed char *>(*pbuf);
        *pbuf += 1;
    }
    else {
        bool has_disp32;
        if (modrm.mod == 2) {
            has_disp32 = true;
        }
        else {
            assert(modrm.mod == 0);
            // disp32 w/o SIB (rm == 5), or disp32 with SIB and no base
            has_disp32 = (modrm.rm == 5) || (has_sib && sib.base == 5);
        }
        if (has_disp32) {
            memcpy(&disp, *pbuf, sizeof(disp));
            *pbuf += 4;
        }
    }
    opnd = EncoderBase::Operand(opndDesc.size, base, index, scale, disp);
    return true;
}