/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author Alexander V. Astapchuk
*/
/**
* @file
* @brief Main encoding routines and structures.
*/
#ifndef __ENC_BASE_H_INCLUDED__
#define __ENC_BASE_H_INCLUDED__
#include "enc_defs.h"
#include <stdlib.h>
#include <assert.h>
#include <memory.h>
ENCODER_NAMESPACE_START
struct MnemonicInfo;
struct OpcodeInfo;
struct Rex;
/**
* @brief Basic facilities for generation of processor's instructions.
*
* The class EncoderBase represents the basic facilities for the encoding of
* processor's instructions on IA32 and EM64T platforms.
*
* The class provides general interface to generate the instructions as well
* as to retrieve some static data about instructions (number of arguments,
* their roles, etc).
*
* Currently, the EncoderBase class is used for both LIL and Jitrino code
* generators. Each of these code generators has its own wrapper to adapt
* this general interface for specific needs - see encoder.h for LIL wrappers
* and Ia32Encoder.h for Jitrino's adapter.
*
* Interface is provided through static methods, no instances of EncoderBase
* to be created.
*
* @todo RIP-based addressing on EM64T - it's not yet supported currently.
*/
class EncoderBase {
public:
class Operands;
struct MnemonicDesc;
/**
* @brief Generates processor's instruction.
*
* @param stream - a buffer to generate into
* @param mn - \link Mnemonic mnemonic \endlink of the instruction
* @param opnds - operands for the instruction
* @returns (stream + length of the just generated instruction)
*/
static char * encode(char * stream, Mnemonic mn, const Operands& opnds);
static char * getOpndLocation(int index);
/**
* @brief Generates the smallest possible number of NOP-s.
*
* Effectively generates the smallest possible number of instructions,
* which are NOP-s for CPU. Normally used to make a code alignment.
*
* The method inserts exactly number of bytes specified. It's a caller's
* responsibility to make sure the buffer is big enough.
*
* @param stream - buffer where to generate code into, can not be NULL
* @param howMany - how many bytes to fill with NOP-s
* @return \c (stream+howMany)
*/
static char * nops(char * stream, unsigned howMany);
/**
* @brief Inserts a prefix into the code buffer.
*
* The method writes no more than one byte into the buffer. This is a
* caller's responsibility to make sure the buffer is big enough.
*
* @param stream - buffer where to insert the prefix
* @param pref - prefix to be inserted. If it's InstPrefix_Null, then
* no action performed and return value is \c stream.
* @return \c (stream+1) if pref is not InstPrefix_Null, or \c stream
* otherwise
*/
static char * prefix(char* stream, InstPrefix pref);
/**
* @brief Determines if operand with opndExt suits the position with instExt.
*/
static bool extAllowed(OpndExt opndExt, OpndExt instExt);
/**
 * @brief Looks up the #MnemonicDesc that belongs to the given Mnemonic.
 *
 * @param mn - mnemonic to look up; must be less than Mnemonic_Count (this
 *             is checked by an assert only)
 * @return pointer to the entry of the mnemonics table indexed by \c mn
 */
static const MnemonicDesc * getMnemonicDesc(Mnemonic mn)
{
    assert(mn < Mnemonic_Count);
    const MnemonicDesc * const desc = &mnemonics[mn];
    return desc;
}
/**
* @brief Returns a Mnemonic for the given name.
*
* The lookup is case insensitive, if no mnemonic found for the given
* string, then Mnemonic_Null returned.
*/
static Mnemonic str2mnemonic(const char * mn_name);
/**
 * @brief Returns the print name of the given Mnemonic.
 *
 * The name is read from the mnemonic's #MnemonicDesc. If an invalid
 * mnemonic is passed, then the behavior is unpredictable.
 *
 * @param mn - mnemonic whose name is requested
 * @return the \c name field of the descriptor returned by getMnemonicDesc()
 */
static const char * getMnemonicString(Mnemonic mn)
{
    const MnemonicDesc * desc = getMnemonicDesc(mn);
    return desc->name;
}
/**
 * @brief Shorter alias for getMnemonicString().
 *
 * @param mn - mnemonic whose name is requested
 * @return the print name stored in the mnemonic's #MnemonicDesc
 */
static const char * toStr(Mnemonic mn)
{
    return getMnemonicString(mn);
}
/**
* @brief Description of operand.
*
* Description of an operand in opcode - its kind, size, extension, and the
* RegName if the operand must be a particular register.
*/
struct OpndDesc {
/**
* @brief Location of the operand.
*
* May be a mask, i.e. OpndKind_Imm|OpndKind_Mem.
*/
OpndKind kind;
/**
* @brief Size of the operand.
*/
OpndSize size;
/**
* @brief Extension of the operand.
*/
OpndExt ext;
/**
* @brief Appropriate RegName if operand must reside on a particular
* register (i.e. CWD/CDQ instructions), RegName_Null
* otherwise.
*/
RegName reg;
};
/**
* @brief Description of operands' roles in instruction.
*/
struct OpndRolesDesc {
/**
* @brief Total number of operands in the operation.
*/
unsigned count;
/**
* @brief Number of defs in the operation.
*/
unsigned defCount;
/**
* @brief Number of uses in the operation.
*/
unsigned useCount;
/**
* @brief Operand roles, bit-packed.
*
* A bit-packed info about operands' roles. Each operand's role is
* described by two bits.
*
* As decoded by getOpndRoles(), the role of operand #idx is stored in
* bit pair number (count-1-idx). That is, the LAST operand occupies the
* least significant bits (0,1) and operand#0 occupies the most
* significant pair in use.
*
* The mask is built by ORing #OpndRole_Def and #OpndRole_Use
* appropriately and shifting left, i.e. for an operation with 3 operands:
* - operand#0's role would be, say, '(OpndRole_Def|OpndRole_Use)<<4'
* - operand#1's role would be 'OpndRole_Use<<2'
* - and operand#2's role would be 'OpndRole_Use' (no shift).
*/
unsigned roles;
};
/**
 * @brief Extracts the OpndRole of a single operand from a roles descriptor.
 *
 * Operands are numbered left-to-right, i.e. for 'MOV op0, op1' index 0
 * refers to op0.
 *
 * @param ord - bit-packed roles descriptor
 * @param idx - operand index; must be less than \c ord.count (this is
 *              checked by an assert only)
 * @return the two role bits stored for operand \c idx
 */
static OpndRole getOpndRoles(OpndRolesDesc ord, unsigned idx)
{
    assert(idx < ord.count);
    // Two bits per operand; operand #0 sits in the highest pair in use,
    // the last operand in bits (0,1).
    const unsigned shift = (ord.count - 1 - idx) * 2;
    const unsigned role_bits = (ord.roles >> shift) & 0x3;
    return (OpndRole)role_bits;
}
/**
* @brief Info about single opcode - its opcode bytes, operands,
* operands' roles.
*
* The payload lives in an anonymous struct that shares storage with the
* 128-byte \c dummy array, so the size of the union is at least 128 bytes.
*/
union OpcodeDesc {
char dummy[128]; // To make total size a power of 2
struct {
/**
* @brief Raw opcode bytes.
*
* 'Raw' opcode bytes which do not require any analysis and are
* independent from arguments/sizes/etc (may include opcode size
* prefix).
*/
char opcode[5];
// NOTE(review): presumably the number of meaningful bytes in opcode[] -
// confirm against the table builder.
unsigned opcode_len;
// NOTE(review): aux0/aux1 are presumably the 'aux' values handed to
// encode_aux() ('/r', 'ib', etc.) - confirm against encode().
unsigned aux0;
unsigned aux1;
/**
* @brief Info about opcode's operands.
*
* The [3] mostly comes from IDIV/IMUL which both may have up to 3
* operands.
*/
OpndDesc opnds[3];
// NOTE(review): presumably the index of the first operand that takes part
// in the encoding - confirm against encode().
unsigned first_opnd;
/**
* @brief Info about operands - total number, number of uses/defs,
* operands' roles.
*/
OpndRolesDesc roles;
/**
* @brief If not zero, then this is final OpcodeDesc structure in
* the list of opcodes for a given mnemonic.
*/
char last;
// NOTE(review): presumably tells which platform(s) the opcode is valid
// for - confirm against the opcode tables.
char platf;
};
};
public:
/**
* @brief General info about mnemonic.
*
* One entry per mnemonic is kept in the mnemonics table; getMnemonicDesc()
* returns a pointer to the entry.
*/
struct MnemonicDesc {
/**
* @brief The mnemonic itself.
*/
Mnemonic mn;
/**
* @brief Various characteristics of mnemonic.
* @see MF_
*/
unsigned flags;
/**
* @brief Operation's operand's count and roles.
*
* For the operations whose opcodes may use different number of
* operands (i.e. IMUL/SHL) either most common value used, or empty
* value left.
*/
OpndRolesDesc roles;
/**
* @brief Print name of the mnemonic.
*
* This is the string returned by getMnemonicString() and toStr().
*/
const char * name;
};
/**
* @brief Magic number, shows a maximum value a hash code can take.
*
* For meaning and arithmetics see enc_tabl.cpp.
*
* The value was increased from '5155' to '8192' to make it aligned
* for faster access in EncoderBase::lookup().
*
* It is also the size of the second dimension of opcodesHashMap.
*/
static const unsigned int HASH_MAX = 8192; //5155;
/**
* @brief Empty value, used in hash-to-opcode map to show an empty slot.
*/
static const unsigned char NOHASH = 0xFF;
/**
* @brief Number of bits a single operand takes in a combined hash.
*
* Operands::add() shifts the accumulated hash left by this many bits
* before ORing in the hash of the operand being added.
*/
static const unsigned char HASH_BITS_PER_OPERAND = 5;
/**
* @brief Contains info about a single instructions's operand - its
* location, size and a value for immediate or RegName for
* register operands.
*/
class Operand {
public:
/**
* @brief Initializes the instance with empty size and kind.
*/
Operand() : m_kind(OpndKind_Null), m_size(OpndSize_Null), m_ext(OpndExt_None), m_need_rex(false) {}
/**
* @brief Creates register operand from given RegName.
*/
Operand(RegName reg, OpndExt ext = OpndExt_None) : m_kind(getRegKind(reg)),
m_size(getRegSize(reg)),
m_ext(ext), m_reg(reg)
{
hash_it();
}
/**
* @brief Creates register operand from given RegName and with the
* specified size and kind.
*
* Used to speedup Operand creation as there is no need to extract
* size and kind from the RegName.
* The provided size and kind must match the RegName's ones though.
*/
Operand(OpndSize sz, OpndKind kind, RegName reg, OpndExt ext = OpndExt_None) :
m_kind(kind), m_size(sz), m_ext(ext), m_reg(reg)
{
assert(m_size == getRegSize(reg));
assert(m_kind == getRegKind(reg));
hash_it();
}
/**
* @brief Creates immediate operand with the given size and value.
*/
Operand(OpndSize size, long long ival, OpndExt ext = OpndExt_None) :
m_kind(OpndKind_Imm), m_size(size), m_ext(ext), m_imm64(ival)
{
hash_it();
}
/**
* @brief Creates immediate operand of OpndSize_32.
*/
Operand(int ival, OpndExt ext = OpndExt_None) :
m_kind(OpndKind_Imm), m_size(OpndSize_32), m_ext(ext), m_imm64(ival)
{
hash_it();
}
/**
* @brief Creates immediate operand of OpndSize_16.
*/
Operand(short ival, OpndExt ext = OpndExt_None) :
m_kind(OpndKind_Imm), m_size(OpndSize_16), m_ext(ext), m_imm64(ival)
{
hash_it();
}
/**
* @brief Creates immediate operand of OpndSize_8.
*/
Operand(char ival, OpndExt ext = OpndExt_None) :
m_kind(OpndKind_Imm), m_size(OpndSize_8), m_ext(ext), m_imm64(ival)
{
hash_it();
}
/**
* @brief Creates memory operand.
*/
Operand(OpndSize size, RegName base, RegName index, unsigned scale,
int disp, OpndExt ext = OpndExt_None) : m_kind(OpndKind_Mem), m_size(size), m_ext(ext)
{
m_base = base;
m_index = index;
m_scale = scale;
m_disp = disp;
hash_it();
}
/**
* @brief Creates memory operand with only base and displacement.
*/
Operand(OpndSize size, RegName base, int disp, OpndExt ext = OpndExt_None) :
m_kind(OpndKind_Mem), m_size(size), m_ext(ext)
{
m_base = base;
m_index = RegName_Null;
m_scale = 0;
m_disp = disp;
hash_it();
}
//
// general info
//
/**
* @brief Returns kind of the operand.
*/
OpndKind kind(void) const { return m_kind; }
/**
* @brief Returns size of the operand.
*/
OpndSize size(void) const { return m_size; }
/**
* @brief Returns extention of the operand.
*/
OpndExt ext(void) const { return m_ext; }
/**
* @brief Returns hash of the operand.
*/
unsigned hash(void) const { return m_hash; }
//
#ifdef _EM64T_
bool need_rex(void) const { return m_need_rex; }
#else
bool need_rex(void) const { return false; }
#endif
/**
* @brief Tests whether operand is memory operand.
*/
bool is_mem(void) const { return is_placed_in(OpndKind_Mem); }
/**
* @brief Tests whether operand is immediate operand.
*/
bool is_imm(void) const { return is_placed_in(OpndKind_Imm); }
/**
* @brief Tests whether operand is register operand.
*/
bool is_reg(void) const { return is_placed_in(OpndKind_Reg); }
/**
* @brief Tests whether operand is general-purpose register operand.
*/
bool is_gpreg(void) const { return is_placed_in(OpndKind_GPReg); }
/**
* @brief Tests whether operand is float-point pseudo-register operand.
*/
bool is_fpreg(void) const { return is_placed_in(OpndKind_FPReg); }
/**
* @brief Tests whether operand is XMM register operand.
*/
bool is_xmmreg(void) const { return is_placed_in(OpndKind_XMMReg); }
#ifdef _HAVE_MMX_
/**
* @brief Tests whether operand is MMX register operand.
*/
bool is_mmxreg(void) const { return is_placed_in(OpndKind_MMXReg); }
#endif
/**
* @brief Tests whether operand is signed immediate operand.
*/
//bool is_signed(void) const { assert(is_imm()); return m_is_signed; }
/**
* @brief Returns base of memory operand (RegName_Null if not memory).
*/
RegName base(void) const { return is_mem() ? m_base : RegName_Null; }
/**
* @brief Returns index of memory operand (RegName_Null if not memory).
*/
RegName index(void) const { return is_mem() ? m_index : RegName_Null; }
/**
* @brief Returns scale of memory operand (0 if not memory).
*/
unsigned scale(void) const { return is_mem() ? m_scale : 0; }
/**
* @brief Returns displacement of memory operand (0 if not memory).
*/
int disp(void) const { return is_mem() ? m_disp : 0; }
/**
* @brief Returns RegName of register operand (RegName_Null if not
* register).
*/
RegName reg(void) const { return is_reg() ? m_reg : RegName_Null; }
/**
* @brief Returns value of immediate operand (0 if not immediate).
*/
long long imm(void) const { return is_imm() ? m_imm64 : 0; }
private:
bool is_placed_in(OpndKind kd) const
{
return kd == OpndKind_Reg ?
m_kind == OpndKind_GPReg ||
#ifdef _HAVE_MMX_
m_kind == OpndKind_MMXReg ||
#endif
m_kind == OpndKind_FPReg ||
m_kind == OpndKind_XMMReg
: kd == m_kind;
}
void hash_it(void)
{
m_hash = get_size_hash(m_size) | get_kind_hash(m_kind);
#ifdef _EM64T_
m_need_rex = false;
if (is_reg() && is_em64t_extra_reg(m_reg)) {
m_need_rex = true;
}
else if (is_mem() && (is_em64t_extra_reg(m_base) ||
is_em64t_extra_reg(m_index))) {
m_need_rex = true;
}
#endif
}
// general info
OpndKind m_kind;
OpndSize m_size;
OpndExt m_ext;
// complex address form support
RegName m_base;
RegName m_index;
unsigned m_scale;
union {
int m_disp;
RegName m_reg;
long long m_imm64;
};
unsigned m_hash;
bool m_need_rex;
friend class EncoderBase::Operands;
};
/**
* @brief Simple container for up to 3 Operand-s.
*/
class Operands {
public:
Operands(void)
{
clear();
}
Operands(const Operand& op0)
{
clear();
add(op0);
}
Operands(const Operand& op0, const Operand& op1)
{
clear();
add(op0); add(op1);
}
Operands(const Operand& op0, const Operand& op1, const Operand& op2)
{
clear();
add(op0); add(op1); add(op2);
}
unsigned count(void) const { return m_count; }
unsigned hash(void) const { return m_hash; }
const Operand& operator[](unsigned idx) const
{
assert(idx<m_count);
return m_operands[idx];
}
void add(const Operand& op)
{
assert(m_count < COUNTOF(m_operands));
m_hash = (m_hash<<HASH_BITS_PER_OPERAND) | op.hash();
m_operands[m_count++] = op;
m_need_rex = m_need_rex || op.m_need_rex;
}
#ifdef _EM64T_
bool need_rex(void) const { return m_need_rex; }
#else
bool need_rex(void) const { return false; }
#endif
void clear(void)
{
m_count = 0; m_hash = 0; m_need_rex = false;
}
private:
unsigned m_count;
Operand m_operands[COUNTOF( ((OpcodeDesc*)NULL)->opnds )];
unsigned m_hash;
bool m_need_rex;
};
public:
#ifdef _DEBUG
/**
* Verifies some presumptions about encoding data table.
* Called automatically during statics initialization.
*/
static int verify(void);
#endif
private:
/**
* @brief Returns found OpcodeDesc by the given Mnemonic and operands.
*/
static const OpcodeDesc * lookup(Mnemonic mn, const Operands& opnds);
/**
* @brief Encodes mod/rm byte.
*/
static char* encodeModRM(char* stream, const Operands& opnds,
unsigned idx, const OpcodeDesc * odesc, Rex * prex);
/**
* @brief Encodes special things of opcode description - '/r', 'ib', etc.
*/
static char* encode_aux(char* stream, unsigned aux,
const Operands& opnds, const OpcodeDesc * odesc,
unsigned * pargsCount, Rex* prex);
#ifdef _EM64T_
/**
 * @brief Returns true if the 'reg' argument represents one of the new
 *        EM64T registers - R8(D)-R15(D).
 *
 * Every register accepted by needs_rex_r() is 'extra', and so is every
 * register in the RegName_SPL..RegName_R15L range.
 *
 * The 64 bits versions of 'old-fashion' registers, i.e. RAX are not
 * considered as 'extra'.
 */
static bool is_em64t_extra_reg(const RegName reg)
{
    const bool in_spl_r15l_range =
        (RegName_SPL <= reg) && (reg <= RegName_R15L);
    return needs_rex_r(reg) || in_spl_r15l_range;
}
/**
 * @brief Returns true if 'reg' lies in one of the R8..R15, R8D..R15D,
 *        R8S..R15S, R8L..R15L, XMM8..XMM15, XMM8D..XMM15D or
 *        XMM8S..XMM15S RegName ranges.
 *
 * NOTE(review): judging by the name, these are presumably the registers
 * whose encoding needs an extension bit of the REX prefix - confirm
 * against the users of this function.
 */
static bool needs_rex_r(const RegName reg)
{
    return (RegName_R8    <= reg && reg <= RegName_R15)
        || (RegName_R8D   <= reg && reg <= RegName_R15D)
        || (RegName_R8S   <= reg && reg <= RegName_R15S)
        || (RegName_R8L   <= reg && reg <= RegName_R15L)
        || (RegName_XMM8  <= reg && reg <= RegName_XMM15)
        || (RegName_XMM8D <= reg && reg <= RegName_XMM15D)
        || (RegName_XMM8S <= reg && reg <= RegName_XMM15S);
}
/**
 * @brief Returns a 'processor's index' of the register - the index
 *        used to encode the register in ModRM/SIB bytes.
 *
 * For the new EM64T registers the 'HW index' differs from the index
 * encoded in RegName. For old-fashion registers it's effectively the
 * same as ::getRegIndex(RegName).
 *
 * Non general-purpose registers and the RegName_SPL..RegName_DIL range
 * keep their ::getRegIndex() value. For RegName_R8L..RegName_R15L the
 * index is counted from RegName_R8L, and for the other 'extra' registers
 * it is counted from RegName_R8D.
 */
static unsigned char getHWRegIndex(const RegName reg)
{
    const bool is_gp_reg = (getRegKind(reg) == OpndKind_GPReg);
    const bool in_spl_dil_range = (RegName_SPL <= reg && reg <= RegName_DIL);
    if (!is_gp_reg || in_spl_dil_range) {
        return getRegIndex(reg);
    }
    if (RegName_R8L <= reg && reg <= RegName_R15L) {
        return getRegIndex(reg) - getRegIndex(RegName_R8L);
    }
    if (is_em64t_extra_reg(reg)) {
        return getRegIndex(reg) - getRegIndex(RegName_R8D);
    }
    return getRegIndex(reg);
}
#else
/**
 * @brief Returns the index of the register.
 *
 * This variant is compiled when _EM64T_ is not defined; it simply
 * forwards to ::getRegIndex(RegName).
 */
static unsigned char getHWRegIndex(const RegName reg)
{
    const unsigned char hw_index = getRegIndex(reg);
    return hw_index;
}
/**
 * @brief Variant compiled when _EM64T_ is not defined: no register is
 *        ever reported as an 'extra' EM64T register.
 *
 * @param reg - ignored
 * @return always \c false
 */
static bool is_em64t_extra_reg(const RegName reg)
{
    (void)reg;
    return false;
}
#endif
public:
/**
 * @brief Returns the hash component for the given operand size.
 *
 * @param size - operand size
 * @return the size_hash table entry for sizes up to and including
 *         OpndSize_64, 0xFF for anything bigger
 */
static unsigned char get_size_hash(OpndSize size) {
    if (size > OpndSize_64) {
        return 0xFF;
    }
    return size_hash[size];
}
/**
 * @brief Returns the hash component for the given operand kind.
 *
 * @param kind - operand kind
 * @return the kind_hash table entry for kinds up to and including
 *         OpndKind_Mem, 0xFF for anything bigger
 */
static unsigned char get_kind_hash(OpndKind kind) {
    if (kind > OpndKind_Mem) {
        return 0xFF;
    }
    return kind_hash[kind];
}
/**
* @brief A table used for the fast computation of hash value.
*
* A change must be strictly balanced with hash-related functions and data
* in enc_base.h/.cpp.
*/
static const unsigned char size_hash[OpndSize_64+1];
/**
* @brief A table used for the fast computation of hash value.
*
* A change must be strictly balanced with hash-related functions and data
* in enc_base.h/.cpp.
*/
static const unsigned char kind_hash[OpndKind_Mem+1];
/**
* @brief Maximum number of opcodes used for a single mnemonic.
*
* No arithmetics behind the number, simply estimated.
*/
static const unsigned int MAX_OPCODES = 32; //20;
/**
* @brief Mapping between operands hash code and operands.
*/
static unsigned char opcodesHashMap[Mnemonic_Count][HASH_MAX];
/**
* @brief Array of mnemonics.
*/
static MnemonicDesc mnemonics[Mnemonic_Count];
/**
* @brief Array of available opcodes.
*/
static OpcodeDesc opcodes[Mnemonic_Count][MAX_OPCODES];
static int buildTable(void);
static void buildMnemonicDesc(const MnemonicInfo * minfo);
/**
* @brief Computes hash value for the given operands.
*/
static unsigned short getHash(const OpcodeInfo* odesc);
/**
* @brief Dummy variable, for automatic invocation of buildTable() at
* startup.
*/
static int dummy;
static char * curRelOpnd[3];
};
ENCODER_NAMESPACE_END
#endif // ifndef __ENC_BASE_H_INCLUDED__