C++程序  |  337行  |  11.37 KB

//===- ARMDisassemblerCore.h - ARM disassembler helpers ---------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is part of the ARM Disassembler.
//
// The first part defines the enumeration type of ARM instruction format, which
// specifies the encoding used by the instruction, as well as a helper function
// to convert the enums to printable char strings.
//
// It also contains code to represent the concepts of Builder and DisassembleFP
// to solve the problem of disassembling an ARM instr.
//
//===----------------------------------------------------------------------===//

#ifndef ARMDISASSEMBLERCORE_H
#define ARMDISASSEMBLERCORE_H

#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCContext.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm-c/Disassembler.h"
#include "ARMBaseInstrInfo.h"
#include "ARMRegisterInfo.h"
#include "ARMDisassembler.h"

namespace llvm {
class MCContext;

class ARMUtils {
public:
  static const char *OpcodeName(unsigned Opcode);
};

/////////////////////////////////////////////////////
//                                                 //
//  Enums and Utilities for ARM Instruction Format //
//                                                 //
/////////////////////////////////////////////////////

#define ARM_FORMATS                   \
  ENTRY(ARM_FORMAT_PSEUDO,         0) \
  ENTRY(ARM_FORMAT_MULFRM,         1) \
  ENTRY(ARM_FORMAT_BRFRM,          2) \
  ENTRY(ARM_FORMAT_BRMISCFRM,      3) \
  ENTRY(ARM_FORMAT_DPFRM,          4) \
  ENTRY(ARM_FORMAT_DPSOREGFRM,     5) \
  ENTRY(ARM_FORMAT_LDFRM,          6) \
  ENTRY(ARM_FORMAT_STFRM,          7) \
  ENTRY(ARM_FORMAT_LDMISCFRM,      8) \
  ENTRY(ARM_FORMAT_STMISCFRM,      9) \
  ENTRY(ARM_FORMAT_LDSTMULFRM,    10) \
  ENTRY(ARM_FORMAT_LDSTEXFRM,     11) \
  ENTRY(ARM_FORMAT_ARITHMISCFRM,  12) \
  ENTRY(ARM_FORMAT_SATFRM,        13) \
  ENTRY(ARM_FORMAT_EXTFRM,        14) \
  ENTRY(ARM_FORMAT_VFPUNARYFRM,   15) \
  ENTRY(ARM_FORMAT_VFPBINARYFRM,  16) \
  ENTRY(ARM_FORMAT_VFPCONV1FRM,   17) \
  ENTRY(ARM_FORMAT_VFPCONV2FRM,   18) \
  ENTRY(ARM_FORMAT_VFPCONV3FRM,   19) \
  ENTRY(ARM_FORMAT_VFPCONV4FRM,   20) \
  ENTRY(ARM_FORMAT_VFPCONV5FRM,   21) \
  ENTRY(ARM_FORMAT_VFPLDSTFRM,    22) \
  ENTRY(ARM_FORMAT_VFPLDSTMULFRM, 23) \
  ENTRY(ARM_FORMAT_VFPMISCFRM,    24) \
  ENTRY(ARM_FORMAT_THUMBFRM,      25) \
  ENTRY(ARM_FORMAT_MISCFRM,       26) \
  ENTRY(ARM_FORMAT_NEONGETLNFRM,  27) \
  ENTRY(ARM_FORMAT_NEONSETLNFRM,  28) \
  ENTRY(ARM_FORMAT_NEONDUPFRM,    29) \
  ENTRY(ARM_FORMAT_NLdSt,         30) \
  ENTRY(ARM_FORMAT_N1RegModImm,   31) \
  ENTRY(ARM_FORMAT_N2Reg,         32) \
  ENTRY(ARM_FORMAT_NVCVT,         33) \
  ENTRY(ARM_FORMAT_NVecDupLn,     34) \
  ENTRY(ARM_FORMAT_N2RegVecShL,   35) \
  ENTRY(ARM_FORMAT_N2RegVecShR,   36) \
  ENTRY(ARM_FORMAT_N3Reg,         37) \
  ENTRY(ARM_FORMAT_N3RegVecSh,    38) \
  ENTRY(ARM_FORMAT_NVecExtract,   39) \
  ENTRY(ARM_FORMAT_NVecMulScalar, 40) \
  ENTRY(ARM_FORMAT_NVTBL,         41)

// ARM instruction format specifies the encoding used by the instruction.
#define ENTRY(n, v) n = v,
typedef enum {
  ARM_FORMATS
  ARM_FORMAT_NA
} ARMFormat;
#undef ENTRY

// Converts enum to const char*.
static const inline char *stringForARMFormat(ARMFormat form) {
#define ENTRY(n, v) case n: return #n;
  switch(form) {
    ARM_FORMATS
  case ARM_FORMAT_NA:
  default:
    return "";
  }
#undef ENTRY
}

/// Expands on the enum definitions from ARMBaseInstrInfo.h.
/// They are being used by the disassembler implementation.
namespace ARMII {
  enum {
    NEONRegMask = 15,
    GPRRegMask = 15,
    NEON_RegRdShift = 12,
    NEON_D_BitShift = 22,
    NEON_RegRnShift = 16,
    NEON_N_BitShift = 7,
    NEON_RegRmShift = 0,
    NEON_M_BitShift = 5
  };
}

/// Utility function for extracting [From, To] bits from a uint32_t.
static inline unsigned slice(uint32_t Bits, unsigned From, unsigned To) {
  assert(From < 32 && To < 32 && From >= To);
  return (Bits >> To) & ((1 << (From - To + 1)) - 1);
}

/// Utility function for setting [From, To] bits to Val for a uint32_t.
static inline void setSlice(unsigned &Bits, unsigned From, unsigned To,
                            unsigned Val) {
  assert(From < 32 && To < 32 && From >= To);
  uint32_t Mask = ((1 << (From - To + 1)) - 1);
  Bits &= ~(Mask << To);
  Bits |= (Val & Mask) << To;
}

// Return an integer result equal to the number of bits of x that are ones.
static inline uint32_t
BitCount (uint64_t x)
{
    // c accumulates the total bits set in x
    uint32_t c;
    for (c = 0; x; ++c)
    {
        x &= x - 1; // clear the least significant bit set
    }
    return c;
}

static inline bool
BitIsSet (const uint64_t value, const uint64_t bit)
{
    return (value & (1ull << bit)) != 0;
}

static inline bool
BitIsClear (const uint64_t value, const uint64_t bit)
{
    return (value & (1ull << bit)) == 0;
}

/// Various utilities for checking the target specific flags.

/// A unary data processing instruction doesn't have an Rn operand.
static inline bool isUnaryDP(uint64_t TSFlags) {
  return (TSFlags & ARMII::UnaryDP);
}

/// A NEON Domain instruction has cond field (Inst{31-28}) as 0b1111.
static inline bool isNEONDomain(uint64_t TSFlags) {
  return (TSFlags & ARMII::DomainNEON) ||
         (TSFlags & ARMII::DomainNEONA8);
}

/// This four-bit field describes the addressing mode used.
/// See also ARMBaseInstrInfo.h.
static inline unsigned getAddrMode(uint64_t TSFlags) {
  return (TSFlags & ARMII::AddrModeMask);
}

/// {IndexModePre, IndexModePost}
/// Only valid for load and store ops.
/// See also ARMBaseInstrInfo.h.
static inline unsigned getIndexMode(uint64_t TSFlags) {
  return (TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift;
}

/// Pre-/post-indexed operations define an extra $base_wb in the OutOperandList.
static inline bool isPrePostLdSt(uint64_t TSFlags) {
  return (TSFlags & ARMII::IndexModeMask) != 0;
}

// Forward declaration.
class ARMBasicMCBuilder;

// Builder Object is mostly ignored except in some Thumb disassemble functions.
typedef ARMBasicMCBuilder *BO;

/// DisassembleFP - DisassembleFP points to a function that disassembles an insn
/// and builds the MCOperand list upon disassembly.  It returns false on failure
/// or true on success.  The number of operands added is updated upon success.
typedef bool (*DisassembleFP)(MCInst &MI, unsigned Opcode, uint32_t insn,
    unsigned short NumOps, unsigned &NumOpsAdded, BO Builder);

/// CreateMCBuilder - Return an ARMBasicMCBuilder that can build up the MC
/// infrastructure of an MCInst given the Opcode and Format of the instr.
/// Return NULL if it fails to create/return a proper builder.  API clients
/// are responsible for freeing up of the allocated memory.  Cacheing can be
/// performed by the API clients to improve performance.
extern ARMBasicMCBuilder *CreateMCBuilder(unsigned Opcode, ARMFormat Format);

/// ARMBasicMCBuilder - ARMBasicMCBuilder represents an ARM MCInst builder that
/// knows how to build up the MCOperand list.
class ARMBasicMCBuilder {
  friend ARMBasicMCBuilder *CreateMCBuilder(unsigned Opcode, ARMFormat Format);
  unsigned Opcode;
  ARMFormat Format;
  unsigned short NumOps;
  DisassembleFP Disasm;
  Session *SP;
  int Err; // !=0 if the builder encounters some error condition during build.

private:
  /// Opcode, Format, and NumOperands make up an ARM Basic MCBuilder.
  ARMBasicMCBuilder(unsigned opc, ARMFormat format, unsigned short num);

public:
  ARMBasicMCBuilder(ARMBasicMCBuilder &B)
    : Opcode(B.Opcode), Format(B.Format), NumOps(B.NumOps), Disasm(B.Disasm),
      SP(B.SP), GetOpInfo(0), DisInfo(0), Ctx(0) {
    Err = 0;
  }

  virtual ~ARMBasicMCBuilder() {}

  void SetSession(Session *sp) {
    SP = sp;
  }

  void SetErr(int ErrCode) {
    Err = ErrCode;
  }

  /// DoPredicateOperands - DoPredicateOperands process the predicate operands
  /// of some Thumb instructions which come before the reglist operands.  It
  /// returns true if the two predicate operands have been processed.
  bool DoPredicateOperands(MCInst& MI, unsigned Opcode,
      uint32_t insn, unsigned short NumOpsRemaning);
  
  /// TryPredicateAndSBitModifier - TryPredicateAndSBitModifier tries to process
  /// the possible Predicate and SBitModifier, to build the remaining MCOperand
  /// constituents.
  bool TryPredicateAndSBitModifier(MCInst& MI, unsigned Opcode,
      uint32_t insn, unsigned short NumOpsRemaning);

  /// InITBlock - InITBlock returns true if we are inside an IT block.
  bool InITBlock() {
    if (SP)
      return SP->ITCounter > 0;

    return false;
  }

  /// Build - Build delegates to BuildIt to perform the heavy liftling.  After
  /// that, it invokes RunBuildAfterHook where some housekeepings can be done.
  virtual bool Build(MCInst &MI, uint32_t insn) {
    bool Status = BuildIt(MI, insn);
    return RunBuildAfterHook(Status, MI, insn);
  }

  /// BuildIt - BuildIt performs the build step for this ARM Basic MC Builder.
  /// The general idea is to set the Opcode for the MCInst, followed by adding
  /// the appropriate MCOperands to the MCInst.  ARM Basic MC Builder delegates
  /// to the Format-specific disassemble function for disassembly, followed by
  /// TryPredicateAndSBitModifier() for PredicateOperand and OptionalDefOperand
  /// which follow the Dst/Src Operands.
  virtual bool BuildIt(MCInst &MI, uint32_t insn);

  /// RunBuildAfterHook - RunBuildAfterHook performs operations deemed necessary
  /// after BuildIt is finished.
  virtual bool RunBuildAfterHook(bool Status, MCInst &MI, uint32_t insn);

private:
  /// Get condition of the current IT instruction.
  unsigned GetITCond() {
    assert(SP);
    return slice(SP->ITState, 7, 4);
  }

private:
  //
  // Hooks for symbolic disassembly via the public 'C' interface.
  //
  // The function to get the symbolic information for operands.
  LLVMOpInfoCallback GetOpInfo;
  // The pointer to the block of symbolic information for above call back.
  void *DisInfo;
  // The assembly context for creating symbols and MCExprs in place of
  // immediate operands when there is symbolic information.
  MCContext *Ctx;
  // The address of the instruction being disassembled.
  uint64_t Address;

public:
  void setupBuilderForSymbolicDisassembly(LLVMOpInfoCallback getOpInfo,
                                          void *disInfo, MCContext *ctx,
                                          uint64_t address) {
    GetOpInfo = getOpInfo;
    DisInfo = disInfo;
    Ctx = ctx;
    Address = address;
  }

  uint64_t getBuilderAddress() const { return Address; }

  /// tryAddingSymbolicOperand - tryAddingSymbolicOperand trys to add a symbolic
  /// operand in place of the immediate Value in the MCInst.  The immediate
  /// Value has had any PC adjustment made by the caller.  If the getOpInfo()
  /// function was set as part of the setupBuilderForSymbolicDisassembly() call
  /// then that function is called to get any symbolic information at the
  /// builder's Address for this instrution.  If that returns non-zero then the
  /// symbolic information it returns is used to create an MCExpr and that is
  /// added as an operand to the MCInst.  This function returns true if it adds
  /// an operand to the MCInst and false otherwise.
  bool tryAddingSymbolicOperand(uint64_t Value, uint64_t InstSize, MCInst &MI);

};

} // namespace llvm

#endif