Golang程序  |  642行  |  10.21 KB

// Copyright 2014 The Go Authors.  All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package x86asm implements decoding of x86 machine code.
package x86asm

import (
	"bytes"
	"fmt"
)

// An Inst is a single instruction.
type Inst struct {
	Prefix   Prefixes // Prefixes applied to the instruction.
	Op       Op       // Opcode mnemonic
	Opcode   uint32   // Encoded opcode bits, left aligned (first byte is Opcode>>24, etc)
	Args     Args     // Instruction arguments, in Intel order
	Mode     int      // processor mode in bits: 16, 32, or 64
	AddrSize int      // address size in bits: 16, 32, or 64
	DataSize int      // operand size in bits: 16, 32, or 64
	MemBytes int      // size of memory argument in bytes: 1, 2, 4, 8, 16, and so on.
	Len      int      // length of encoded instruction in bytes
}

// Prefixes is an array of prefixes associated with a single instruction.
// The prefixes are listed in the same order as found in the instruction:
// each prefix byte corresponds to one slot in the array. The first zero
// in the array marks the end of the prefixes.
type Prefixes [14]Prefix

// A Prefix represents an Intel instruction prefix.
// The low 8 bits are the actual prefix byte encoding,
// and the top 8 bits contain distinguishing bits and metadata.
type Prefix uint16

const (
	// Metadata about the role of a prefix in an instruction.
	PrefixImplicit Prefix = 0x8000 // prefix is implied by instruction text
	PrefixIgnored  Prefix = 0x4000 // prefix is ignored: either irrelevant or overridden by a later prefix
	PrefixInvalid  Prefix = 0x2000 // prefix makes entire instruction invalid (bad LOCK)

	// Memory segment overrides.
	PrefixES Prefix = 0x26 // ES segment override
	PrefixCS Prefix = 0x2E // CS segment override
	PrefixSS Prefix = 0x36 // SS segment override
	PrefixDS Prefix = 0x3E // DS segment override
	PrefixFS Prefix = 0x64 // FS segment override
	PrefixGS Prefix = 0x65 // GS segment override

	// Branch prediction.
	PrefixPN Prefix = 0x12E // predict not taken (conditional branch only)
	PrefixPT Prefix = 0x13E // predict taken (conditional branch only)

	// Size attributes.
	PrefixDataSize Prefix = 0x66 // operand size override
	PrefixData16   Prefix = 0x166
	PrefixData32   Prefix = 0x266
	PrefixAddrSize Prefix = 0x67 // address size override
	PrefixAddr16   Prefix = 0x167
	PrefixAddr32   Prefix = 0x267

	// One of a kind.
	PrefixLOCK     Prefix = 0xF0 // lock
	PrefixREPN     Prefix = 0xF2 // repeat not zero
	PrefixXACQUIRE Prefix = 0x1F2
	PrefixBND      Prefix = 0x2F2
	PrefixREP      Prefix = 0xF3 // repeat
	PrefixXRELEASE Prefix = 0x1F3

	// The REX prefixes must be in the range [PrefixREX, PrefixREX+0x10).
	// the other bits are set or not according to the intended use.
	PrefixREX  Prefix = 0x40 // REX 64-bit extension prefix
	PrefixREXW Prefix = 0x08 // extension bit W (64-bit instruction width)
	PrefixREXR Prefix = 0x04 // extension bit R (r field in modrm)
	PrefixREXX Prefix = 0x02 // extension bit X (index field in sib)
	PrefixREXB Prefix = 0x01 // extension bit B (r/m field in modrm or base field in sib)
)

// IsREX reports whether p is a REX prefix byte.
func (p Prefix) IsREX() bool {
	return p&0xF0 == PrefixREX
}

func (p Prefix) String() string {
	p &^= PrefixImplicit | PrefixIgnored | PrefixInvalid
	if s := prefixNames[p]; s != "" {
		return s
	}

	if p.IsREX() {
		s := "REX."
		if p&PrefixREXW != 0 {
			s += "W"
		}
		if p&PrefixREXR != 0 {
			s += "R"
		}
		if p&PrefixREXX != 0 {
			s += "X"
		}
		if p&PrefixREXB != 0 {
			s += "B"
		}
		return s
	}

	return fmt.Sprintf("Prefix(%#x)", int(p))
}

// An Op is an x86 opcode.
type Op uint32

func (op Op) String() string {
	i := int(op)
	if i < 0 || i >= len(opNames) || opNames[i] == "" {
		return fmt.Sprintf("Op(%d)", i)
	}
	return opNames[i]
}

// An Args holds the instruction arguments.
// If an instruction has fewer than 4 arguments,
// the final elements in the array are nil.
type Args [4]Arg

// An Arg is a single instruction argument,
// one of these types: Reg, Mem, Imm, Rel.
type Arg interface {
	String() string
	isArg()
}

// Note that the implements of Arg that follow are all sized
// so that on a 64-bit machine the data can be inlined in
// the interface value instead of requiring an allocation.

// A Reg is a single register.
// The zero Reg value has no name but indicates ``no register.''
type Reg uint8

const (
	_ Reg = iota

	// 8-bit
	AL
	CL
	DL
	BL
	AH
	CH
	DH
	BH
	SPB
	BPB
	SIB
	DIB
	R8B
	R9B
	R10B
	R11B
	R12B
	R13B
	R14B
	R15B

	// 16-bit
	AX
	CX
	DX
	BX
	SP
	BP
	SI
	DI
	R8W
	R9W
	R10W
	R11W
	R12W
	R13W
	R14W
	R15W

	// 32-bit
	EAX
	ECX
	EDX
	EBX
	ESP
	EBP
	ESI
	EDI
	R8L
	R9L
	R10L
	R11L
	R12L
	R13L
	R14L
	R15L

	// 64-bit
	RAX
	RCX
	RDX
	RBX
	RSP
	RBP
	RSI
	RDI
	R8
	R9
	R10
	R11
	R12
	R13
	R14
	R15

	// Instruction pointer.
	IP  // 16-bit
	EIP // 32-bit
	RIP // 64-bit

	// 387 floating point registers.
	F0
	F1
	F2
	F3
	F4
	F5
	F6
	F7

	// MMX registers.
	M0
	M1
	M2
	M3
	M4
	M5
	M6
	M7

	// XMM registers.
	X0
	X1
	X2
	X3
	X4
	X5
	X6
	X7
	X8
	X9
	X10
	X11
	X12
	X13
	X14
	X15

	// Segment registers.
	ES
	CS
	SS
	DS
	FS
	GS

	// System registers.
	GDTR
	IDTR
	LDTR
	MSW
	TASK

	// Control registers.
	CR0
	CR1
	CR2
	CR3
	CR4
	CR5
	CR6
	CR7
	CR8
	CR9
	CR10
	CR11
	CR12
	CR13
	CR14
	CR15

	// Debug registers.
	DR0
	DR1
	DR2
	DR3
	DR4
	DR5
	DR6
	DR7
	DR8
	DR9
	DR10
	DR11
	DR12
	DR13
	DR14
	DR15

	// Task registers.
	TR0
	TR1
	TR2
	TR3
	TR4
	TR5
	TR6
	TR7
)

const regMax = TR7

func (Reg) isArg() {}

func (r Reg) String() string {
	i := int(r)
	if i < 0 || i >= len(regNames) || regNames[i] == "" {
		return fmt.Sprintf("Reg(%d)", i)
	}
	return regNames[i]
}

// A Mem is a memory reference.
// The general form is Segment:[Base+Scale*Index+Disp].
type Mem struct {
	Segment Reg
	Base    Reg
	Scale   uint8
	Index   Reg
	Disp    int64
}

func (Mem) isArg() {}

func (m Mem) String() string {
	var base, plus, scale, index, disp string

	if m.Base != 0 {
		base = m.Base.String()
	}
	if m.Scale != 0 {
		if m.Base != 0 {
			plus = "+"
		}
		if m.Scale > 1 {
			scale = fmt.Sprintf("%d*", m.Scale)
		}
		index = m.Index.String()
	}
	if m.Disp != 0 || m.Base == 0 && m.Scale == 0 {
		disp = fmt.Sprintf("%+#x", m.Disp)
	}
	return "[" + base + plus + scale + index + disp + "]"
}

// A Rel is an offset relative to the current instruction pointer.
type Rel int32

func (Rel) isArg() {}

func (r Rel) String() string {
	return fmt.Sprintf(".%+d", r)
}

// An Imm is an integer constant.
type Imm int64

func (Imm) isArg() {}

func (i Imm) String() string {
	return fmt.Sprintf("%#x", int64(i))
}

func (i Inst) String() string {
	var buf bytes.Buffer
	for _, p := range i.Prefix {
		if p == 0 {
			break
		}
		if p&PrefixImplicit != 0 {
			continue
		}
		fmt.Fprintf(&buf, "%v ", p)
	}
	fmt.Fprintf(&buf, "%v", i.Op)
	sep := " "
	for _, v := range i.Args {
		if v == nil {
			break
		}
		fmt.Fprintf(&buf, "%s%v", sep, v)
		sep = ", "
	}
	return buf.String()
}

func isReg(a Arg) bool {
	_, ok := a.(Reg)
	return ok
}

func isSegReg(a Arg) bool {
	r, ok := a.(Reg)
	return ok && ES <= r && r <= GS
}

func isMem(a Arg) bool {
	_, ok := a.(Mem)
	return ok
}

func isImm(a Arg) bool {
	_, ok := a.(Imm)
	return ok
}

func regBytes(a Arg) int {
	r, ok := a.(Reg)
	if !ok {
		return 0
	}
	if AL <= r && r <= R15B {
		return 1
	}
	if AX <= r && r <= R15W {
		return 2
	}
	if EAX <= r && r <= R15L {
		return 4
	}
	if RAX <= r && r <= R15 {
		return 8
	}
	return 0
}

func isSegment(p Prefix) bool {
	switch p {
	case PrefixCS, PrefixDS, PrefixES, PrefixFS, PrefixGS, PrefixSS:
		return true
	}
	return false
}

// The Op definitions and string list are in tables.go.

var prefixNames = map[Prefix]string{
	PrefixCS:       "CS",
	PrefixDS:       "DS",
	PrefixES:       "ES",
	PrefixFS:       "FS",
	PrefixGS:       "GS",
	PrefixSS:       "SS",
	PrefixLOCK:     "LOCK",
	PrefixREP:      "REP",
	PrefixREPN:     "REPN",
	PrefixAddrSize: "ADDRSIZE",
	PrefixDataSize: "DATASIZE",
	PrefixAddr16:   "ADDR16",
	PrefixData16:   "DATA16",
	PrefixAddr32:   "ADDR32",
	PrefixData32:   "DATA32",
	PrefixBND:      "BND",
	PrefixXACQUIRE: "XACQUIRE",
	PrefixXRELEASE: "XRELEASE",
	PrefixREX:      "REX",
	PrefixPT:       "PT",
	PrefixPN:       "PN",
}

var regNames = [...]string{
	AL:   "AL",
	CL:   "CL",
	BL:   "BL",
	DL:   "DL",
	AH:   "AH",
	CH:   "CH",
	BH:   "BH",
	DH:   "DH",
	SPB:  "SPB",
	BPB:  "BPB",
	SIB:  "SIB",
	DIB:  "DIB",
	R8B:  "R8B",
	R9B:  "R9B",
	R10B: "R10B",
	R11B: "R11B",
	R12B: "R12B",
	R13B: "R13B",
	R14B: "R14B",
	R15B: "R15B",
	AX:   "AX",
	CX:   "CX",
	BX:   "BX",
	DX:   "DX",
	SP:   "SP",
	BP:   "BP",
	SI:   "SI",
	DI:   "DI",
	R8W:  "R8W",
	R9W:  "R9W",
	R10W: "R10W",
	R11W: "R11W",
	R12W: "R12W",
	R13W: "R13W",
	R14W: "R14W",
	R15W: "R15W",
	EAX:  "EAX",
	ECX:  "ECX",
	EDX:  "EDX",
	EBX:  "EBX",
	ESP:  "ESP",
	EBP:  "EBP",
	ESI:  "ESI",
	EDI:  "EDI",
	R8L:  "R8L",
	R9L:  "R9L",
	R10L: "R10L",
	R11L: "R11L",
	R12L: "R12L",
	R13L: "R13L",
	R14L: "R14L",
	R15L: "R15L",
	RAX:  "RAX",
	RCX:  "RCX",
	RDX:  "RDX",
	RBX:  "RBX",
	RSP:  "RSP",
	RBP:  "RBP",
	RSI:  "RSI",
	RDI:  "RDI",
	R8:   "R8",
	R9:   "R9",
	R10:  "R10",
	R11:  "R11",
	R12:  "R12",
	R13:  "R13",
	R14:  "R14",
	R15:  "R15",
	IP:   "IP",
	EIP:  "EIP",
	RIP:  "RIP",
	F0:   "F0",
	F1:   "F1",
	F2:   "F2",
	F3:   "F3",
	F4:   "F4",
	F5:   "F5",
	F6:   "F6",
	F7:   "F7",
	M0:   "M0",
	M1:   "M1",
	M2:   "M2",
	M3:   "M3",
	M4:   "M4",
	M5:   "M5",
	M6:   "M6",
	M7:   "M7",
	X0:   "X0",
	X1:   "X1",
	X2:   "X2",
	X3:   "X3",
	X4:   "X4",
	X5:   "X5",
	X6:   "X6",
	X7:   "X7",
	X8:   "X8",
	X9:   "X9",
	X10:  "X10",
	X11:  "X11",
	X12:  "X12",
	X13:  "X13",
	X14:  "X14",
	X15:  "X15",
	CS:   "CS",
	SS:   "SS",
	DS:   "DS",
	ES:   "ES",
	FS:   "FS",
	GS:   "GS",
	GDTR: "GDTR",
	IDTR: "IDTR",
	LDTR: "LDTR",
	MSW:  "MSW",
	TASK: "TASK",
	CR0:  "CR0",
	CR1:  "CR1",
	CR2:  "CR2",
	CR3:  "CR3",
	CR4:  "CR4",
	CR5:  "CR5",
	CR6:  "CR6",
	CR7:  "CR7",
	CR8:  "CR8",
	CR9:  "CR9",
	CR10: "CR10",
	CR11: "CR11",
	CR12: "CR12",
	CR13: "CR13",
	CR14: "CR14",
	CR15: "CR15",
	DR0:  "DR0",
	DR1:  "DR1",
	DR2:  "DR2",
	DR3:  "DR3",
	DR4:  "DR4",
	DR5:  "DR5",
	DR6:  "DR6",
	DR7:  "DR7",
	DR8:  "DR8",
	DR9:  "DR9",
	DR10: "DR10",
	DR11: "DR11",
	DR12: "DR12",
	DR13: "DR13",
	DR14: "DR14",
	DR15: "DR15",
	TR0:  "TR0",
	TR1:  "TR1",
	TR2:  "TR2",
	TR3:  "TR3",
	TR4:  "TR4",
	TR5:  "TR5",
	TR6:  "TR6",
	TR7:  "TR7",
}