// Copyright 2017 syzkaller project authors. All rights reserved.
// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
// gen generates instruction tables (ifuzz/insns.go) from Intel XED tables.
// The XED tables used to generate insns.go are checked in as all-enc-instructions.txt.
package main
import (
"bufio"
"fmt"
"os"
"reflect"
"strconv"
"strings"
"github.com/google/syzkaller/pkg/ifuzz"
"github.com/google/syzkaller/pkg/serializer"
)
// main converts an Intel XED instruction table into Go source for the ifuzz
// instruction list.
//
// It takes exactly one command-line argument, the path of the table. The file
// is parsed line by line: '#' starts a comment, a trailing backslash continues
// the logical line, "{" and "}" delimit an instruction block, and every other
// non-empty line is a "NAME : values..." attribute. Each PATTERN attribute
// produces a separate instruction that inherits the ICLASS/CPL/EXTENSION
// attributes seen so far in the block. After parsing, duplicates are merged
// and the result is written to stdout; diagnostics and statistics go to
// stderr. Any hard failure terminates the process through failf.
// nolint: gocyclo
func main() {
	if len(os.Args) != 2 {
		failf("usage: gen instructions.txt")
	}
	f, err := os.Open(os.Args[1])
	if err != nil {
		failf("failed to open input file: %v", err)
	}
	defer f.Close()
	skipped := 0
	// saved accumulates the pieces of a logical line that is continued
	// with a trailing backslash.
	saved := ""
	var insns []*ifuzz.Insn
	// insn holds the attributes shared by the current "{ ... }" block,
	// insn1 is the instruction being built for the most recent PATTERN.
	var insn, insn1 *ifuzz.Insn
	s := bufio.NewScanner(f)
	for i := 1; s.Scan(); i++ {
		// reportError prints the offending input line and aborts.
		reportError := func(msg string, args ...interface{}) {
			fmt.Fprintf(os.Stderr, "line %v: %v\n", i, s.Text())
			failf(msg, args...)
		}
		line := s.Text()
		if comment := strings.IndexByte(line, '#'); comment != -1 {
			line = line[:comment]
		}
		line = strings.TrimSpace(line)
		if line == "" {
			continue
		}
		if line[len(line)-1] == '\\' {
			saved += line[:len(line)-1]
			continue
		}
		line = saved + line
		saved = ""
		if line == "{" {
			insn = new(ifuzz.Insn)
			continue
		}
		if line == "}" {
			// Flush the instruction built for the last PATTERN of the block.
			if insn1 != nil {
				insns = append(insns, insn1)
				insn1 = nil
				insn = nil
			}
			continue
		}
		colon := strings.IndexByte(line, ':')
		if colon == -1 {
			reportError("no colon")
		}
		name := strings.TrimSpace(line[:colon])
		if name == "" {
			reportError("empty attribute name")
		}
		var vals []string
		for _, v := range strings.Split(line[colon+1:], " ") {
			v = strings.TrimSpace(v)
			if v == "" {
				continue
			}
			vals = append(vals, v)
		}
		// requireInsn turns what would otherwise be a nil pointer dereference
		// into a diagnostic when a handled attribute appears outside of a
		// "{ ... }" block. Attributes that are not handled below are still
		// ignored wherever they appear.
		requireInsn := func() {
			if insn == nil {
				reportError("%v outside of an instruction block", name)
			}
		}
		switch name {
		case "ICLASS":
			requireInsn()
			if len(vals) != 1 {
				reportError("ICLASS has more than one value")
			}
			insn.Name = vals[0]
		case "CPL":
			requireInsn()
			if len(vals) != 1 {
				reportError("CPL has more than one value")
			}
			if vals[0] != "0" && vals[0] != "3" {
				reportError("unknown CPL value: %v", vals[0])
			}
			insn.Priv = vals[0] == "0"
		case "EXTENSION":
			requireInsn()
			if len(vals) != 1 {
				reportError("EXTENSION has more than one value")
			}
			insn.Extension = vals[0]
			switch insn.Extension {
			case "FMA", "AVX2", "AVX", "F16C", "BMI2", "BMI", "XOP", "FMA4", "AVXAES", "BMI1", "AVX2GATHER":
				insn.Mode = 1<<ifuzz.ModeLong64 | 1<<ifuzz.ModeProt32
			}
			insn.Avx2Gather = insn.Extension == "AVX2GATHER"
		case "PATTERN":
			requireInsn()
			// A block may contain several PATTERNs; each one becomes its own
			// instruction, so the previous one is complete at this point.
			if insn1 != nil {
				insns = append(insns, insn1)
			}
			insn1 = new(ifuzz.Insn)
			*insn1 = *insn
			if err := parsePattern(insn1, vals); err != nil {
				if _, ok := err.(errSkip); !ok {
					// Pass the error as an argument: its text is not a format string.
					reportError("%v", err)
				}
				if err.Error() != "" {
					fmt.Fprintf(os.Stderr, "skipping %v on line %v (%v)\n", insn.Name, i, err)
				}
				skipped++
				insn1 = nil
			}
		case "OPERANDS":
			// The preceding PATTERN was skipped, nothing to refine.
			if insn1 == nil {
				break
			}
			if err := parseOperands(insn1, vals); err != nil {
				if _, ok := err.(errSkip); !ok {
					// Pass the error as an argument: its text is not a format string.
					reportError("%v", err)
				}
				if err.Error() != "" {
					fmt.Fprintf(os.Stderr, "skipping %v on line %v (%v)\n", insn.Name, i, err)
				}
				skipped++
				insn1 = nil
			}
		}
	}
	// Scan returns false both at EOF and on failure (for example an I/O error
	// or a line longer than the scanner buffer); without this check a
	// truncated table would be generated silently.
	if err := s.Err(); err != nil {
		failf("failed to read input file: %v", err)
	}
	var deduped []*ifuzz.Insn
nextInsn:
	for _, insn := range insns {
		if insn.Extension == "AVX512VEX" || insn.Extension == "AVX512EVEX" {
			skipped++
			continue
		}
		mod0 := insn.Mod
		for j := len(deduped) - 1; j >= 0; j-- {
			insn1 := deduped[j]
			// Temporarily copy the candidate's Mod so that entries which differ
			// only by Mod 3 vs -3 (or where the kept entry already has Mod -1)
			// compare equal below.
			if insn.Mod == 3 && insn1.Mod == -3 || insn.Mod == -3 && insn1.Mod == 3 || insn1.Mod == -1 {
				insn.Mod = insn1.Mod
			}
			if reflect.DeepEqual(insn, insn1) {
				// The entries matched only after Mod was overridden:
				// the kept entry now stands for both, so its Mod becomes -1.
				if insn.Mod != mod0 {
					insn1.Mod = -1
				}
				continue nextInsn
			}
			insn.Mod = mod0
		}
		deduped = append(deduped, insn)
	}
	fmt.Fprintf(os.Stderr, "deduped %v instructions\n", len(insns)-len(deduped))
	insns = deduped
	fmt.Printf("// AUTOGENERATED FILE\n\n")
	fmt.Printf("package ifuzz\n\n")
	fmt.Printf("import . \"github.com/google/syzkaller/pkg/ifuzz\"\n\n")
	fmt.Printf("func init() { Insns = insns }\n\n")
	fmt.Printf("var insns = ")
	serializer.Write(os.Stdout, insns)
	fmt.Fprintf(os.Stderr, "handled %v, skipped %v\n", len(insns), skipped)
}
// errSkip is the error type used to signal that an instruction should be
// skipped rather than treated as a fatal parse failure. The string is the
// reason for skipping; callers treat an empty reason as "skip silently".
type errSkip string

// Error implements the error interface and returns the skip reason verbatim.
func (e errSkip) Error() string {
	reason := string(e)
	return reason
}
// parsePattern fills the encoding-related fields of insn from vals, the
// space-separated tokens of a XED PATTERN attribute.
//
// It returns an errSkip when the instruction should be dropped (the NOP5-NOP9
// classes, an unrecognized token, or no execution mode left after applying the
// mode tokens) and an ordinary error when the pattern is malformed or
// inconsistent. On a nil return insn describes the opcode bytes, ModRM
// constraints, prefixes, immediates, VEX/XOP attributes and allowed modes.
// nolint: gocyclo
func parsePattern(insn *ifuzz.Insn, vals []string) error {
	if insn.Opcode != nil {
		return fmt.Errorf("PATTERN is already parsed for the instruction")
	}
	// As spelled these have incorrect format for 16-bit addressing mode and with 67 prefix.
	if insn.Name == "NOP5" || insn.Name == "NOP6" || insn.Name == "NOP7" ||
		insn.Name == "NOP8" || insn.Name == "NOP9" {
		return errSkip("")
	}
	// No mode was preset by the caller: start with every mode allowed and
	// let the mode tokens below narrow the set.
	if insn.Mode == 0 {
		insn.Mode = 1<<ifuzz.ModeLast - 1
	}
	// -100 marks "not specified by the pattern"; the values are validated
	// after the loop if the instruction turns out to have a ModRM byte.
	insn.Mod = -100
	insn.Reg = -100
	insn.Rm = -100
	insn.VexP = -1
	for _, v := range vals {
		switch {
		case strings.HasPrefix(v, "0x"):
			op, err := strconv.ParseUint(v, 0, 8)
			if err != nil {
				return fmt.Errorf("failed to parse hex pattern: %v", v)
			}
			// Bytes seen before any ModRM field belong to the opcode,
			// bytes seen after it are stored as the suffix.
			if !insn.Modrm {
				insn.Opcode = append(insn.Opcode, byte(op))
			} else {
				insn.Suffix = append(insn.Suffix, byte(op))
			}
		case strings.HasPrefix(v, "0b"):
			// Only the "0bXXXX_X" form is accepted: five bits that become
			// bits 7..3 of an opcode byte.
			if len(v) != 8 || v[6] != '_' {
				return fmt.Errorf("failed to parse bin pattern: %v", v)
			}
			var op byte
			if v[2] == '1' {
				op |= 1 << 7
			}
			if v[3] == '1' {
				op |= 1 << 6
			}
			if v[4] == '1' {
				op |= 1 << 5
			}
			if v[5] == '1' {
				op |= 1 << 4
			}
			if v[7] == '1' {
				op |= 1 << 3
			}
			insn.Opcode = append(insn.Opcode, op)
		case strings.HasPrefix(v, "MOD["):
			insn.Modrm = true
			vv, err := parseModrm(v[3:])
			if err != nil {
				return fmt.Errorf("failed to parse %v: %v", v, err)
			}
			insn.Mod = vv
		case strings.HasPrefix(v, "REG["):
			insn.Modrm = true
			vv, err := parseModrm(v[3:])
			if err != nil {
				return fmt.Errorf("failed to parse %v: %v", v, err)
			}
			insn.Reg = vv
		case strings.HasPrefix(v, "RM["):
			insn.Modrm = true
			vv, err := parseModrm(v[2:])
			if err != nil {
				return fmt.Errorf("failed to parse %v: %v", v, err)
			}
			insn.Rm = vv
		case v == "RM=4":
			insn.Rm = 4
		case strings.HasPrefix(v, "SRM["):
			vv, err := parseModrm(v[3:])
			if err != nil {
				return fmt.Errorf("failed to parse %v: %v", v, err)
			}
			insn.Rm = vv
			insn.Srm = true
		case v == "SRM=0", v == "SRM!=0":
		case v == "MOD!=3":
			// Only valid as a refinement of a preceding unconstrained MOD[mm].
			if !insn.Modrm || insn.Mod != -1 {
				return fmt.Errorf("MOD!=3 without MOD")
			}
			insn.Mod = -3
		case v == "MOD=3":
			// Most other instructions contain "MOD[0b11] MOD=3",
			// but BNDCL contains "MOD[mm] MOD=3"
			insn.Mod = 3
		case v == "MOD=0":
			insn.Mod = 0
		case v == "MOD=1":
			insn.Mod = 1
		case v == "MOD=2":
			insn.Mod = 2
		case v == "lock_prefix":
			insn.Prefix = append(insn.Prefix, 0xF0)
		// Immediates.
		case v == "UIMM8()", v == "SIMM8()":
			addImm(insn, 1)
		case v == "UIMM16()":
			addImm(insn, 2)
		case v == "UIMM32()":
			addImm(insn, 4)
		case v == "SIMMz()":
			addImm(insn, -1)
		case v == "UIMMv()":
			addImm(insn, -3)
		case v == "UIMM8_1()":
			addImm(insn, 1)
		case v == "BRDISP8()":
			addImm(insn, 1)
		case v == "BRDISP32()":
			addImm(insn, 4)
		case v == "BRDISPz()":
			addImm(insn, -1)
		case v == "MEMDISPv()":
			addImm(insn, -2)
		// VOP/VEX
		case v == "XOPV":
			insn.Vex = 0x8f
			insn.Mode &^= 1 << ifuzz.ModeReal16
		case v == "EVV":
			insn.Vex = 0xc4
		case v == "VV1":
			insn.Vex = 0xc4
		case v == "VMAP0":
			insn.VexMap = 0
		case v == "V0F":
			insn.VexMap = 1
		case v == "V0F38":
			insn.VexMap = 2
		case v == "V0F3A":
			insn.VexMap = 3
		case v == "XMAP8":
			insn.VexMap = 8
		case v == "XMAP9":
			insn.VexMap = 9
		case v == "XMAPA":
			insn.VexMap = 10
		case v == "VNP":
			insn.VexP = 0
		case v == "V66":
			insn.VexP = 1
		case v == "VF2":
			insn.VexP = 3
		case v == "VF3":
			insn.VexP = 2
		case v == "VL128", v == "VL=0":
			insn.VexL = -1
		case v == "VL256", v == "VL=1":
			insn.VexL = 1
		case v == "NOVSR":
			insn.VexNoR = true
		case v == "NOEVSR":
			insn.VexNoR = true
			// VEXDEST3=0b1 VEXDEST210=0b111 VEXDEST4=0b0
		case v == "SE_IMM8()":
			addImm(insn, 1)
		// Modes
		case v == "mode64":
			insn.Mode &= 1 << ifuzz.ModeLong64
		case v == "not64":
			insn.Mode &^= 1 << ifuzz.ModeLong64
		case v == "mode32":
			insn.Mode &= 1 << ifuzz.ModeProt32
		case v == "mode16":
			insn.Mode &= 1<<ifuzz.ModeProt16 | 1<<ifuzz.ModeReal16
		case v == "eamode64",
			v == "eamode32",
			v == "eamode16",
			v == "eanot16":
		case v == "no_refining_prefix":
			insn.NoRepPrefix = true
			insn.No66Prefix = true
		case v == "no66_prefix", v == "eosz32", v == "eosz64":
			insn.No66Prefix = true
		case v == "f2_refining_prefix", v == "refining_f2", v == "repne", v == "REP=2":
			insn.Prefix = append(insn.Prefix, 0xF2)
			insn.NoRepPrefix = true
		case v == "f3_refining_prefix", v == "refining_f3", v == "repe", v == "REP=3":
			insn.Prefix = append(insn.Prefix, 0xF3)
			insn.NoRepPrefix = true
		case v == "norep", v == "not_refining", v == "REP=0":
			insn.NoRepPrefix = true
		case v == "osz_refining_prefix":
			insn.Prefix = append(insn.Prefix, 0x66)
			insn.NoRepPrefix = true
		case v == "rexw_prefix", v == "W1":
			insn.Rexw = 1
		case v == "norexw_prefix", v == "W0":
			insn.Rexw = -1
		// Tokens that are recognized but deliberately have no effect on insn.
		case v == "MPXMODE=1",
			v == "MPXMODE=0",
			v == "TZCNT=1",
			v == "TZCNT=0",
			v == "LZCNT=1",
			v == "LZCNT=0",
			v == "CR_WIDTH()",
			v == "DF64()",
			v == "IMMUNE_REXW()",
			v == "FORCE64()",
			v == "EOSZ=1",
			v == "EOSZ!=1",
			v == "EOSZ=2",
			v == "EOSZ!=2",
			v == "EOSZ=3",
			v == "EOSZ!=3",
			v == "BRANCH_HINT()",
			v == "P4=1",
			v == "P4=0",
			v == "rexb_prefix",
			v == "norexb_prefix",
			v == "IMMUNE66()",
			v == "REFINING66()",
			v == "IGNORE66()",
			v == "IMMUNE66_LOOP64()",
			v == "OVERRIDE_SEG0()",
			v == "OVERRIDE_SEG1()",
			v == "REMOVE_SEGMENT()",
			v == "ONE()",
			v == "nolock_prefix",
			v == "MODRM()",
			v == "VMODRM_XMM()",
			v == "VMODRM_YMM()",
			v == "BCRC=0",
			v == "BCRC=1",
			v == "ESIZE_8_BITS()",
			v == "ESIZE_16_BITS()",
			v == "ESIZE_32_BITS()",
			v == "ESIZE_64_BITS()",
			v == "NELEM_GPR_WRITER_STORE()",
			v == "NELEM_GPR_WRITER_STORE_BYTE()",
			v == "NELEM_GPR_WRITER_STORE_WORD()",
			v == "NELEM_GPR_WRITER_LDOP_Q()",
			v == "NELEM_GPR_WRITER_LDOP_D()",
			v == "NELEM_GPR_READER()",
			v == "NELEM_GPR_READER_BYTE()",
			v == "NELEM_GPR_READER_WORD()",
			v == "NELEM_GSCAT()",
			v == "NELEM_HALF()",
			v == "NELEM_FULL()",
			v == "NELEM_FULLMEM()",
			v == "NELEM_QUARTERMEM()",
			v == "NELEM_EIGHTHMEM()",
			v == "NELEM_HALFMEM()",
			v == "NELEM_MEM128()",
			v == "NELEM_SCALAR()",
			v == "NELEM_TUPLE1()",
			v == "NELEM_TUPLE2()",
			v == "NELEM_TUPLE4()",
			v == "NELEM_TUPLE8()",
			v == "NELEM_TUPLE1_4X()",
			v == "NELEM_TUPLE1_BYTE()",
			v == "NELEM_TUPLE1_WORD()",
			v == "NELEM_MOVDDUP()",
			v == "UISA_VMODRM_XMM()",
			v == "UISA_VMODRM_YMM()",
			v == "UISA_VMODRM_ZMM()",
			v == "MASK=0",
			v == "FIX_ROUND_LEN128()",
			v == "FIX_ROUND_LEN512()",
			v == "AVX512_ROUND()",
			v == "ZEROING=0",
			v == "SAE()",
			v == "VL512", // VL=2
			v == "not_refining_f3",
			strings.HasPrefix(v, "MODEP5="):
		default:
			return errSkip(fmt.Sprintf("unknown pattern %v", v))
		}
	}
	if insn.Modrm {
		// With a ModRM byte every field must have been given either a fixed
		// value or a placeholder; a leftover -100 sentinel is rejected here.
		switch insn.Mod {
		case -3, -1, 0, 1, 2, 3:
		default:
			return fmt.Errorf("bad MOD value: %v", insn.Mod)
		}
		if insn.Reg < -1 || insn.Reg > 7 {
			return fmt.Errorf("bad REG value: %v", insn.Reg)
		}
		if insn.Rm < -1 || insn.Rm > 7 {
			return fmt.Errorf("bad RM value: %v", insn.Rm)
		}
	}
	if insn.Imm != 0 && len(insn.Suffix) != 0 {
		return fmt.Errorf("both immediate and suffix opcode")
	}
	// The mode tokens may have masked out every mode.
	if insn.Mode == 0 {
		return errSkip("no modes for instruction")
	}
	return nil
}
// parseOperands refines insn according to vals, the tokens of a XED OPERANDS
// attribute. Segment and control register operands replace the Reg
// placeholder (-1) with a dedicated marker (-6 and -8 respectively) and are an
// error if Reg was already fixed by the pattern; control and debug register
// operands set NoSibDisp; 16- and 32-bit memory operands set Mem16 / Mem32.
// Operands that are not listed are ignored.
func parseOperands(insn *ifuzz.Insn, vals []string) error {
	for idx := 0; idx < len(vals); idx++ {
		switch operand := vals[idx]; operand {
		case "REG0=SEG():r", "REG1=SEG():r", "REG0=SEG():w":
			if insn.Reg == -1 {
				insn.Reg = -6
				continue
			}
			return fmt.Errorf("REG=SEG() operand, but fixed reg")
		case "REG0=CR_R():w", "REG1=CR_R():r":
			if insn.Reg == -1 {
				insn.Reg, insn.NoSibDisp = -8, true
				continue
			}
			return fmt.Errorf("REG=CR_R() operand, but fixed reg")
		case "REG0=DR_R():w", "REG1=DR_R():r":
			insn.NoSibDisp = true
		case "MEM0:r:mem16", "MEM0:w:mem16", "MEM0:r:mem16int", "MEM0:w:mem16int":
			insn.Mem16 = true
		case "MEM0:r:mem32real", "MEM0:r:mem32int", "MEM0:w:mem32real", "MEM0:w:mem32int":
			insn.Mem32 = true
		}
	}
	return nil
}
// parseModrm parses the bracketed value of a MOD[...], REG[...], RM[...] or
// SRM[...] pattern token. v must include the brackets.
//
// The placeholders "[mm]", "[rrr]" and "[nnn]" yield -1. A binary literal of
// one to three digits, such as "[0b11]" or "[0b101]", yields its numeric
// value. Everything else, including a literal without digits ("[0b]"), is
// reported as malformed.
func parseModrm(v string) (int8, error) {
	if len(v) < 4 || len(v) > 7 || v[0] != '[' || v[len(v)-1] != ']' {
		return 0, fmt.Errorf("malformed")
	}
	switch v {
	case "[mm]", "[rrr]", "[nnn]":
		return -1, nil
	}
	if !strings.HasPrefix(v, "[0b") {
		return 0, fmt.Errorf("malformed")
	}
	// The length check above limits the literal to at most three digits,
	// so the accumulated value always fits into int8.
	digits := v[3 : len(v)-1]
	if digits == "" {
		// "[0b]" carries no value; do not silently treat it as 0.
		return 0, fmt.Errorf("malformed")
	}
	var vv int8
	for i := 0; i < len(digits); i++ {
		switch digits[i] {
		case '0':
			vv <<= 1
		case '1':
			vv = vv<<1 | 1
		default:
			return 0, fmt.Errorf("malformed")
		}
	}
	return vv, nil
}
// addImm records an immediate of kind/size imm in the first free immediate
// slot of insn: Imm if it is still zero, otherwise Imm2 if that is still zero.
// It panics when both slots are already occupied.
func addImm(insn *ifuzz.Insn, imm int8) {
	switch {
	case insn.Imm == 0:
		insn.Imm = imm
	case insn.Imm2 == 0:
		insn.Imm2 = imm
	default:
		panic("too many immediates")
	}
}
// failf formats msg with args, writes the result followed by a newline to
// stderr and terminates the process with exit status 1. It never returns.
func failf(msg string, args ...interface{}) {
	text := fmt.Sprintf(msg+"\n", args...)
	os.Stderr.WriteString(text)
	os.Exit(1)
}