// Copyright 2017 syzkaller project authors. All rights reserved. // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. // gen generates instruction tables (ifuzz/insns.go) from Intel XED tables. // Tables used to generate insns.go are checked in in all-enc-instructions.txt. package main import ( "bufio" "fmt" "os" "reflect" "strconv" "strings" "github.com/google/syzkaller/pkg/ifuzz" "github.com/google/syzkaller/pkg/serializer" ) // nolint: gocyclo func main() { if len(os.Args) != 2 { failf("usage: gen instructions.txt") } f, err := os.Open(os.Args[1]) if err != nil { failf("failed to open input file: %v", err) } defer f.Close() skipped := 0 saved := "" var insns []*ifuzz.Insn var insn, insn1 *ifuzz.Insn s := bufio.NewScanner(f) for i := 1; s.Scan(); i++ { reportError := func(msg string, args ...interface{}) { fmt.Fprintf(os.Stderr, "line %v: %v\n", i, s.Text()) failf(msg, args...) } line := s.Text() if comment := strings.IndexByte(line, '#'); comment != -1 { line = line[:comment] } line = strings.TrimSpace(line) if line == "" { continue } if line[len(line)-1] == '\\' { saved += line[:len(line)-1] continue } line = saved + line saved = "" if line == "{" { insn = new(ifuzz.Insn) continue } if line == "}" { if insn1 != nil { insns = append(insns, insn1) insn1 = nil insn = nil } continue } colon := strings.IndexByte(line, ':') if colon == -1 { reportError("no colon") } name := strings.TrimSpace(line[:colon]) if name == "" { reportError("empty attribute name") } var vals []string for _, v := range strings.Split(line[colon+1:], " ") { v = strings.TrimSpace(v) if v == "" { continue } vals = append(vals, v) } switch name { case "ICLASS": if len(vals) != 1 { reportError("ICLASS has more than one value") } insn.Name = vals[0] case "CPL": if len(vals) != 1 { reportError("CPL has more than one value") } if vals[0] != "0" && vals[0] != "3" { reportError("unknown CPL value: %v", vals[0]) } insn.Priv = vals[0] == "0" case "EXTENSION": if len(vals) != 1 { reportError("EXTENSION has more than one value") } insn.Extension = vals[0] switch insn.Extension { case "FMA", "AVX2", "AVX", "F16C", "BMI2", "BMI", "XOP", "FMA4", "AVXAES", "BMI1", "AVX2GATHER": insn.Mode = 1<<ifuzz.ModeLong64 | 1<<ifuzz.ModeProt32 } insn.Avx2Gather = insn.Extension == "AVX2GATHER" case "PATTERN": if insn1 != nil { insns = append(insns, insn1) } insn1 = new(ifuzz.Insn) *insn1 = *insn if err := parsePattern(insn1, vals); err != nil { if _, ok := err.(errSkip); !ok { reportError(err.Error()) } if err.Error() != "" { fmt.Fprintf(os.Stderr, "skipping %v on line %v (%v)\n", insn.Name, i, err) } skipped++ insn1 = nil } case "OPERANDS": if insn1 == nil { break } if err := parseOperands(insn1, vals); err != nil { if _, ok := err.(errSkip); !ok { reportError(err.Error()) } if err.Error() != "" { fmt.Fprintf(os.Stderr, "skipping %v on line %v (%v)\n", insn.Name, i, err) } skipped++ insn1 = nil } } } var deduped []*ifuzz.Insn nextInsn: for _, insn := range insns { if insn.Extension == "AVX512VEX" || insn.Extension == "AVX512EVEX" { skipped++ continue } mod0 := insn.Mod for j := len(deduped) - 1; j >= 0; j-- { insn1 := deduped[j] if insn.Mod == 3 && insn1.Mod == -3 || insn.Mod == -3 && insn1.Mod == 3 || insn1.Mod == -1 { insn.Mod = insn1.Mod } if reflect.DeepEqual(insn, insn1) { if insn.Mod != mod0 { insn1.Mod = -1 } continue nextInsn } insn.Mod = mod0 } deduped = append(deduped, insn) } fmt.Fprintf(os.Stderr, "deduped %v instructions\n", len(insns)-len(deduped)) insns = deduped fmt.Printf("// AUTOGENERATED FILE\n\n") fmt.Printf("package ifuzz\n\n") fmt.Printf("import . \"github.com/google/syzkaller/pkg/ifuzz\"\n\n") fmt.Printf("func init() { Insns = insns }\n\n") fmt.Printf("var insns = ") serializer.Write(os.Stdout, insns) fmt.Fprintf(os.Stderr, "handled %v, skipped %v\n", len(insns), skipped) } type errSkip string func (err errSkip) Error() string { return string(err) } // nolint: gocyclo func parsePattern(insn *ifuzz.Insn, vals []string) error { if insn.Opcode != nil { return fmt.Errorf("PATTERN is already parsed for the instruction") } // As spelled these have incorrect format for 16-bit addressing mode and with 67 prefix. if insn.Name == "NOP5" || insn.Name == "NOP6" || insn.Name == "NOP7" || insn.Name == "NOP8" || insn.Name == "NOP9" { return errSkip("") } if insn.Mode == 0 { insn.Mode = 1<<ifuzz.ModeLast - 1 } insn.Mod = -100 insn.Reg = -100 insn.Rm = -100 insn.VexP = -1 for _, v := range vals { switch { case strings.HasPrefix(v, "0x"): op, err := strconv.ParseUint(v, 0, 8) if err != nil { return fmt.Errorf("failed to parse hex pattern: %v", v) } if !insn.Modrm { insn.Opcode = append(insn.Opcode, byte(op)) } else { insn.Suffix = append(insn.Suffix, byte(op)) } case strings.HasPrefix(v, "0b"): if len(v) != 8 || v[6] != '_' { return fmt.Errorf("failed to parse bin pattern: %v", v) } var op byte if v[2] == '1' { op |= 1 << 7 } if v[3] == '1' { op |= 1 << 6 } if v[4] == '1' { op |= 1 << 5 } if v[5] == '1' { op |= 1 << 4 } if v[7] == '1' { op |= 1 << 3 } insn.Opcode = append(insn.Opcode, op) case strings.HasPrefix(v, "MOD["): insn.Modrm = true vv, err := parseModrm(v[3:]) if err != nil { return fmt.Errorf("failed to parse %v: %v", v, err) } insn.Mod = vv case strings.HasPrefix(v, "REG["): insn.Modrm = true vv, err := parseModrm(v[3:]) if err != nil { return fmt.Errorf("failed to parse %v: %v", v, err) } insn.Reg = vv case strings.HasPrefix(v, "RM["): insn.Modrm = true vv, err := parseModrm(v[2:]) if err != nil { return fmt.Errorf("failed to parse %v: %v", v, err) } insn.Rm = vv case v == "RM=4": insn.Rm = 4 case strings.HasPrefix(v, "SRM["): vv, err := parseModrm(v[3:]) if err != nil { return fmt.Errorf("failed to parse %v: %v", v, err) } insn.Rm = vv insn.Srm = true case v == "SRM=0", v == "SRM!=0": case v == "MOD!=3": if !insn.Modrm || insn.Mod != -1 { return fmt.Errorf("MOD!=3 without MOD") } insn.Mod = -3 case v == "MOD=3": // Most other instructions contain "MOD[0b11] MOD=3", // but BNDCL contains "MOD[mm] MOD=3" insn.Mod = 3 case v == "MOD=0": insn.Mod = 0 case v == "MOD=1": insn.Mod = 1 case v == "MOD=2": insn.Mod = 2 case v == "lock_prefix": insn.Prefix = append(insn.Prefix, 0xF0) // Immediates. case v == "UIMM8()", v == "SIMM8()": addImm(insn, 1) case v == "UIMM16()": addImm(insn, 2) case v == "UIMM32()": addImm(insn, 4) case v == "SIMMz()": addImm(insn, -1) case v == "UIMMv()": addImm(insn, -3) case v == "UIMM8_1()": addImm(insn, 1) case v == "BRDISP8()": addImm(insn, 1) case v == "BRDISP32()": addImm(insn, 4) case v == "BRDISPz()": addImm(insn, -1) case v == "MEMDISPv()": addImm(insn, -2) // VOP/VEX case v == "XOPV": insn.Vex = 0x8f insn.Mode &^= 1 << ifuzz.ModeReal16 case v == "EVV": insn.Vex = 0xc4 case v == "VV1": insn.Vex = 0xc4 case v == "VMAP0": insn.VexMap = 0 case v == "V0F": insn.VexMap = 1 case v == "V0F38": insn.VexMap = 2 case v == "V0F3A": insn.VexMap = 3 case v == "XMAP8": insn.VexMap = 8 case v == "XMAP9": insn.VexMap = 9 case v == "XMAPA": insn.VexMap = 10 case v == "VNP": insn.VexP = 0 case v == "V66": insn.VexP = 1 case v == "VF2": insn.VexP = 3 case v == "VF3": insn.VexP = 2 case v == "VL128", v == "VL=0": insn.VexL = -1 case v == "VL256", v == "VL=1": insn.VexL = 1 case v == "NOVSR": insn.VexNoR = true case v == "NOEVSR": insn.VexNoR = true // VEXDEST3=0b1 VEXDEST210=0b111 VEXDEST4=0b0 case v == "SE_IMM8()": addImm(insn, 1) // Modes case v == "mode64": insn.Mode &= 1 << ifuzz.ModeLong64 case v == "not64": insn.Mode &^= 1 << ifuzz.ModeLong64 case v == "mode32": insn.Mode &= 1 << ifuzz.ModeProt32 case v == "mode16": insn.Mode &= 1<<ifuzz.ModeProt16 | 1<<ifuzz.ModeReal16 case v == "eamode64", v == "eamode32", v == "eamode16", v == "eanot16": case v == "no_refining_prefix": insn.NoRepPrefix = true insn.No66Prefix = true case v == "no66_prefix", v == "eosz32", v == "eosz64": insn.No66Prefix = true case v == "f2_refining_prefix", v == "refining_f2", v == "repne", v == "REP=2": insn.Prefix = append(insn.Prefix, 0xF2) insn.NoRepPrefix = true case v == "f3_refining_prefix", v == "refining_f3", v == "repe", v == "REP=3": insn.Prefix = append(insn.Prefix, 0xF3) insn.NoRepPrefix = true case v == "norep", v == "not_refining", v == "REP=0": insn.NoRepPrefix = true case v == "osz_refining_prefix": insn.Prefix = append(insn.Prefix, 0x66) insn.NoRepPrefix = true case v == "rexw_prefix", v == "W1": insn.Rexw = 1 case v == "norexw_prefix", v == "W0": insn.Rexw = -1 case v == "MPXMODE=1", v == "MPXMODE=0", v == "TZCNT=1", v == "TZCNT=0", v == "LZCNT=1", v == "LZCNT=0", v == "CR_WIDTH()", v == "DF64()", v == "IMMUNE_REXW()", v == "FORCE64()", v == "EOSZ=1", v == "EOSZ!=1", v == "EOSZ=2", v == "EOSZ!=2", v == "EOSZ=3", v == "EOSZ!=3", v == "BRANCH_HINT()", v == "P4=1", v == "P4=0", v == "rexb_prefix", v == "norexb_prefix", v == "IMMUNE66()", v == "REFINING66()", v == "IGNORE66()", v == "IMMUNE66_LOOP64()", v == "OVERRIDE_SEG0()", v == "OVERRIDE_SEG1()", v == "REMOVE_SEGMENT()", v == "ONE()", v == "nolock_prefix", v == "MODRM()", v == "VMODRM_XMM()", v == "VMODRM_YMM()", v == "BCRC=0", v == "BCRC=1", v == "ESIZE_8_BITS()", v == "ESIZE_16_BITS()", v == "ESIZE_32_BITS()", v == "ESIZE_64_BITS()", v == "NELEM_GPR_WRITER_STORE()", v == "NELEM_GPR_WRITER_STORE_BYTE()", v == "NELEM_GPR_WRITER_STORE_WORD()", v == "NELEM_GPR_WRITER_LDOP_Q()", v == "NELEM_GPR_WRITER_LDOP_D()", v == "NELEM_GPR_READER()", v == "NELEM_GPR_READER_BYTE()", v == "NELEM_GPR_READER_WORD()", v == "NELEM_GSCAT()", v == "NELEM_HALF()", v == "NELEM_FULL()", v == "NELEM_FULLMEM()", v == "NELEM_QUARTERMEM()", v == "NELEM_EIGHTHMEM()", v == "NELEM_HALFMEM()", v == "NELEM_QUARTERMEM()", v == "NELEM_MEM128()", v == "NELEM_SCALAR()", v == "NELEM_TUPLE1()", v == "NELEM_TUPLE2()", v == "NELEM_TUPLE4()", v == "NELEM_TUPLE8()", v == "NELEM_TUPLE1_4X()", v == "NELEM_TUPLE1_BYTE()", v == "NELEM_TUPLE1_WORD()", v == "NELEM_MOVDDUP()", v == "UISA_VMODRM_XMM()", v == "UISA_VMODRM_YMM()", v == "UISA_VMODRM_ZMM()", v == "MASK=0", v == "FIX_ROUND_LEN128()", v == "FIX_ROUND_LEN512()", v == "AVX512_ROUND()", v == "ZEROING=0", v == "SAE()", v == "VL512", // VL=2 v == "not_refining_f3", strings.HasPrefix(v, "MODEP5="): default: return errSkip(fmt.Sprintf("unknown pattern %v", v)) } } if insn.Modrm { switch insn.Mod { case -3, -1, 0, 1, 2, 3: default: return fmt.Errorf("bad MOD value: %v", insn.Mod) } if insn.Reg < -1 || insn.Reg > 7 { return fmt.Errorf("bad REG value: %v", insn.Mod) } if insn.Rm < -1 || insn.Rm > 7 { return fmt.Errorf("bad RM value: %v", insn.Mod) } } if insn.Imm != 0 && len(insn.Suffix) != 0 { return fmt.Errorf("both immediate and suffix opcode") } if insn.Mode == 0 { return errSkip("no modes for instruction") } return nil } func parseOperands(insn *ifuzz.Insn, vals []string) error { for _, v := range vals { switch v { case "REG0=SEG():r", "REG1=SEG():r", "REG0=SEG():w": if insn.Reg != -1 { return fmt.Errorf("REG=SEG() operand, but fixed reg") } insn.Reg = -6 case "REG0=CR_R():w", "REG1=CR_R():r": if insn.Reg != -1 { return fmt.Errorf("REG=CR_R() operand, but fixed reg") } insn.Reg = -8 insn.NoSibDisp = true case "REG0=DR_R():w", "REG1=DR_R():r": insn.NoSibDisp = true case "MEM0:r:mem16", "MEM0:w:mem16", "MEM0:r:mem16int", "MEM0:w:mem16int": insn.Mem16 = true case "MEM0:r:mem32real", "MEM0:r:mem32int", "MEM0:w:mem32real", "MEM0:w:mem32int": insn.Mem32 = true } } return nil } func parseModrm(v string) (int8, error) { if len(v) < 4 || len(v) > 7 || v[0] != '[' || v[len(v)-1] != ']' { return 0, fmt.Errorf("malformed") } if v == "[mm]" || v == "[rrr]" || v == "[nnn]" { return -1, nil } if !strings.HasPrefix(v, "[0b") { return 0, fmt.Errorf("malformed") } var vv int8 for i := 3; i < len(v)-1; i++ { if v[i] != '0' && v[i] != '1' { return 0, fmt.Errorf("malformed") } vv *= 2 if v[i] == '1' { vv++ } } return vv, nil } func addImm(insn *ifuzz.Insn, imm int8) { if insn.Imm == 0 { insn.Imm = imm return } if insn.Imm2 == 0 { insn.Imm2 = imm return } panic("too many immediates") } func failf(msg string, args ...interface{}) { fmt.Fprintf(os.Stderr, msg+"\n", args...) os.Exit(1) }