diff options
| author | Dimitry Andric <dim@FreeBSD.org> | 2014-11-24 17:02:24 +0000 | 
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2014-11-24 17:02:24 +0000 | 
| commit | 91bc56ed825ba56b3cc264aa5c95ab84f86832ab (patch) | |
| tree | 4df130b28021d86e13bf4565ef58c1c5a5e093b4 /contrib/llvm/lib/Target/X86/Disassembler | |
| parent | 9efc7e72bb1daf5d6019871d9c93a1c488a11229 (diff) | |
| parent | 5ca98fd98791947eba83a1ed3f2c8191ef7afa6c (diff) | |
Notes
Diffstat (limited to 'contrib/llvm/lib/Target/X86/Disassembler')
| -rw-r--r-- | contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp | 207 | ||||
| -rw-r--r-- | contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.h | 27 | ||||
| -rw-r--r-- | contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp (renamed from contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c) | 554 | ||||
| -rw-r--r-- | contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h | 404 | ||||
| -rw-r--r-- | contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h | 366 | 
5 files changed, 835 insertions, 723 deletions
diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp b/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp index 903e36cfe6ce..521bd21b81c6 100644 --- a/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -27,26 +27,30 @@  #include "llvm/Support/TargetRegistry.h"  #include "llvm/Support/raw_ostream.h" +using namespace llvm; +using namespace llvm::X86Disassembler; + +#define DEBUG_TYPE "x86-disassembler" +  #define GET_REGINFO_ENUM  #include "X86GenRegisterInfo.inc"  #define GET_INSTRINFO_ENUM  #include "X86GenInstrInfo.inc" +#define GET_SUBTARGETINFO_ENUM +#include "X86GenSubtargetInfo.inc" -using namespace llvm; -using namespace llvm::X86Disassembler; - -void x86DisassemblerDebug(const char *file, -                          unsigned line, -                          const char *s) { +void llvm::X86Disassembler::Debug(const char *file, unsigned line, +                                  const char *s) {    dbgs() << file << ":" << line << ": " << s;  } -const char *x86DisassemblerGetInstrName(unsigned Opcode, const void *mii) { +const char *llvm::X86Disassembler::GetInstrName(unsigned Opcode, +                                                const void *mii) {    const MCInstrInfo *MII = static_cast<const MCInstrInfo *>(mii);    return MII->getName(Opcode);  } -#define debug(s) DEBUG(x86DisassemblerDebug(__FILE__, __LINE__, s)); +#define debug(s) DEBUG(Debug(__FILE__, __LINE__, s));  namespace llvm {   @@ -72,13 +76,25 @@ static bool translateInstruction(MCInst &target,                                  InternalInstruction &source,                                  const MCDisassembler *Dis); -X86GenericDisassembler::X86GenericDisassembler(const MCSubtargetInfo &STI, -                                               DisassemblerMode mode, -                                               const MCInstrInfo *MII) -  : MCDisassembler(STI), MII(MII), fMode(mode) {} - 
-X86GenericDisassembler::~X86GenericDisassembler() { -  delete MII; +X86GenericDisassembler::X86GenericDisassembler( +                                         const MCSubtargetInfo &STI, +                                         MCContext &Ctx, +                                         std::unique_ptr<const MCInstrInfo> MII) +  : MCDisassembler(STI, Ctx), MII(std::move(MII)) { +  switch (STI.getFeatureBits() & +          (X86::Mode16Bit | X86::Mode32Bit | X86::Mode64Bit)) { +  case X86::Mode16Bit: +    fMode = MODE_16BIT; +    break; +  case X86::Mode32Bit: +    fMode = MODE_32BIT; +    break; +  case X86::Mode64Bit: +    fMode = MODE_64BIT; +    break; +  default: +    llvm_unreachable("Invalid CPU mode"); +  }  }  /// regionReader - a callback function that wraps the readByte method from @@ -124,14 +140,14 @@ X86GenericDisassembler::getInstruction(MCInst &instr,    dlog_t loggerFn = logger;    if (&vStream == &nulls()) -    loggerFn = 0; // Disable logging completely if it's going to nulls(). +    loggerFn = nullptr; // Disable logging completely if it's going to nulls().    int ret = decodeInstruction(&internalInstr,                                regionReader,                                (const void*)&region,                                loggerFn,                                (void*)&vStream, -                              (const void*)MII, +                              (const void*)MII.get(),                                address,                                fMode); @@ -207,6 +223,61 @@ static void tryAddingPcLoadReferenceComment(uint64_t Address, uint64_t Value,    Dis->tryAddingPcLoadReferenceComment(Value, Address);  } +static const uint8_t segmentRegnums[SEG_OVERRIDE_max] = { +  0,        // SEG_OVERRIDE_NONE +  X86::CS, +  X86::SS, +  X86::DS, +  X86::ES, +  X86::FS, +  X86::GS +}; + +/// translateSrcIndex   - Appends a source index operand to an MCInst. +/// +/// @param mcInst       - The MCInst to append to. 
+/// @param insn         - The internal instruction. +static bool translateSrcIndex(MCInst &mcInst, InternalInstruction &insn) { +  unsigned baseRegNo; + +  if (insn.mode == MODE_64BIT) +    baseRegNo = insn.prefixPresent[0x67] ? X86::ESI : X86::RSI; +  else if (insn.mode == MODE_32BIT) +    baseRegNo = insn.prefixPresent[0x67] ? X86::SI : X86::ESI; +  else { +    assert(insn.mode == MODE_16BIT); +    baseRegNo = insn.prefixPresent[0x67] ? X86::ESI : X86::SI; +  } +  MCOperand baseReg = MCOperand::CreateReg(baseRegNo); +  mcInst.addOperand(baseReg); + +  MCOperand segmentReg; +  segmentReg = MCOperand::CreateReg(segmentRegnums[insn.segmentOverride]); +  mcInst.addOperand(segmentReg); +  return false; +} + +/// translateDstIndex   - Appends a destination index operand to an MCInst. +/// +/// @param mcInst       - The MCInst to append to. +/// @param insn         - The internal instruction. + +static bool translateDstIndex(MCInst &mcInst, InternalInstruction &insn) { +  unsigned baseRegNo; + +  if (insn.mode == MODE_64BIT) +    baseRegNo = insn.prefixPresent[0x67] ? X86::EDI : X86::RDI; +  else if (insn.mode == MODE_32BIT) +    baseRegNo = insn.prefixPresent[0x67] ? X86::DI : X86::EDI; +  else { +    assert(insn.mode == MODE_16BIT); +    baseRegNo = insn.prefixPresent[0x67] ? X86::EDI : X86::DI; +  } +  MCOperand baseReg = MCOperand::CreateReg(baseRegNo); +  mcInst.addOperand(baseReg); +  return false; +} +  /// translateImmediate  - Appends an immediate operand to an MCInst.  ///  /// @param mcInst       - The MCInst to append to. @@ -248,7 +319,7 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate,    }    // By default sign-extend all X86 immediates based on their encoding.    
else if (type == TYPE_IMM8 || type == TYPE_IMM16 || type == TYPE_IMM32 || -           type == TYPE_IMM64) { +           type == TYPE_IMM64 || type == TYPE_IMMv) {      uint32_t Opcode = mcInst.getOpcode();      switch (operand.encoding) {      default: @@ -315,6 +386,13 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate,                                 insn.immediateOffset, insn.immediateSize,                                 mcInst, Dis))      mcInst.addOperand(MCOperand::CreateImm(immediate)); + +  if (type == TYPE_MOFFS8 || type == TYPE_MOFFS16 || +      type == TYPE_MOFFS32 || type == TYPE_MOFFS64) { +    MCOperand segmentReg; +    segmentReg = MCOperand::CreateReg(segmentRegnums[insn.segmentOverride]); +    mcInst.addOperand(segmentReg); +  }  }  /// translateRMRegister - Translates a register stored in the R/M field of the @@ -418,13 +496,22 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,      bool IndexIs256 = (Opcode == X86::VGATHERQPDYrm ||                         Opcode == X86::VGATHERDPSYrm ||                         Opcode == X86::VGATHERQPSYrm || +                       Opcode == X86::VGATHERDPDZrm || +                       Opcode == X86::VPGATHERDQZrm ||                         Opcode == X86::VPGATHERQQYrm ||                         Opcode == X86::VPGATHERDDYrm ||                         Opcode == X86::VPGATHERQDYrm); -    if (IndexIs128 || IndexIs256) { +    bool IndexIs512 = (Opcode == X86::VGATHERQPDZrm || +                       Opcode == X86::VGATHERDPSZrm || +                       Opcode == X86::VGATHERQPSZrm || +                       Opcode == X86::VPGATHERQQZrm || +                       Opcode == X86::VPGATHERDDZrm || +                       Opcode == X86::VPGATHERQDZrm); +    if (IndexIs128 || IndexIs256 || IndexIs512) {        unsigned IndexOffset = insn.sibIndex -                           (insn.addressSize == 8 ? SIB_INDEX_RAX:SIB_INDEX_EAX); -      SIBIndex IndexBase = IndexIs256 ? 
SIB_INDEX_YMM0 : SIB_INDEX_XMM0; +      SIBIndex IndexBase = IndexIs512 ? SIB_INDEX_ZMM0 : +                           IndexIs256 ? SIB_INDEX_YMM0 : SIB_INDEX_XMM0;        insn.sibIndex = (SIBIndex)(IndexBase +                              (insn.sibIndex == SIB_INDEX_NONE ? 4 : IndexOffset));      } @@ -513,17 +600,7 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,    }    displacement = MCOperand::CreateImm(insn.displacement); -   -  static const uint8_t segmentRegnums[SEG_OVERRIDE_max] = { -    0,        // SEG_OVERRIDE_NONE -    X86::CS, -    X86::SS, -    X86::DS, -    X86::ES, -    X86::FS, -    X86::GS -  }; -   +    segmentReg = MCOperand::CreateReg(segmentRegnums[insn.segmentOverride]);    mcInst.addOperand(baseReg); @@ -565,6 +642,9 @@ static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,    case TYPE_XMM128:    case TYPE_XMM256:    case TYPE_XMM512: +  case TYPE_VK1: +  case TYPE_VK8: +  case TYPE_VK16:    case TYPE_DEBUGREG:    case TYPE_CONTROLREG:      return translateRMRegister(mcInst, insn); @@ -596,16 +676,25 @@ static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,  ///  /// @param mcInst       - The MCInst to append to.  /// @param stackPos     - The stack position to translate. -/// @return             - 0 on success; nonzero otherwise. -static bool translateFPRegister(MCInst &mcInst, -                               uint8_t stackPos) { -  if (stackPos >= 8) { -    debug("Invalid FP stack position"); +static void translateFPRegister(MCInst &mcInst, +                                uint8_t stackPos) { +  mcInst.addOperand(MCOperand::CreateReg(X86::ST0 + stackPos)); +} + +/// translateMaskRegister - Translates a 3-bit mask register number to +///   LLVM form, and appends it to an MCInst. +/// +/// @param mcInst       - The MCInst to append to. +/// @param maskRegNum   - Number of mask register from 0 to 7. +/// @return             - false on success; true otherwise. 
+static bool translateMaskRegister(MCInst &mcInst, +                                uint8_t maskRegNum) { +  if (maskRegNum >= 8) { +    debug("Invalid mask register number");      return true;    } -   -  mcInst.addOperand(MCOperand::CreateReg(X86::ST0 + stackPos)); +  mcInst.addOperand(MCOperand::CreateReg(X86::K0 + maskRegNum));    return false;  } @@ -626,7 +715,9 @@ static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,    case ENCODING_REG:      translateRegister(mcInst, insn.reg);      return false; -  case ENCODING_RM: +  case ENCODING_WRITEMASK: +    return translateMaskRegister(mcInst, insn.writemask); +  CASE_ENCODING_RM:      return translateRM(mcInst, operand, insn, Dis);    case ENCODING_CB:    case ENCODING_CW: @@ -648,17 +739,20 @@ static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,                         insn,                         Dis);      return false; +  case ENCODING_SI: +    return translateSrcIndex(mcInst, insn); +  case ENCODING_DI: +    return translateDstIndex(mcInst, insn);    case ENCODING_RB:    case ENCODING_RW:    case ENCODING_RD:    case ENCODING_RO: -    translateRegister(mcInst, insn.opcodeRegister); -    return false; -  case ENCODING_I: -    return translateFPRegister(mcInst, insn.opcodeModifier);    case ENCODING_Rv:      translateRegister(mcInst, insn.opcodeRegister);      return false; +  case ENCODING_FP: +    translateFPRegister(mcInst, insn.modRM & 7); +    return false;    case ENCODING_VVVV:      translateRegister(mcInst, insn.vvvv);      return false; @@ -693,13 +787,11 @@ static bool translateInstruction(MCInst &mcInst,        mcInst.setOpcode(X86::XACQUIRE_PREFIX);    } -  int index; -      insn.numImmediatesTranslated = 0; -  for (index = 0; index < X86_MAX_OPERANDS; ++index) { -    if (insn.operands[index].encoding != ENCODING_NONE) { -      if (translateOperand(mcInst, insn.operands[index], insn, Dis)) { +  for (const auto &Op : insn.operands) { +    if 
(Op.encoding != ENCODING_NONE) { +      if (translateOperand(mcInst, Op, insn, Dis)) {          return true;        }      } @@ -708,22 +800,17 @@ static bool translateInstruction(MCInst &mcInst,    return false;  } -static MCDisassembler *createX86_32Disassembler(const Target &T, -                                                const MCSubtargetInfo &STI) { -  return new X86Disassembler::X86GenericDisassembler(STI, MODE_32BIT, -                                                     T.createMCInstrInfo()); -} - -static MCDisassembler *createX86_64Disassembler(const Target &T, -                                                const MCSubtargetInfo &STI) { -  return new X86Disassembler::X86GenericDisassembler(STI, MODE_64BIT, -                                                     T.createMCInstrInfo()); +static MCDisassembler *createX86Disassembler(const Target &T, +                                             const MCSubtargetInfo &STI, +                                             MCContext &Ctx) { +  std::unique_ptr<const MCInstrInfo> MII(T.createMCInstrInfo()); +  return new X86Disassembler::X86GenericDisassembler(STI, Ctx, std::move(MII));  }  extern "C" void LLVMInitializeX86Disassembler() {     // Register the disassembler.    
TargetRegistry::RegisterMCDisassembler(TheX86_32Target,  -                                         createX86_32Disassembler); +                                         createX86Disassembler);    TargetRegistry::RegisterMCDisassembler(TheX86_64Target, -                                         createX86_64Disassembler); +                                         createX86Disassembler);  } diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.h b/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.h index b92427a7e91a..4dc7c29078fc 100644 --- a/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.h +++ b/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.h @@ -74,17 +74,7 @@  #ifndef X86DISASSEMBLER_H  #define X86DISASSEMBLER_H -#define INSTRUCTION_SPECIFIER_FIELDS \ -  uint16_t operands; - -#define INSTRUCTION_IDS               \ -  uint16_t instructionIDs; -  #include "X86DisassemblerDecoderCommon.h" - -#undef INSTRUCTION_SPECIFIER_FIELDS -#undef INSTRUCTION_IDS -  #include "llvm/MC/MCDisassembler.h"  namespace llvm { @@ -101,24 +91,19 @@ namespace X86Disassembler {  ///   All each platform class should have to do is subclass the constructor, and  ///   provide a different disassemblerMode value.  class X86GenericDisassembler : public MCDisassembler { -  const MCInstrInfo *MII; +  std::unique_ptr<const MCInstrInfo> MII;  public:    /// Constructor     - Initializes the disassembler.    /// -  /// @param mode     - The X86 architecture mode to decode for. -  X86GenericDisassembler(const MCSubtargetInfo &STI, DisassemblerMode mode, -                         const MCInstrInfo *MII); -private: -  ~X86GenericDisassembler(); +  X86GenericDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, +                         std::unique_ptr<const MCInstrInfo> MII);  public:    /// getInstruction - See MCDisassembler. 
-  DecodeStatus getInstruction(MCInst &instr, -                              uint64_t &size, -                              const MemoryObject &region, -                              uint64_t address, +  DecodeStatus getInstruction(MCInst &instr, uint64_t &size, +                              const MemoryObject &region, uint64_t address,                                raw_ostream &vStream, -                              raw_ostream &cStream) const; +                              raw_ostream &cStream) const override;  private:    DisassemblerMode              fMode; diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp index 16ee0d357b77..ab3d1f774bc7 100644 --- a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c +++ b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp @@ -1,17 +1,17 @@ -/*===-- X86DisassemblerDecoder.c - Disassembler decoder ------------*- C -*-===* - * - *                     The LLVM Compiler Infrastructure - * - * This file is distributed under the University of Illinois Open Source - * License. See LICENSE.TXT for details. - * - *===----------------------------------------------------------------------===* - * - * This file is part of the X86 Disassembler. - * It contains the implementation of the instruction decoder. - * Documentation for the disassembler can be found in X86Disassembler.h. - * - *===----------------------------------------------------------------------===*/ +//===-- X86DisassemblerDecoder.c - Disassembler decoder -------------------===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is part of the X86 Disassembler. 
+// It contains the implementation of the instruction decoder. +// Documentation for the disassembler can be found in X86Disassembler.h. +// +//===----------------------------------------------------------------------===//  #include <stdarg.h>   /* for va_*()       */  #include <stdio.h>    /* for vsnprintf()  */ @@ -20,13 +20,35 @@  #include "X86DisassemblerDecoder.h" -#include "X86GenDisassemblerTables.inc" +using namespace llvm::X86Disassembler; + +/// Specifies whether a ModR/M byte is needed and (if so) which +/// instruction each possible value of the ModR/M byte corresponds to.  Once +/// this information is known, we have narrowed down to a single instruction. +struct ModRMDecision { +  uint8_t modrm_type; +  uint16_t instructionIDs; +}; + +/// Specifies which set of ModR/M->instruction tables to look at +/// given a particular opcode. +struct OpcodeDecision { +  ModRMDecision modRMDecisions[256]; +}; + +/// Specifies which opcode->instruction tables to look at given +/// a particular context (set of attributes).  Since there are many possible +/// contexts, the decoder first uses CONTEXTS_SYM to determine which context +/// applies given a specific set of attributes.  Hence there are only IC_max +/// entries in this table, rather than 2^(ATTR_max). +struct ContextDecision { +  OpcodeDecision opcodeDecisions[IC_max]; +}; -#define TRUE  1 -#define FALSE 0 +#include "X86GenDisassemblerTables.inc"  #ifndef NDEBUG -#define debug(s) do { x86DisassemblerDebug(__FILE__, __LINE__, s); } while (0) +#define debug(s) do { Debug(__FILE__, __LINE__, s); } while (0)  #else  #define debug(s) do { } while (0)  #endif @@ -40,8 +62,8 @@   * @return          - The InstructionContext to use when looking up an   *                    an instruction with these attributes.   
*/ -static InstructionContext contextForAttrs(uint8_t attrMask) { -  return CONTEXTS_SYM[attrMask]; +static InstructionContext contextForAttrs(uint16_t attrMask) { +  return static_cast<InstructionContext>(CONTEXTS_SYM[attrMask]);  }  /* @@ -53,12 +75,12 @@ static InstructionContext contextForAttrs(uint8_t attrMask) {   *                      contextForAttrs.   * @param opcode      - The last byte of the instruction's opcode, not counting   *                      ModR/M extensions and escapes. - * @return            - TRUE if the ModR/M byte is required, FALSE otherwise. + * @return            - true if the ModR/M byte is required, false otherwise.   */  static int modRMRequired(OpcodeType type,                           InstructionContext insnContext, -                         uint8_t opcode) { -  const struct ContextDecision* decision = 0; +                         uint16_t opcode) { +  const struct ContextDecision* decision = nullptr;    switch (type) {    case ONEBYTE: @@ -73,12 +95,6 @@ static int modRMRequired(OpcodeType type,    case THREEBYTE_3A:      decision = &THREEBYTE3A_SYM;      break; -  case THREEBYTE_A6: -    decision = &THREEBYTEA6_SYM; -    break; -  case THREEBYTE_A7: -    decision = &THREEBYTEA7_SYM; -    break;    case XOP8_MAP:      decision = &XOP8_MAP_SYM;      break; @@ -108,7 +124,7 @@ static InstrUID decode(OpcodeType type,                         InstructionContext insnContext,                         uint8_t opcode,                         uint8_t modRM) { -  const struct ModRMDecision* dec = 0; +  const struct ModRMDecision* dec = nullptr;    switch (type) {    case ONEBYTE: @@ -123,12 +139,6 @@ static InstrUID decode(OpcodeType type,    case THREEBYTE_3A:      dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];      break; -  case THREEBYTE_A6: -    dec = &THREEBYTEA6_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; -    break; -  case THREEBYTE_A7: -    dec = 
&THREEBYTEA7_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; -    break;    case XOP8_MAP:      dec = &XOP8_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];      break; @@ -296,15 +306,15 @@ static void setPrefixPresent(struct InternalInstruction* insn,   * @param location  - The location to query.   * @return          - Whether the prefix is at that location.   */ -static BOOL isPrefixAtLocation(struct InternalInstruction* insn, +static bool isPrefixAtLocation(struct InternalInstruction* insn,                                 uint8_t prefix,                                 uint64_t location)  {    if (insn->prefixPresent[prefix] == 1 &&       insn->prefixLocations[prefix] == location) -    return TRUE; +    return true;    else -    return FALSE; +    return false;  }  /* @@ -317,14 +327,14 @@ static BOOL isPrefixAtLocation(struct InternalInstruction* insn,   *                bytes, and no prefixes conflicted; nonzero otherwise.   */  static int readPrefixes(struct InternalInstruction* insn) { -  BOOL isPrefix = TRUE; -  BOOL prefixGroups[4] = { FALSE }; +  bool isPrefix = true; +  bool prefixGroups[4] = { false };    uint64_t prefixLocation;    uint8_t byte = 0;    uint8_t nextByte; -  BOOL hasAdSize = FALSE; -  BOOL hasOpSize = FALSE; +  bool hasAdSize = false; +  bool hasOpSize = false;    dbgprintf(insn, "readPrefixes()"); @@ -356,7 +366,7 @@ static int readPrefixes(struct InternalInstruction* insn) {        if ((byte == 0xf2 || byte == 0xf3) &&            ((nextByte == 0xf0) |            ((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90))) -        insn->xAcquireRelease = TRUE; +        insn->xAcquireRelease = true;        /*         * Also if the byte is 0xf3, and the following condition is met:         * - it is followed by a "mov mem, reg" (opcode 0x88/0x89) or @@ -366,7 +376,7 @@ static int readPrefixes(struct InternalInstruction* insn) {        if (byte == 0xf3 &&            (nextByte == 0x88 || nextByte == 0x89 ||             
nextByte == 0xc6 || nextByte == 0xc7)) -        insn->xAcquireRelease = TRUE; +        insn->xAcquireRelease = true;        if (insn->mode == MODE_64BIT && (nextByte & 0xf0) == 0x40) {          if (consumeByte(insn, &nextByte))            return -1; @@ -384,7 +394,7 @@ static int readPrefixes(struct InternalInstruction* insn) {      case 0xf3:  /* REP or REPE/REPZ */        if (prefixGroups[0])          dbgprintf(insn, "Redundant Group 1 prefix"); -      prefixGroups[0] = TRUE; +      prefixGroups[0] = true;        setPrefixPresent(insn, byte, prefixLocation);        break;      case 0x2e:  /* CS segment override -OR- Branch not taken */ @@ -418,25 +428,25 @@ static int readPrefixes(struct InternalInstruction* insn) {        }        if (prefixGroups[1])          dbgprintf(insn, "Redundant Group 2 prefix"); -      prefixGroups[1] = TRUE; +      prefixGroups[1] = true;        setPrefixPresent(insn, byte, prefixLocation);        break;      case 0x66:  /* Operand-size override */        if (prefixGroups[2])          dbgprintf(insn, "Redundant Group 3 prefix"); -      prefixGroups[2] = TRUE; -      hasOpSize = TRUE; +      prefixGroups[2] = true; +      hasOpSize = true;        setPrefixPresent(insn, byte, prefixLocation);        break;      case 0x67:  /* Address-size override */        if (prefixGroups[3])          dbgprintf(insn, "Redundant Group 4 prefix"); -      prefixGroups[3] = TRUE; -      hasAdSize = TRUE; +      prefixGroups[3] = true; +      hasAdSize = true;        setPrefixPresent(insn, byte, prefixLocation);        break;      default:    /* Not a prefix byte */ -      isPrefix = FALSE; +      isPrefix = false;        break;      } @@ -444,9 +454,58 @@ static int readPrefixes(struct InternalInstruction* insn) {        dbgprintf(insn, "Found prefix 0x%hhx", byte);    } -  insn->vexXopType = TYPE_NO_VEX_XOP; +  insn->vectorExtensionType = TYPE_NO_VEX_XOP; + +  if (byte == 0x62) { +    uint8_t byte1, byte2; + +    if (consumeByte(insn, &byte1)) { +      
dbgprintf(insn, "Couldn't read second byte of EVEX prefix"); +      return -1; +    } -  if (byte == 0xc4) { +    if (lookAtByte(insn, &byte2)) { +      dbgprintf(insn, "Couldn't read third byte of EVEX prefix"); +      return -1; +    } + +    if ((insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) && +       ((~byte1 & 0xc) == 0xc) && ((byte2 & 0x4) == 0x4)) { +      insn->vectorExtensionType = TYPE_EVEX; +    } +    else { +      unconsumeByte(insn); /* unconsume byte1 */ +      unconsumeByte(insn); /* unconsume byte  */ +      insn->necessaryPrefixLocation = insn->readerCursor - 2; +    } + +    if (insn->vectorExtensionType == TYPE_EVEX) { +      insn->vectorExtensionPrefix[0] = byte; +      insn->vectorExtensionPrefix[1] = byte1; +      if (consumeByte(insn, &insn->vectorExtensionPrefix[2])) { +        dbgprintf(insn, "Couldn't read third byte of EVEX prefix"); +        return -1; +      } +      if (consumeByte(insn, &insn->vectorExtensionPrefix[3])) { +        dbgprintf(insn, "Couldn't read fourth byte of EVEX prefix"); +        return -1; +      } + +      /* We simulate the REX prefix for simplicity's sake */ +      if (insn->mode == MODE_64BIT) { +        insn->rexPrefix = 0x40 +                        | (wFromEVEX3of4(insn->vectorExtensionPrefix[2]) << 3) +                        | (rFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 2) +                        | (xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 1) +                        | (bFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 0); +      } + +      dbgprintf(insn, "Found EVEX prefix 0x%hhx 0x%hhx 0x%hhx 0x%hhx", +              insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1], +              insn->vectorExtensionPrefix[2], insn->vectorExtensionPrefix[3]); +    } +  } +  else if (byte == 0xc4) {      uint8_t byte1;      if (lookAtByte(insn, &byte1)) { @@ -455,7 +514,7 @@ static int readPrefixes(struct InternalInstruction* insn) {      }      if (insn->mode == MODE_64BIT || (byte1 
& 0xc0) == 0xc0) { -      insn->vexXopType = TYPE_VEX_3B; +      insn->vectorExtensionType = TYPE_VEX_3B;        insn->necessaryPrefixLocation = insn->readerCursor - 1;      }      else { @@ -463,33 +522,24 @@ static int readPrefixes(struct InternalInstruction* insn) {        insn->necessaryPrefixLocation = insn->readerCursor - 1;      } -    if (insn->vexXopType == TYPE_VEX_3B) { -      insn->vexXopPrefix[0] = byte; -      consumeByte(insn, &insn->vexXopPrefix[1]); -      consumeByte(insn, &insn->vexXopPrefix[2]); +    if (insn->vectorExtensionType == TYPE_VEX_3B) { +      insn->vectorExtensionPrefix[0] = byte; +      consumeByte(insn, &insn->vectorExtensionPrefix[1]); +      consumeByte(insn, &insn->vectorExtensionPrefix[2]);        /* We simulate the REX prefix for simplicity's sake */        if (insn->mode == MODE_64BIT) {          insn->rexPrefix = 0x40 -                        | (wFromVEX3of3(insn->vexXopPrefix[2]) << 3) -                        | (rFromVEX2of3(insn->vexXopPrefix[1]) << 2) -                        | (xFromVEX2of3(insn->vexXopPrefix[1]) << 1) -                        | (bFromVEX2of3(insn->vexXopPrefix[1]) << 0); -      } - -      switch (ppFromVEX3of3(insn->vexXopPrefix[2])) -      { -      default: -        break; -      case VEX_PREFIX_66: -        hasOpSize = TRUE; -        break; +                        | (wFromVEX3of3(insn->vectorExtensionPrefix[2]) << 3) +                        | (rFromVEX2of3(insn->vectorExtensionPrefix[1]) << 2) +                        | (xFromVEX2of3(insn->vectorExtensionPrefix[1]) << 1) +                        | (bFromVEX2of3(insn->vectorExtensionPrefix[1]) << 0);        }        dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx", -                insn->vexXopPrefix[0], insn->vexXopPrefix[1], -                insn->vexXopPrefix[2]); +                insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1], +                insn->vectorExtensionPrefix[2]);      }    }    else if (byte == 0xc5) { @@ 
-501,31 +551,33 @@ static int readPrefixes(struct InternalInstruction* insn) {      }      if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) { -      insn->vexXopType = TYPE_VEX_2B; +      insn->vectorExtensionType = TYPE_VEX_2B;      }      else {        unconsumeByte(insn);      } -    if (insn->vexXopType == TYPE_VEX_2B) { -      insn->vexXopPrefix[0] = byte; -      consumeByte(insn, &insn->vexXopPrefix[1]); +    if (insn->vectorExtensionType == TYPE_VEX_2B) { +      insn->vectorExtensionPrefix[0] = byte; +      consumeByte(insn, &insn->vectorExtensionPrefix[1]);        if (insn->mode == MODE_64BIT) {          insn->rexPrefix = 0x40 -                        | (rFromVEX2of2(insn->vexXopPrefix[1]) << 2); +                        | (rFromVEX2of2(insn->vectorExtensionPrefix[1]) << 2);        } -      switch (ppFromVEX2of2(insn->vexXopPrefix[1])) +      switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1]))        {        default:          break;        case VEX_PREFIX_66: -        hasOpSize = TRUE; +        hasOpSize = true;          break;        } -      dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx", insn->vexXopPrefix[0], insn->vexXopPrefix[1]); +      dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx", +                insn->vectorExtensionPrefix[0], +                insn->vectorExtensionPrefix[1]);      }    }    else if (byte == 0x8f) { @@ -537,7 +589,7 @@ static int readPrefixes(struct InternalInstruction* insn) {      }      if ((byte1 & 0x38) != 0x0) { /* 0 in these 3 bits is a POP instruction. 
*/ -      insn->vexXopType = TYPE_XOP; +      insn->vectorExtensionType = TYPE_XOP;        insn->necessaryPrefixLocation = insn->readerCursor - 1;      }      else { @@ -545,33 +597,33 @@ static int readPrefixes(struct InternalInstruction* insn) {        insn->necessaryPrefixLocation = insn->readerCursor - 1;      } -    if (insn->vexXopType == TYPE_XOP) { -      insn->vexXopPrefix[0] = byte; -      consumeByte(insn, &insn->vexXopPrefix[1]); -      consumeByte(insn, &insn->vexXopPrefix[2]); +    if (insn->vectorExtensionType == TYPE_XOP) { +      insn->vectorExtensionPrefix[0] = byte; +      consumeByte(insn, &insn->vectorExtensionPrefix[1]); +      consumeByte(insn, &insn->vectorExtensionPrefix[2]);        /* We simulate the REX prefix for simplicity's sake */        if (insn->mode == MODE_64BIT) {          insn->rexPrefix = 0x40 -                        | (wFromXOP3of3(insn->vexXopPrefix[2]) << 3) -                        | (rFromXOP2of3(insn->vexXopPrefix[1]) << 2) -                        | (xFromXOP2of3(insn->vexXopPrefix[1]) << 1) -                        | (bFromXOP2of3(insn->vexXopPrefix[1]) << 0); +                        | (wFromXOP3of3(insn->vectorExtensionPrefix[2]) << 3) +                        | (rFromXOP2of3(insn->vectorExtensionPrefix[1]) << 2) +                        | (xFromXOP2of3(insn->vectorExtensionPrefix[1]) << 1) +                        | (bFromXOP2of3(insn->vectorExtensionPrefix[1]) << 0);        } -      switch (ppFromXOP3of3(insn->vexXopPrefix[2])) +      switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2]))        {        default:          break;        case VEX_PREFIX_66: -        hasOpSize = TRUE; +        hasOpSize = true;          break;        }        dbgprintf(insn, "Found XOP prefix 0x%hhx 0x%hhx 0x%hhx", -                insn->vexXopPrefix[0], insn->vexXopPrefix[1], -                insn->vexXopPrefix[2]); +                insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1], +                
insn->vectorExtensionPrefix[2]);      }    }    else { @@ -646,13 +698,29 @@ static int readOpcode(struct InternalInstruction* insn) {    insn->opcodeType = ONEBYTE; -  if (insn->vexXopType == TYPE_VEX_3B) +  if (insn->vectorExtensionType == TYPE_EVEX)    { -    switch (mmmmmFromVEX2of3(insn->vexXopPrefix[1])) -    { +    switch (mmFromEVEX2of4(insn->vectorExtensionPrefix[1])) { +    default: +      dbgprintf(insn, "Unhandled mm field for instruction (0x%hhx)", +                mmFromEVEX2of4(insn->vectorExtensionPrefix[1])); +      return -1; +    case VEX_LOB_0F: +      insn->opcodeType = TWOBYTE; +      return consumeByte(insn, &insn->opcode); +    case VEX_LOB_0F38: +      insn->opcodeType = THREEBYTE_38; +      return consumeByte(insn, &insn->opcode); +    case VEX_LOB_0F3A: +      insn->opcodeType = THREEBYTE_3A; +      return consumeByte(insn, &insn->opcode); +    } +  } +  else if (insn->vectorExtensionType == TYPE_VEX_3B) { +    switch (mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])) {      default:        dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)", -                mmmmmFromVEX2of3(insn->vexXopPrefix[1])); +                mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1]));        return -1;      case VEX_LOB_0F:        insn->opcodeType = TWOBYTE; @@ -665,18 +733,15 @@ static int readOpcode(struct InternalInstruction* insn) {        return consumeByte(insn, &insn->opcode);      }    } -  else if (insn->vexXopType == TYPE_VEX_2B) -  { +  else if (insn->vectorExtensionType == TYPE_VEX_2B) {      insn->opcodeType = TWOBYTE;      return consumeByte(insn, &insn->opcode);    } -  else if (insn->vexXopType == TYPE_XOP) -  { -    switch (mmmmmFromXOP2of3(insn->vexXopPrefix[1])) -    { +  else if (insn->vectorExtensionType == TYPE_XOP) { +    switch (mmmmmFromXOP2of3(insn->vectorExtensionPrefix[1])) {      default:        dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)", -                
mmmmmFromVEX2of3(insn->vexXopPrefix[1])); +                mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1]));        return -1;      case XOP_MAP_SELECT_8:        insn->opcodeType = XOP8_MAP; @@ -713,20 +778,6 @@ static int readOpcode(struct InternalInstruction* insn) {          return -1;        insn->opcodeType = THREEBYTE_3A; -    } else if (current == 0xa6) { -      dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); - -      if (consumeByte(insn, &current)) -        return -1; - -      insn->opcodeType = THREEBYTE_A6; -    } else if (current == 0xa7) { -      dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); - -      if (consumeByte(insn, &current)) -        return -1; - -      insn->opcodeType = THREEBYTE_A7;      } else {        dbgprintf(insn, "Didn't find a three-byte escape prefix"); @@ -760,12 +811,10 @@ static int readModRM(struct InternalInstruction* insn);   */  static int getIDWithAttrMask(uint16_t* instructionID,                               struct InternalInstruction* insn, -                             uint8_t attrMask) { -  BOOL hasModRMExtension; +                             uint16_t attrMask) { +  bool hasModRMExtension; -  uint8_t instructionClass; - -  instructionClass = contextForAttrs(attrMask); +  InstructionContext instructionClass = contextForAttrs(attrMask);    hasModRMExtension = modRMRequired(insn->opcodeType,                                      instructionClass, @@ -796,14 +845,14 @@ static int getIDWithAttrMask(uint16_t* instructionID,   * @param orig  - The instruction that is not 16-bit   * @param equiv - The instruction that is 16-bit   */ -static BOOL is16BitEquivalent(const char* orig, const char* equiv) { +static bool is16BitEquivalent(const char* orig, const char* equiv) {    off_t i;    for (i = 0;; i++) {      if (orig[i] == '\0' && equiv[i] == '\0') -      return TRUE; +      return true;      if (orig[i] == '\0' || equiv[i] == '\0') -      return FALSE; +      return false;      if (orig[i] 
!= equiv[i]) {        if ((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W')          continue; @@ -811,7 +860,7 @@ static BOOL is16BitEquivalent(const char* orig, const char* equiv) {          continue;        if ((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6')          continue; -      return FALSE; +      return false;      }    }  } @@ -826,7 +875,7 @@ static BOOL is16BitEquivalent(const char* orig, const char* equiv) {   *                nonzero otherwise.   */  static int getID(struct InternalInstruction* insn, const void *miiArg) { -  uint8_t attrMask; +  uint16_t attrMask;    uint16_t instructionID;    dbgprintf(insn, "getID()"); @@ -836,11 +885,11 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) {    if (insn->mode == MODE_64BIT)      attrMask |= ATTR_64BIT; -  if (insn->vexXopType != TYPE_NO_VEX_XOP) { -    attrMask |= ATTR_VEX; +  if (insn->vectorExtensionType != TYPE_NO_VEX_XOP) { +    attrMask |= (insn->vectorExtensionType == TYPE_EVEX) ? 
ATTR_EVEX : ATTR_VEX; -    if (insn->vexXopType == TYPE_VEX_3B) { -      switch (ppFromVEX3of3(insn->vexXopPrefix[2])) { +    if (insn->vectorExtensionType == TYPE_EVEX) { +      switch (ppFromEVEX3of4(insn->vectorExtensionPrefix[2])) {        case VEX_PREFIX_66:          attrMask |= ATTR_OPSIZE;          break; @@ -852,11 +901,35 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) {          break;        } -      if (lFromVEX3of3(insn->vexXopPrefix[2])) +      if (zFromEVEX4of4(insn->vectorExtensionPrefix[3])) +        attrMask |= ATTR_EVEXKZ; +      if (bFromEVEX4of4(insn->vectorExtensionPrefix[3])) +        attrMask |= ATTR_EVEXB; +      if (aaaFromEVEX4of4(insn->vectorExtensionPrefix[3])) +        attrMask |= ATTR_EVEXK; +      if (lFromEVEX4of4(insn->vectorExtensionPrefix[3])) +        attrMask |= ATTR_EVEXL; +      if (l2FromEVEX4of4(insn->vectorExtensionPrefix[3])) +        attrMask |= ATTR_EVEXL2; +    } +    else if (insn->vectorExtensionType == TYPE_VEX_3B) { +      switch (ppFromVEX3of3(insn->vectorExtensionPrefix[2])) { +      case VEX_PREFIX_66: +        attrMask |= ATTR_OPSIZE; +        break; +      case VEX_PREFIX_F3: +        attrMask |= ATTR_XS; +        break; +      case VEX_PREFIX_F2: +        attrMask |= ATTR_XD; +        break; +      } + +      if (lFromVEX3of3(insn->vectorExtensionPrefix[2]))          attrMask |= ATTR_VEXL;      } -    else if (insn->vexXopType == TYPE_VEX_2B) { -      switch (ppFromVEX2of2(insn->vexXopPrefix[1])) { +    else if (insn->vectorExtensionType == TYPE_VEX_2B) { +      switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) {        case VEX_PREFIX_66:          attrMask |= ATTR_OPSIZE;          break; @@ -868,11 +941,11 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) {          break;        } -      if (lFromVEX2of2(insn->vexXopPrefix[1])) +      if (lFromVEX2of2(insn->vectorExtensionPrefix[1]))          attrMask |= ATTR_VEXL;      } -    else if (insn->vexXopType 
== TYPE_XOP) { -      switch (ppFromXOP3of3(insn->vexXopPrefix[2])) { +    else if (insn->vectorExtensionType == TYPE_XOP) { +      switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) {        case VEX_PREFIX_66:          attrMask |= ATTR_OPSIZE;          break; @@ -884,7 +957,7 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) {          break;        } -      if (lFromXOP3of3(insn->vexXopPrefix[2])) +      if (lFromXOP3of3(insn->vectorExtensionPrefix[2]))          attrMask |= ATTR_VEXL;      }      else { @@ -892,7 +965,7 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) {      }    }    else { -    if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation)) +    if (insn->mode != MODE_16BIT && isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation))        attrMask |= ATTR_OPSIZE;      else if (isPrefixAtLocation(insn, 0x67, insn->necessaryPrefixLocation))        attrMask |= ATTR_ADSIZE; @@ -908,9 +981,29 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) {    if (getIDWithAttrMask(&instructionID, insn, attrMask))      return -1; +  /* +   * JCXZ/JECXZ need special handling for 16-bit mode because the meaning +   * of the AdSize prefix is inverted w.r.t. 32-bit mode. +   */ +  if (insn->mode == MODE_16BIT && insn->opcode == 0xE3) { +    const struct InstructionSpecifier *spec; +    spec = specifierForUID(instructionID); + +    /* +     * Check for Ii8PCRel instructions. We could alternatively do a +     * string-compare on the names, but this is probably cheaper. +     */ +    if (x86OperandSets[spec->operands][0].type == TYPE_REL8) { +      attrMask ^= ATTR_ADSIZE; +      if (getIDWithAttrMask(&instructionID, insn, attrMask)) +        return -1; +    } +  } +    /* The following clauses compensate for limitations of the tables. 
*/ -  if (insn->prefixPresent[0x66] && !(attrMask & ATTR_OPSIZE)) { +  if ((insn->mode == MODE_16BIT || insn->prefixPresent[0x66]) && +      !(attrMask & ATTR_OPSIZE)) {      /*       * The instruction tables make no distinction between instructions that       * allow OpSize anywhere (i.e., 16-bit operations) and that need it in a @@ -938,11 +1031,11 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) {        return 0;      } -    specName = x86DisassemblerGetInstrName(instructionID, miiArg); -    specWithOpSizeName = -      x86DisassemblerGetInstrName(instructionIDWithOpsize, miiArg); +    specName = GetInstrName(instructionID, miiArg); +    specWithOpSizeName = GetInstrName(instructionIDWithOpsize, miiArg); -    if (is16BitEquivalent(specName, specWithOpSizeName)) { +    if (is16BitEquivalent(specName, specWithOpSizeName) && +        (insn->mode == MODE_16BIT) ^ insn->prefixPresent[0x66]) {        insn->instructionID = instructionIDWithOpsize;        insn->spec = specifierForUID(instructionIDWithOpsize);      } else { @@ -1003,8 +1096,8 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) {   * @return      - 0 if the SIB byte was successfully read; nonzero otherwise.   
*/  static int readSIB(struct InternalInstruction* insn) { -  SIBIndex sibIndexBase = 0; -  SIBBase sibBaseBase = 0; +  SIBIndex sibIndexBase = SIB_INDEX_NONE; +  SIBBase sibBaseBase = SIB_BASE_NONE;    uint8_t index, base;    dbgprintf(insn, "readSIB()"); @@ -1012,13 +1105,12 @@ static int readSIB(struct InternalInstruction* insn) {    if (insn->consumedSIB)      return 0; -  insn->consumedSIB = TRUE; +  insn->consumedSIB = true;    switch (insn->addressSize) {    case 2:      dbgprintf(insn, "SIB-based addressing doesn't work in 16-bit mode");      return -1; -    break;    case 4:      sibIndexBase = SIB_INDEX_EAX;      sibBaseBase = SIB_BASE_EAX; @@ -1033,6 +1125,8 @@ static int readSIB(struct InternalInstruction* insn) {      return -1;    index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3); +  if (insn->vectorExtensionType == TYPE_EVEX) +    index |= v2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 4;    switch (index) {    case 0x4: @@ -1109,12 +1203,12 @@ static int readDisplacement(struct InternalInstruction* insn) {    if (insn->consumedDisplacement)      return 0; -  insn->consumedDisplacement = TRUE; +  insn->consumedDisplacement = true;    insn->displacementOffset = insn->readerCursor - insn->startLocation;    switch (insn->eaDisplacement) {    case EA_DISP_NONE: -    insn->consumedDisplacement = FALSE; +    insn->consumedDisplacement = false;      break;    case EA_DISP_8:      if (consumeInt8(insn, &d8)) @@ -1133,7 +1227,7 @@ static int readDisplacement(struct InternalInstruction* insn) {      break;    } -  insn->consumedDisplacement = TRUE; +  insn->consumedDisplacement = true;    return 0;  } @@ -1154,7 +1248,7 @@ static int readModRM(struct InternalInstruction* insn) {    if (consumeByte(insn, &insn->modRM))      return -1; -  insn->consumedModRM = TRUE; +  insn->consumedModRM = true;    mod     = modFromModRM(insn->modRM);    rm      = rmFromModRM(insn->modRM); @@ -1182,6 +1276,10 @@ static int readModRM(struct 
InternalInstruction* insn) {    reg |= rFromREX(insn->rexPrefix) << 3;    rm  |= bFromREX(insn->rexPrefix) << 3; +  if (insn->vectorExtensionType == TYPE_EVEX) { +    reg |= r2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4; +    rm  |=  xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4; +  }    insn->reg = (Reg)(insn->regBase + reg); @@ -1204,6 +1302,7 @@ static int readModRM(struct InternalInstruction* insn) {      case 0x1:        insn->eaBase = (EABase)(insn->eaBaseBase + rm);        insn->eaDisplacement = EA_DISP_8; +      insn->displacementSize = 1;        if (readDisplacement(insn))          return -1;        break; @@ -1228,12 +1327,12 @@ static int readModRM(struct InternalInstruction* insn) {      case 0x0:        insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */        switch (rm) { +      case 0x14:        case 0x4:        case 0xc:   /* in case REXW.b is set */          insn->eaBase = (insn->addressSize == 4 ?                          EA_BASE_sib : EA_BASE_sib64); -        readSIB(insn); -        if (readDisplacement(insn)) +        if (readSIB(insn) || readDisplacement(insn))            return -1;          break;        case 0x5: @@ -1248,14 +1347,16 @@ static int readModRM(struct InternalInstruction* insn) {        }        break;      case 0x1: +      insn->displacementSize = 1; +      /* FALLTHROUGH */      case 0x2:        insn->eaDisplacement = (mod == 0x1 ? 
EA_DISP_8 : EA_DISP_32);        switch (rm) { +      case 0x14:        case 0x4:        case 0xc:   /* in case REXW.b is set */          insn->eaBase = EA_BASE_sib; -        readSIB(insn); -        if (readDisplacement(insn)) +        if (readSIB(insn) || readDisplacement(insn))            return -1;          break;        default: @@ -1311,6 +1412,10 @@ static int readModRM(struct InternalInstruction* insn) {      case TYPE_XMM32:                                      \      case TYPE_XMM:                                        \        return prefix##_XMM0 + index;                       \ +    case TYPE_VK1:                                        \ +    case TYPE_VK8:                                        \ +    case TYPE_VK16:                                       \ +      return prefix##_K0 + index;                         \      case TYPE_MM64:                                       \      case TYPE_MM32:                                       \      case TYPE_MM:                                         \ @@ -1383,7 +1488,7 @@ static int fixupReg(struct InternalInstruction *insn,      if (!valid)        return -1;      break; -  case ENCODING_RM: +  CASE_ENCODING_RM:      if (insn->eaBase >= insn->eaRegBase) {        insn->eaBase = (EABase)fixupRMValue(insn,                                            (OperandType)op->type, @@ -1399,44 +1504,11 @@ static int fixupReg(struct InternalInstruction *insn,  }  /* - * readOpcodeModifier - Reads an operand from the opcode field of an - *   instruction.  Handles AddRegFrm instructions. - * - * @param insn    - The instruction whose opcode field is to be read. - * @param inModRM - Indicates that the opcode field is to be read from the - *                  ModR/M extension; useful for escape opcodes - * @return        - 0 on success; nonzero otherwise. 
- */ -static int readOpcodeModifier(struct InternalInstruction* insn) { -  dbgprintf(insn, "readOpcodeModifier()"); - -  if (insn->consumedOpcodeModifier) -    return 0; - -  insn->consumedOpcodeModifier = TRUE; - -  switch (insn->spec->modifierType) { -  default: -    debug("Unknown modifier type."); -    return -1; -  case MODIFIER_NONE: -    debug("No modifier but an operand expects one."); -    return -1; -  case MODIFIER_OPCODE: -    insn->opcodeModifier = insn->opcode - insn->spec->modifierBase; -    return 0; -  case MODIFIER_MODRM: -    insn->opcodeModifier = insn->modRM - insn->spec->modifierBase; -    return 0; -  } -} - -/*   * readOpcodeRegister - Reads an operand from the opcode field of an   *   instruction and interprets it appropriately given the operand width.   *   Handles AddRegFrm instructions.   * - * @param insn  - See readOpcodeModifier(). + * @param insn  - the instruction whose opcode field is to be read.   * @param size  - The width (in bytes) of the register being specified.   *                1 means AL and friends, 2 means AX, 4 means EAX, and 8 means   *                RAX. 
@@ -1445,16 +1517,13 @@ static int readOpcodeModifier(struct InternalInstruction* insn) {  static int readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) {    dbgprintf(insn, "readOpcodeRegister()"); -  if (readOpcodeModifier(insn)) -    return -1; -    if (size == 0)      size = insn->registerSize;    switch (size) {    case 1:      insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3) -                                                  | insn->opcodeModifier)); +                                                  | (insn->opcode & 7)));      if (insn->rexPrefix &&          insn->opcodeRegister >= MODRM_REG_AL + 0x4 &&          insn->opcodeRegister < MODRM_REG_AL + 0x8) { @@ -1466,17 +1535,17 @@ static int readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) {    case 2:      insn->opcodeRegister = (Reg)(MODRM_REG_AX                                   + ((bFromREX(insn->rexPrefix) << 3) -                                    | insn->opcodeModifier)); +                                    | (insn->opcode & 7)));      break;    case 4:      insn->opcodeRegister = (Reg)(MODRM_REG_EAX                                   + ((bFromREX(insn->rexPrefix) << 3) -                                    | insn->opcodeModifier)); +                                    | (insn->opcode & 7)));      break;    case 8:      insn->opcodeRegister = (Reg)(MODRM_REG_RAX                                   + ((bFromREX(insn->rexPrefix) << 3) -                                    | insn->opcodeModifier)); +                                    | (insn->opcode & 7)));      break;    } @@ -1549,18 +1618,41 @@ static int readImmediate(struct InternalInstruction* insn, uint8_t size) {  static int readVVVV(struct InternalInstruction* insn) {    dbgprintf(insn, "readVVVV()"); -  if (insn->vexXopType == TYPE_VEX_3B) -    insn->vvvv = vvvvFromVEX3of3(insn->vexXopPrefix[2]); -  else if (insn->vexXopType == TYPE_VEX_2B) -    insn->vvvv = 
vvvvFromVEX2of2(insn->vexXopPrefix[1]); -  else if (insn->vexXopType == TYPE_XOP) -    insn->vvvv = vvvvFromXOP3of3(insn->vexXopPrefix[2]); +  int vvvv; +  if (insn->vectorExtensionType == TYPE_EVEX) +    vvvv = (v2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 4 | +            vvvvFromEVEX3of4(insn->vectorExtensionPrefix[2])); +  else if (insn->vectorExtensionType == TYPE_VEX_3B) +    vvvv = vvvvFromVEX3of3(insn->vectorExtensionPrefix[2]); +  else if (insn->vectorExtensionType == TYPE_VEX_2B) +    vvvv = vvvvFromVEX2of2(insn->vectorExtensionPrefix[1]); +  else if (insn->vectorExtensionType == TYPE_XOP) +    vvvv = vvvvFromXOP3of3(insn->vectorExtensionPrefix[2]);    else      return -1;    if (insn->mode != MODE_64BIT) -    insn->vvvv &= 0x7; +    vvvv &= 0x7; + +  insn->vvvv = static_cast<Reg>(vvvv); +  return 0; +} + +/* + * readMaskRegister - Reads an mask register from the opcode field of an + *   instruction. + * + * @param insn    - The instruction whose opcode field is to be read. + * @return        - 0 on success; nonzero otherwise. + */ +static int readMaskRegister(struct InternalInstruction* insn) { +  dbgprintf(insn, "readMaskRegister()"); + +  if (insn->vectorExtensionType != TYPE_EVEX) +    return -1; +  insn->writemask = +      static_cast<Reg>(aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]));    return 0;  } @@ -1572,7 +1664,6 @@ static int readVVVV(struct InternalInstruction* insn) {   * @return      - 0 if all operands could be read; nonzero otherwise.   
*/  static int readOperands(struct InternalInstruction* insn) { -  int index;    int hasVVVV, needVVVV;    int sawRegImm = 0; @@ -1583,16 +1674,21 @@ static int readOperands(struct InternalInstruction* insn) {    hasVVVV = !readVVVV(insn);    needVVVV = hasVVVV && (insn->vvvv != 0); -  for (index = 0; index < X86_MAX_OPERANDS; ++index) { -    switch (x86OperandSets[insn->spec->operands][index].encoding) { +  for (const auto &Op : x86OperandSets[insn->spec->operands]) { +    switch (Op.encoding) {      case ENCODING_NONE: +    case ENCODING_SI: +    case ENCODING_DI:        break;      case ENCODING_REG: -    case ENCODING_RM: +    CASE_ENCODING_RM:        if (readModRM(insn))          return -1; -      if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index])) +      if (fixupReg(insn, &Op))          return -1; +      // Apply the AVX512 compressed displacement scaling factor. +      if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8) +        insn->displacement *= 1 << (Op.encoding - ENCODING_RM);        break;      case ENCODING_CB:      case ENCODING_CW: @@ -1613,14 +1709,14 @@ static int readOperands(struct InternalInstruction* insn) {        }        if (readImmediate(insn, 1))          return -1; -      if (x86OperandSets[insn->spec->operands][index].type == TYPE_IMM3 && +      if (Op.type == TYPE_IMM3 &&            insn->immediates[insn->numImmediatesConsumed - 1] > 7)          return -1; -      if (x86OperandSets[insn->spec->operands][index].type == TYPE_IMM5 && +      if (Op.type == TYPE_IMM5 &&            insn->immediates[insn->numImmediatesConsumed - 1] > 31)          return -1; -      if (x86OperandSets[insn->spec->operands][index].type == TYPE_XMM128 || -          x86OperandSets[insn->spec->operands][index].type == TYPE_XMM256) +      if (Op.type == TYPE_XMM128 || +          Op.type == TYPE_XMM256)          sawRegImm = 1;        break;      case ENCODING_IW: @@ -1663,15 +1759,17 @@ static int readOperands(struct 
InternalInstruction* insn) {        if (readOpcodeRegister(insn, 0))          return -1;        break; -    case ENCODING_I: -      if (readOpcodeModifier(insn)) -        return -1; +    case ENCODING_FP:        break;      case ENCODING_VVVV:        needVVVV = 0; /* Mark that we have found a VVVV operand. */        if (!hasVVVV)          return -1; -      if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index])) +      if (fixupReg(insn, &Op)) +        return -1; +      break; +    case ENCODING_WRITEMASK: +      if (readMaskRegister(insn))          return -1;        break;      case ENCODING_DUP: @@ -1708,14 +1806,10 @@ static int readOperands(struct InternalInstruction* insn) {   * @return          - 0 if the instruction's memory could be read; nonzero if   *                    not.   */ -int decodeInstruction(struct InternalInstruction* insn, -                      byteReader_t reader, -                      const void* readerArg, -                      dlog_t logger, -                      void* loggerArg, -                      const void* miiArg, -                      uint64_t startLoc, -                      DisassemblerMode mode) { +int llvm::X86Disassembler::decodeInstruction( +    struct InternalInstruction *insn, byteReader_t reader, +    const void *readerArg, dlog_t logger, void *loggerArg, const void *miiArg, +    uint64_t startLoc, DisassemblerMode mode) {    memset(insn, 0, sizeof(struct InternalInstruction));    insn->reader = reader; @@ -1734,7 +1828,7 @@ int decodeInstruction(struct InternalInstruction* insn,        readOperands(insn))      return -1; -  insn->operands = &x86OperandSets[insn->spec->operands][0]; +  insn->operands = x86OperandSets[insn->spec->operands];    insn->length = insn->readerCursor - insn->startLocation; diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h index 6d03d5ca5f36..8c45402ab5e1 100644 --- 
a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h +++ b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h @@ -1,39 +1,28 @@ -/*===-- X86DisassemblerDecoderInternal.h - Disassembler decoder ---*- C -*-===* - * - *                     The LLVM Compiler Infrastructure - * - * This file is distributed under the University of Illinois Open Source - * License. See LICENSE.TXT for details. - * - *===----------------------------------------------------------------------===* - * - * This file is part of the X86 Disassembler. - * It contains the public interface of the instruction decoder. - * Documentation for the disassembler can be found in X86Disassembler.h. - * - *===----------------------------------------------------------------------===*/ +//===-- X86DisassemblerDecoderInternal.h - Disassembler decoder -*- C++ -*-===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is part of the X86 Disassembler. +// It contains the public interface of the instruction decoder. +// Documentation for the disassembler can be found in X86Disassembler.h. 
+// +//===----------------------------------------------------------------------===//  #ifndef X86DISASSEMBLERDECODER_H  #define X86DISASSEMBLERDECODER_H -#ifdef __cplusplus -extern "C" { -#endif - -#define INSTRUCTION_SPECIFIER_FIELDS \ -  uint16_t operands; - -#define INSTRUCTION_IDS     \ -  uint16_t instructionIDs; -  #include "X86DisassemblerDecoderCommon.h" +#include "llvm/ADT/ArrayRef.h" -#undef INSTRUCTION_SPECIFIER_FIELDS -#undef INSTRUCTION_IDS +namespace llvm { +namespace X86Disassembler { -/* - * Accessor functions for various fields of an Intel instruction - */ +// Accessor functions for various fields of an Intel instruction  #define modFromModRM(modRM)  (((modRM) & 0xc0) >> 6)  #define regFromModRM(modRM)  (((modRM) & 0x38) >> 3)  #define rmFromModRM(modRM)   ((modRM) & 0x7) @@ -45,6 +34,21 @@ extern "C" {  #define xFromREX(rex)        (((rex) & 0x2) >> 1)  #define bFromREX(rex)        ((rex) & 0x1) +#define rFromEVEX2of4(evex)     (((~(evex)) & 0x80) >> 7) +#define xFromEVEX2of4(evex)     (((~(evex)) & 0x40) >> 6) +#define bFromEVEX2of4(evex)     (((~(evex)) & 0x20) >> 5) +#define r2FromEVEX2of4(evex)    (((~(evex)) & 0x10) >> 4) +#define mmFromEVEX2of4(evex)    ((evex) & 0x3) +#define wFromEVEX3of4(evex)     (((evex) & 0x80) >> 7) +#define vvvvFromEVEX3of4(evex)  (((~(evex)) & 0x78) >> 3) +#define ppFromEVEX3of4(evex)    ((evex) & 0x3) +#define zFromEVEX4of4(evex)     (((evex) & 0x80) >> 7) +#define l2FromEVEX4of4(evex)    (((evex) & 0x40) >> 6) +#define lFromEVEX4of4(evex)     (((evex) & 0x20) >> 5) +#define bFromEVEX4of4(evex)     (((evex) & 0x10) >> 4) +#define v2FromEVEX4of4(evex)    (((~evex) & 0x8) >> 3) +#define aaaFromEVEX4of4(evex)   ((evex) & 0x7) +  #define rFromVEX2of3(vex)       (((~(vex)) & 0x80) >> 7)  #define xFromVEX2of3(vex)       (((~(vex)) & 0x40) >> 6)  #define bFromVEX2of3(vex)       (((~(vex)) & 0x20) >> 5) @@ -68,10 +72,7 @@ extern "C" {  #define lFromXOP3of3(xop)       (((xop) & 0x4) >> 2)  #define ppFromXOP3of3(xop)      
((xop) & 0x3) -/* - * These enums represent Intel registers for use by the decoder. - */ - +// These enums represent Intel registers for use by the decoder.  #define REGS_8BIT     \    ENTRY(AL)           \    ENTRY(CL)           \ @@ -314,6 +315,16 @@ extern "C" {    ENTRY(ZMM30)    \    ENTRY(ZMM31) +#define REGS_MASKS \ +  ENTRY(K0)        \ +  ENTRY(K1)        \ +  ENTRY(K2)        \ +  ENTRY(K3)        \ +  ENTRY(K4)        \ +  ENTRY(K5)        \ +  ENTRY(K6)        \ +  ENTRY(K7) +  #define REGS_SEGMENT \    ENTRY(ES)          \    ENTRY(CS)          \ @@ -361,18 +372,17 @@ extern "C" {    REGS_XMM            \    REGS_YMM            \    REGS_ZMM            \ +  REGS_MASKS          \    REGS_SEGMENT        \    REGS_DEBUG          \    REGS_CONTROL        \    ENTRY(RIP) -/* - * EABase - All possible values of the base field for effective-address - *   computations, a.k.a. the Mod and R/M fields of the ModR/M byte.  We - *   distinguish between bases (EA_BASE_*) and registers that just happen to be - *   referred to when Mod == 0b11 (EA_REG_*). - */ -typedef enum { +/// \brief All possible values of the base field for effective-address +/// computations, a.k.a. the Mod and R/M fields of the ModR/M byte. +/// We distinguish between bases (EA_BASE_*) and registers that just happen +/// to be referred to when Mod == 0b11 (EA_REG_*). +enum EABase {    EA_BASE_NONE,  #define ENTRY(x) EA_BASE_##x,    ALL_EA_BASES @@ -381,15 +391,13 @@ typedef enum {    ALL_REGS  #undef ENTRY    EA_max -} EABase; - -/* - * SIBIndex - All possible values of the SIB index field. - *   Borrows entries from ALL_EA_BASES with the special case that - *   sib is synonymous with NONE. - * Vector SIB: index can be XMM or YMM. - */ -typedef enum { +}; + +/// \brief All possible values of the SIB index field. +/// borrows entries from ALL_EA_BASES with the special case that +/// sib is synonymous with NONE. +/// Vector SIB: index can be XMM or YMM. 
+enum SIBIndex {    SIB_INDEX_NONE,  #define ENTRY(x) SIB_INDEX_##x,    ALL_EA_BASES @@ -398,23 +406,18 @@ typedef enum {    REGS_ZMM  #undef ENTRY    SIB_INDEX_max -} SIBIndex; +}; -/* - * SIBBase - All possible values of the SIB base field. - */ -typedef enum { +/// \brief All possible values of the SIB base field. +enum SIBBase {    SIB_BASE_NONE,  #define ENTRY(x) SIB_BASE_##x,    ALL_SIB_BASES  #undef ENTRY    SIB_BASE_max -} SIBBase; +}; -/* - * EADisplacement - Possible displacement types for effective-address - *   computations. - */ +/// \brief Possible displacement types for effective-address computations.  typedef enum {    EA_DISP_NONE,    EA_DISP_8, @@ -422,20 +425,16 @@ typedef enum {    EA_DISP_32  } EADisplacement; -/* - * Reg - All possible values of the reg field in the ModR/M byte. - */ -typedef enum { +/// \brief All possible values of the reg field in the ModR/M byte. +enum Reg {  #define ENTRY(x) MODRM_REG_##x,    ALL_REGS  #undef ENTRY    MODRM_REG_max -} Reg; +}; -/* - * SegmentOverride - All possible segment overrides. - */ -typedef enum { +/// \brief All possible segment overrides. 
+enum SegmentOverride {    SEG_OVERRIDE_NONE,    SEG_OVERRIDE_CS,    SEG_OVERRIDE_SS, @@ -444,233 +443,220 @@ typedef enum {    SEG_OVERRIDE_FS,    SEG_OVERRIDE_GS,    SEG_OVERRIDE_max -} SegmentOverride; - -/* - * VEXLeadingOpcodeByte - Possible values for the VEX.m-mmmm field - */ +}; -typedef enum { +/// \brief Possible values for the VEX.m-mmmm field +enum VEXLeadingOpcodeByte {    VEX_LOB_0F = 0x1,    VEX_LOB_0F38 = 0x2,    VEX_LOB_0F3A = 0x3 -} VEXLeadingOpcodeByte; +}; -typedef enum { +enum XOPMapSelect {    XOP_MAP_SELECT_8 = 0x8,    XOP_MAP_SELECT_9 = 0x9,    XOP_MAP_SELECT_A = 0xA -} XOPMapSelect; - -/* - * VEXPrefixCode - Possible values for the VEX.pp field - */ +}; -typedef enum { +/// \brief Possible values for the VEX.pp/EVEX.pp field +enum VEXPrefixCode {    VEX_PREFIX_NONE = 0x0,    VEX_PREFIX_66 = 0x1,    VEX_PREFIX_F3 = 0x2,    VEX_PREFIX_F2 = 0x3 -} VEXPrefixCode; +}; -typedef enum { -  TYPE_NO_VEX_XOP = 0x0, -  TYPE_VEX_2B = 0x1, -  TYPE_VEX_3B = 0x2, -  TYPE_XOP = 0x3 -} VEXXOPType; - -typedef uint8_t BOOL; - -/* - * byteReader_t - Type for the byte reader that the consumer must provide to - *   the decoder.  Reads a single byte from the instruction's address space. - * @param arg     - A baton that the consumer can associate with any internal - *                  state that it needs. - * @param byte    - A pointer to a single byte in memory that should be set to - *                  contain the value at address. - * @param address - The address in the instruction's address space that should - *                  be read from. - * @return        - -1 if the byte cannot be read for any reason; 0 otherwise. - */ -typedef int (*byteReader_t)(const void* arg, uint8_t* byte, uint64_t address); - -/* - * dlog_t - Type for the logging function that the consumer can provide to - *   get debugging output from the decoder. - * @param arg     - A baton that the consumer can associate with any internal - *                  state that it needs. 
- * @param log     - A string that contains the message.  Will be reused after - *                  the logger returns. - */ -typedef void (*dlog_t)(void* arg, const char *log); - -/* - * The x86 internal instruction, which is produced by the decoder. - */ +enum VectorExtensionType { +  TYPE_NO_VEX_XOP   = 0x0, +  TYPE_VEX_2B       = 0x1, +  TYPE_VEX_3B       = 0x2, +  TYPE_EVEX         = 0x3, +  TYPE_XOP          = 0x4 +}; + +/// \brief Type for the byte reader that the consumer must provide to +/// the decoder. Reads a single byte from the instruction's address space. +/// \param arg     A baton that the consumer can associate with any internal +///                state that it needs. +/// \param byte    A pointer to a single byte in memory that should be set to +///                contain the value at address. +/// \param address The address in the instruction's address space that should +///                be read from. +/// \return        -1 if the byte cannot be read for any reason; 0 otherwise. +typedef int (*byteReader_t)(const void *arg, uint8_t *byte, uint64_t address); + +/// \brief Type for the logging function that the consumer can provide to +/// get debugging output from the decoder. +/// \param arg A baton that the consumer can associate with any internal +///            state that it needs. +/// \param log A string that contains the message.  Will be reused after +///            the logger returns. +typedef void (*dlog_t)(void *arg, const char *log); + +/// The specification for how to extract and interpret a full instruction and +/// its operands. +struct InstructionSpecifier { +  uint16_t operands; +}; + +/// The x86 internal instruction, which is produced by the decoder.  
struct InternalInstruction { -  /* Reader interface (C) */ +  // Reader interface (C)    byteReader_t reader; -  /* Opaque value passed to the reader */ +  // Opaque value passed to the reader    const void* readerArg; -  /* The address of the next byte to read via the reader */ +  // The address of the next byte to read via the reader    uint64_t readerCursor; -  /* Logger interface (C) */ +  // Logger interface (C)    dlog_t dlog; -  /* Opaque value passed to the logger */ +  // Opaque value passed to the logger    void* dlogArg; -  /* General instruction information */ +  // General instruction information -  /* The mode to disassemble for (64-bit, protected, real) */ +  // The mode to disassemble for (64-bit, protected, real)    DisassemblerMode mode; -  /* The start of the instruction, usable with the reader */ +  // The start of the instruction, usable with the reader    uint64_t startLocation; -  /* The length of the instruction, in bytes */ +  // The length of the instruction, in bytes    size_t length; -  /* Prefix state */ +  // Prefix state -  /* 1 if the prefix byte corresponding to the entry is present; 0 if not */ +  // 1 if the prefix byte corresponding to the entry is present; 0 if not    uint8_t prefixPresent[0x100]; -  /* contains the location (for use with the reader) of the prefix byte */ +  // contains the location (for use with the reader) of the prefix byte    uint64_t prefixLocations[0x100]; -  /* The value of the VEX/XOP prefix, if present */ -  uint8_t vexXopPrefix[3]; -  /* The length of the VEX prefix (0 if not present) */ -  VEXXOPType vexXopType; -  /* The value of the REX prefix, if present */ +  // The value of the vector extension prefix(EVEX/VEX/XOP), if present +  uint8_t vectorExtensionPrefix[4]; +  // The type of the vector extension prefix +  VectorExtensionType vectorExtensionType; +  // The value of the REX prefix, if present    uint8_t rexPrefix; -  /* The location where a mandatory prefix would have to be (i.e., right 
before -     the opcode, or right before the REX prefix if one is present) */ +  // The location where a mandatory prefix would have to be (i.e., right before +  // the opcode, or right before the REX prefix if one is present).    uint64_t necessaryPrefixLocation; -  /* The segment override type */ +  // The segment override type    SegmentOverride segmentOverride; -  /* 1 if the prefix byte, 0xf2 or 0xf3 is xacquire or xrelease */ -  BOOL xAcquireRelease; +  // 1 if the prefix byte, 0xf2 or 0xf3 is xacquire or xrelease +  bool xAcquireRelease; -  /* Sizes of various critical pieces of data, in bytes */ +  // Sizes of various critical pieces of data, in bytes    uint8_t registerSize;    uint8_t addressSize;    uint8_t displacementSize;    uint8_t immediateSize; -  /* Offsets from the start of the instruction to the pieces of data, which is -     needed to find relocation entries for adding symbolic operands */ +  // Offsets from the start of the instruction to the pieces of data, which is +  // needed to find relocation entries for adding symbolic operands.    
uint8_t displacementOffset;    uint8_t immediateOffset; -  /* opcode state */ +  // opcode state -  /* The last byte of the opcode, not counting any ModR/M extension */ +  // The last byte of the opcode, not counting any ModR/M extension    uint8_t opcode; -  /* The ModR/M byte of the instruction, if it is an opcode extension */ +  // The ModR/M byte of the instruction, if it is an opcode extension    uint8_t modRMExtension; -  /* decode state */ +  // decode state -  /* The type of opcode, used for indexing into the array of decode tables */ +  // The type of opcode, used for indexing into the array of decode tables    OpcodeType opcodeType; -  /* The instruction ID, extracted from the decode table */ +  // The instruction ID, extracted from the decode table    uint16_t instructionID; -  /* The specifier for the instruction, from the instruction info table */ -  const struct InstructionSpecifier *spec; +  // The specifier for the instruction, from the instruction info table +  const InstructionSpecifier *spec; -  /* state for additional bytes, consumed during operand decode.  Pattern: -     consumed___ indicates that the byte was already consumed and does not -     need to be consumed again */ +  // state for additional bytes, consumed during operand decode.  Pattern: +  // consumed___ indicates that the byte was already consumed and does not +  // need to be consumed again. -  /* The VEX.vvvv field, which contains a third register operand for some AVX -     instructions */ +  // The VEX.vvvv field, which contains a third register operand for some AVX +  // instructions.    
Reg                           vvvv; -  /* The ModR/M byte, which contains most register operands and some portion of -     all memory operands */ -  BOOL                          consumedModRM; +  // The writemask for AVX-512 instructions which is contained in EVEX.aaa +  Reg                           writemask; + +  // The ModR/M byte, which contains most register operands and some portion of +  // all memory operands. +  bool                          consumedModRM;    uint8_t                       modRM; -  /* The SIB byte, used for more complex 32- or 64-bit memory operands */ -  BOOL                          consumedSIB; +  // The SIB byte, used for more complex 32- or 64-bit memory operands +  bool                          consumedSIB;    uint8_t                       sib; -  /* The displacement, used for memory operands */ -  BOOL                          consumedDisplacement; +  // The displacement, used for memory operands +  bool                          consumedDisplacement;    int32_t                       displacement; -  /* Immediates.  There can be two in some cases */ +  // Immediates.  There can be two in some cases    uint8_t                       numImmediatesConsumed;    uint8_t                       numImmediatesTranslated;    uint64_t                      immediates[2]; -  /* A register or immediate operand encoded into the opcode */ -  BOOL                          consumedOpcodeModifier; -  uint8_t                       opcodeModifier; +  // A register or immediate operand encoded into the opcode    Reg                           opcodeRegister; -  /* Portions of the ModR/M byte */ +  // Portions of the ModR/M byte -  /* These fields determine the allowable values for the ModR/M fields, which -     depend on operand and address widths */ +  // These fields determine the allowable values for the ModR/M fields, which +  // depend on operand and address widths.    
EABase                        eaBaseBase;    EABase                        eaRegBase;    Reg                           regBase; -  /* The Mod and R/M fields can encode a base for an effective address, or a -     register.  These are separated into two fields here */ +  // The Mod and R/M fields can encode a base for an effective address, or a +  // register.  These are separated into two fields here.    EABase                        eaBase;    EADisplacement                eaDisplacement; -  /* The reg field always encodes a register */ +  // The reg field always encodes a register    Reg                           reg; -  /* SIB state */ +  // SIB state    SIBIndex                      sibIndex;    uint8_t                       sibScale;    SIBBase                       sibBase; -  const struct OperandSpecifier *operands; +  ArrayRef<OperandSpecifier> operands;  }; -/* decodeInstruction - Decode one instruction and store the decoding results in - *   a buffer provided by the consumer. - * @param insn      - The buffer to store the instruction in.  Allocated by the - *                    consumer. - * @param reader    - The byteReader_t for the bytes to be read. - * @param readerArg - An argument to pass to the reader for storing context - *                    specific to the consumer.  May be NULL. - * @param logger    - The dlog_t to be used in printing status messages from the - *                    disassembler.  May be NULL. - * @param loggerArg - An argument to pass to the logger for storing context - *                    specific to the logger.  May be NULL. - * @param startLoc  - The address (in the reader's address space) of the first - *                    byte in the instruction. - * @param mode      - The mode (16-bit, 32-bit, 64-bit) to decode in. - * @return          - Nonzero if there was an error during decode, 0 otherwise. 
- */ -int decodeInstruction(struct InternalInstruction* insn, +/// \brief Decode one instruction and store the decoding results in +/// a buffer provided by the consumer. +/// \param insn      The buffer to store the instruction in.  Allocated by the +///                  consumer. +/// \param reader    The byteReader_t for the bytes to be read. +/// \param readerArg An argument to pass to the reader for storing context +///                  specific to the consumer.  May be NULL. +/// \param logger    The dlog_t to be used in printing status messages from the +///                  disassembler.  May be NULL. +/// \param loggerArg An argument to pass to the logger for storing context +///                  specific to the logger.  May be NULL. +/// \param startLoc  The address (in the reader's address space) of the first +///                  byte in the instruction. +/// \param mode      The mode (16-bit, 32-bit, 64-bit) to decode in. +/// \return          Nonzero if there was an error during decode, 0 otherwise. +int decodeInstruction(InternalInstruction *insn,                        byteReader_t reader, -                      const void* readerArg, +                      const void *readerArg,                        dlog_t logger, -                      void* loggerArg, -                      const void* miiArg, +                      void *loggerArg, +                      const void *miiArg,                        uint64_t startLoc,                        DisassemblerMode mode); -/* x86DisassemblerDebug - C-accessible function for printing a message to - *   debugs() - * @param file  - The name of the file printing the debug message. - * @param line  - The line number that printed the debug message. - * @param s     - The message to print. - */ +/// \brief Print a message to debugs() +/// \param file The name of the file printing the debug message. +/// \param line The line number that printed the debug message. +/// \param s    The message to print. 
+void Debug(const char *file, unsigned line, const char *s); -void x86DisassemblerDebug(const char *file, -                          unsigned line, -                          const char *s); +const char *GetInstrName(unsigned Opcode, const void *mii); -const char *x86DisassemblerGetInstrName(unsigned Opcode, const void *mii); - -#ifdef __cplusplus -} -#endif +} // namespace X86Disassembler +} // namespace llvm  #endif diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h index dd1719c64d76..13a7b557b440 100644 --- a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h +++ b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h @@ -1,37 +1,33 @@ -/*===-- X86DisassemblerDecoderCommon.h - Disassembler decoder -----*- C -*-===* - * - *                     The LLVM Compiler Infrastructure - * - * This file is distributed under the University of Illinois Open Source - * License. See LICENSE.TXT for details. - * - *===----------------------------------------------------------------------===* - * - * This file is part of the X86 Disassembler. - * It contains common definitions used by both the disassembler and the table - *  generator. - * Documentation for the disassembler can be found in X86Disassembler.h. - * - *===----------------------------------------------------------------------===*/ - -/* - * This header file provides those definitions that need to be shared between - * the decoder and the table generator in a C-friendly manner. - */ +//===-- X86DisassemblerDecoderCommon.h - Disassembler decoder ---*- C++ -*-===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file is part of the X86 Disassembler. +// It contains common definitions used by both the disassembler and the table +//  generator. +// Documentation for the disassembler can be found in X86Disassembler.h. +// +//===----------------------------------------------------------------------===//  #ifndef X86DISASSEMBLERDECODERCOMMON_H  #define X86DISASSEMBLERDECODERCOMMON_H  #include "llvm/Support/DataTypes.h" +namespace llvm { +namespace X86Disassembler { +  #define INSTRUCTIONS_SYM  x86DisassemblerInstrSpecifiers  #define CONTEXTS_SYM      x86DisassemblerContexts  #define ONEBYTE_SYM       x86DisassemblerOneByteOpcodes  #define TWOBYTE_SYM       x86DisassemblerTwoByteOpcodes  #define THREEBYTE38_SYM   x86DisassemblerThreeByte38Opcodes  #define THREEBYTE3A_SYM   x86DisassemblerThreeByte3AOpcodes -#define THREEBYTEA6_SYM   x86DisassemblerThreeByteA6Opcodes -#define THREEBYTEA7_SYM   x86DisassemblerThreeByteA7Opcodes  #define XOP8_MAP_SYM      x86DisassemblerXOP8Opcodes  #define XOP9_MAP_SYM      x86DisassemblerXOP9Opcodes  #define XOPA_MAP_SYM      x86DisassemblerXOPAOpcodes @@ -42,27 +38,29 @@  #define TWOBYTE_STR       "x86DisassemblerTwoByteOpcodes"  #define THREEBYTE38_STR   "x86DisassemblerThreeByte38Opcodes"  #define THREEBYTE3A_STR   "x86DisassemblerThreeByte3AOpcodes" -#define THREEBYTEA6_STR   "x86DisassemblerThreeByteA6Opcodes" -#define THREEBYTEA7_STR   "x86DisassemblerThreeByteA7Opcodes"  #define XOP8_MAP_STR      "x86DisassemblerXOP8Opcodes"  #define XOP9_MAP_STR      "x86DisassemblerXOP9Opcodes"  #define XOPA_MAP_STR      "x86DisassemblerXOPAOpcodes" -/* - * Attributes of an instruction that must be known before the opcode can be - * processed correctly.  Most of these indicate the presence of particular - * prefixes, but ATTR_64BIT is simply an attribute of the decoding context. 
- */ -#define ATTRIBUTE_BITS          \ -  ENUM_ENTRY(ATTR_NONE,   0x00) \ -  ENUM_ENTRY(ATTR_64BIT,  0x01) \ -  ENUM_ENTRY(ATTR_XS,     0x02) \ -  ENUM_ENTRY(ATTR_XD,     0x04) \ -  ENUM_ENTRY(ATTR_REXW,   0x08) \ -  ENUM_ENTRY(ATTR_OPSIZE, 0x10) \ -  ENUM_ENTRY(ATTR_ADSIZE, 0x20) \ -  ENUM_ENTRY(ATTR_VEX,    0x40) \ -  ENUM_ENTRY(ATTR_VEXL,   0x80) +// Attributes of an instruction that must be known before the opcode can be +// processed correctly.  Most of these indicate the presence of particular +// prefixes, but ATTR_64BIT is simply an attribute of the decoding context. +#define ATTRIBUTE_BITS                  \ +  ENUM_ENTRY(ATTR_NONE,   0x00)         \ +  ENUM_ENTRY(ATTR_64BIT,  (0x1 << 0))   \ +  ENUM_ENTRY(ATTR_XS,     (0x1 << 1))   \ +  ENUM_ENTRY(ATTR_XD,     (0x1 << 2))   \ +  ENUM_ENTRY(ATTR_REXW,   (0x1 << 3))   \ +  ENUM_ENTRY(ATTR_OPSIZE, (0x1 << 4))   \ +  ENUM_ENTRY(ATTR_ADSIZE, (0x1 << 5))   \ +  ENUM_ENTRY(ATTR_VEX,    (0x1 << 6))   \ +  ENUM_ENTRY(ATTR_VEXL,   (0x1 << 7))   \ +  ENUM_ENTRY(ATTR_EVEX,   (0x1 << 8))   \ +  ENUM_ENTRY(ATTR_EVEXL,  (0x1 << 9))   \ +  ENUM_ENTRY(ATTR_EVEXL2, (0x1 << 10))  \ +  ENUM_ENTRY(ATTR_EVEXK,  (0x1 << 11))  \ +  ENUM_ENTRY(ATTR_EVEXKZ, (0x1 << 12))  \ +  ENUM_ENTRY(ATTR_EVEXB,  (0x1 << 13))  #define ENUM_ENTRY(n, v) n = v,  enum attributeBits { @@ -71,13 +69,11 @@ enum attributeBits {  };  #undef ENUM_ENTRY -/* - * Combinations of the above attributes that are relevant to instruction - * decode.  Although other combinations are possible, they can be reduced to - * these without affecting the ultimately decoded instruction. - */ +// Combinations of the above attributes that are relevant to instruction +// decode. Although other combinations are possible, they can be reduced to +// these without affecting the ultimately decoded instruction. 
-/*           Class name           Rank  Rationale for rank assignment         */ +//           Class name           Rank  Rationale for rank assignment  #define INSTRUCTION_CONTEXTS                                                   \    ENUM_ENTRY(IC,                    0,  "says nothing about the instruction")  \    ENUM_ENTRY(IC_64BIT,              1,  "says the instruction applies in "     \ @@ -198,38 +194,38 @@ enum attributeBits {    ENUM_ENTRY(IC_EVEX_L2_W_XS_B,     4,  "requires EVEX_B, L2, W and XS prefix")    \    ENUM_ENTRY(IC_EVEX_L2_W_XD_B,     4,  "requires EVEX_B, L2, W and XD prefix")    \    ENUM_ENTRY(IC_EVEX_L2_W_OPSIZE_B, 4,  "requires EVEX_B, L2, W and OpSize")       \ -  ENUM_ENTRY(IC_EVEX_K_B,             1,  "requires EVEX_B and EVEX_K prefix")             \ -  ENUM_ENTRY(IC_EVEX_XS_K_B,          2,  "requires EVEX_B, EVEX_K and the XS prefix")     \ -  ENUM_ENTRY(IC_EVEX_XD_K_B,          2,  "requires EVEX_B, EVEX_K and the XD prefix")     \ -  ENUM_ENTRY(IC_EVEX_OPSIZE_K_B,      2,  "requires EVEX_B, EVEX_K and the OpSize prefix") \ -  ENUM_ENTRY(IC_EVEX_W_K_B,           3,  "requires EVEX_B, EVEX_K and the W prefix")      \ -  ENUM_ENTRY(IC_EVEX_W_XS_K_B,        4,  "requires EVEX_B, EVEX_K, W, and XS prefix")     \ -  ENUM_ENTRY(IC_EVEX_W_XD_K_B,        4,  "requires EVEX_B, EVEX_K, W, and XD prefix")     \ -  ENUM_ENTRY(IC_EVEX_W_OPSIZE_K_B,    4,  "requires EVEX_B, EVEX_K, W, and OpSize")        \ -  ENUM_ENTRY(IC_EVEX_L_K_B,           3,  "requires EVEX_B, EVEX_K and the L prefix")       \ -  ENUM_ENTRY(IC_EVEX_L_XS_K_B,        4,  "requires EVEX_B, EVEX_K and the L and XS prefix")\ -  ENUM_ENTRY(IC_EVEX_L_XD_K_B,        4,  "requires EVEX_B, EVEX_K and the L and XD prefix")\ -  ENUM_ENTRY(IC_EVEX_L_OPSIZE_K_B,    4,  "requires EVEX_B, EVEX_K, L, and OpSize")         \ -  ENUM_ENTRY(IC_EVEX_L_W_K_B,         3,  "requires EVEX_B, EVEX_K, L and W")               \ -  ENUM_ENTRY(IC_EVEX_L_W_XS_K_B,      4,  "requires EVEX_B, EVEX_K, L, 
W and XS prefix")    \ -  ENUM_ENTRY(IC_EVEX_L_W_XD_K_B,      4,  "requires EVEX_B, EVEX_K, L, W and XD prefix")    \ -  ENUM_ENTRY(IC_EVEX_L_W_OPSIZE_K_B,  4,  "requires EVEX_B, EVEX_K, L, W and OpSize")       \ -  ENUM_ENTRY(IC_EVEX_L2_K_B,          3,  "requires EVEX_B, EVEX_K and the L2 prefix")       \ -  ENUM_ENTRY(IC_EVEX_L2_XS_K_B,       4,  "requires EVEX_B, EVEX_K and the L2 and XS prefix")\ -  ENUM_ENTRY(IC_EVEX_L2_XD_K_B,       4,  "requires EVEX_B, EVEX_K and the L2 and XD prefix")\ -  ENUM_ENTRY(IC_EVEX_L2_OPSIZE_K_B,   4,  "requires EVEX_B, EVEX_K, L2, and OpSize")         \ -  ENUM_ENTRY(IC_EVEX_L2_W_K_B,        3,  "requires EVEX_B, EVEX_K, L2 and W")               \ -  ENUM_ENTRY(IC_EVEX_L2_W_XS_K_B,     4,  "requires EVEX_B, EVEX_K, L2, W and XS prefix")    \ -  ENUM_ENTRY(IC_EVEX_L2_W_XD_K_B,     4,  "requires EVEX_B, EVEX_K, L2, W and XD prefix")    \ -  ENUM_ENTRY(IC_EVEX_L2_W_OPSIZE_K_B, 4,  "requires EVEX_B, EVEX_K, L2, W and OpSize")       \ -  ENUM_ENTRY(IC_EVEX_KZ_B,             1,  "requires EVEX_B and EVEX_KZ prefix")             \ -  ENUM_ENTRY(IC_EVEX_XS_KZ_B,          2,  "requires EVEX_B, EVEX_KZ and the XS prefix")     \ -  ENUM_ENTRY(IC_EVEX_XD_KZ_B,          2,  "requires EVEX_B, EVEX_KZ and the XD prefix")     \ -  ENUM_ENTRY(IC_EVEX_OPSIZE_KZ_B,      2,  "requires EVEX_B, EVEX_KZ and the OpSize prefix") \ -  ENUM_ENTRY(IC_EVEX_W_KZ_B,           3,  "requires EVEX_B, EVEX_KZ and the W prefix")      \ -  ENUM_ENTRY(IC_EVEX_W_XS_KZ_B,        4,  "requires EVEX_B, EVEX_KZ, W, and XS prefix")     \ -  ENUM_ENTRY(IC_EVEX_W_XD_KZ_B,        4,  "requires EVEX_B, EVEX_KZ, W, and XD prefix")     \ -  ENUM_ENTRY(IC_EVEX_W_OPSIZE_KZ_B,    4,  "requires EVEX_B, EVEX_KZ, W, and OpSize")        \ +  ENUM_ENTRY(IC_EVEX_K_B,           1,  "requires EVEX_B and EVEX_K prefix")             \ +  ENUM_ENTRY(IC_EVEX_XS_K_B,        2,  "requires EVEX_B, EVEX_K and the XS prefix")     \ +  ENUM_ENTRY(IC_EVEX_XD_K_B,        2,  "requires EVEX_B, EVEX_K 
and the XD prefix")     \ +  ENUM_ENTRY(IC_EVEX_OPSIZE_K_B,    2,  "requires EVEX_B, EVEX_K and the OpSize prefix") \ +  ENUM_ENTRY(IC_EVEX_W_K_B,         3,  "requires EVEX_B, EVEX_K and the W prefix")      \ +  ENUM_ENTRY(IC_EVEX_W_XS_K_B,      4,  "requires EVEX_B, EVEX_K, W, and XS prefix")     \ +  ENUM_ENTRY(IC_EVEX_W_XD_K_B,      4,  "requires EVEX_B, EVEX_K, W, and XD prefix")     \ +  ENUM_ENTRY(IC_EVEX_W_OPSIZE_K_B,  4,  "requires EVEX_B, EVEX_K, W, and OpSize")        \ +  ENUM_ENTRY(IC_EVEX_L_K_B,         3,  "requires EVEX_B, EVEX_K and the L prefix")       \ +  ENUM_ENTRY(IC_EVEX_L_XS_K_B,      4,  "requires EVEX_B, EVEX_K and the L and XS prefix")\ +  ENUM_ENTRY(IC_EVEX_L_XD_K_B,      4,  "requires EVEX_B, EVEX_K and the L and XD prefix")\ +  ENUM_ENTRY(IC_EVEX_L_OPSIZE_K_B,  4,  "requires EVEX_B, EVEX_K, L, and OpSize")         \ +  ENUM_ENTRY(IC_EVEX_L_W_K_B,       3,  "requires EVEX_B, EVEX_K, L and W")               \ +  ENUM_ENTRY(IC_EVEX_L_W_XS_K_B,    4,  "requires EVEX_B, EVEX_K, L, W and XS prefix")    \ +  ENUM_ENTRY(IC_EVEX_L_W_XD_K_B,    4,  "requires EVEX_B, EVEX_K, L, W and XD prefix")    \ +  ENUM_ENTRY(IC_EVEX_L_W_OPSIZE_K_B,4,  "requires EVEX_B, EVEX_K, L, W and OpSize")       \ +  ENUM_ENTRY(IC_EVEX_L2_K_B,        3,  "requires EVEX_B, EVEX_K and the L2 prefix")       \ +  ENUM_ENTRY(IC_EVEX_L2_XS_K_B,     4,  "requires EVEX_B, EVEX_K and the L2 and XS prefix")\ +  ENUM_ENTRY(IC_EVEX_L2_XD_K_B,     4,  "requires EVEX_B, EVEX_K and the L2 and XD prefix")\ +  ENUM_ENTRY(IC_EVEX_L2_OPSIZE_K_B, 4,  "requires EVEX_B, EVEX_K, L2, and OpSize")         \ +  ENUM_ENTRY(IC_EVEX_L2_W_K_B,      3,  "requires EVEX_B, EVEX_K, L2 and W")               \ +  ENUM_ENTRY(IC_EVEX_L2_W_XS_K_B,   4,  "requires EVEX_B, EVEX_K, L2, W and XS prefix")    \ +  ENUM_ENTRY(IC_EVEX_L2_W_XD_K_B,   4,  "requires EVEX_B, EVEX_K, L2, W and XD prefix")    \ +  ENUM_ENTRY(IC_EVEX_L2_W_OPSIZE_K_B,4,  "requires EVEX_B, EVEX_K, L2, W and OpSize")       \ +  
ENUM_ENTRY(IC_EVEX_KZ_B,           1,  "requires EVEX_B and EVEX_KZ prefix")             \ +  ENUM_ENTRY(IC_EVEX_XS_KZ_B,        2,  "requires EVEX_B, EVEX_KZ and the XS prefix")     \ +  ENUM_ENTRY(IC_EVEX_XD_KZ_B,        2,  "requires EVEX_B, EVEX_KZ and the XD prefix")     \ +  ENUM_ENTRY(IC_EVEX_OPSIZE_KZ_B,    2,  "requires EVEX_B, EVEX_KZ and the OpSize prefix") \ +  ENUM_ENTRY(IC_EVEX_W_KZ_B,         3,  "requires EVEX_B, EVEX_KZ and the W prefix")      \ +  ENUM_ENTRY(IC_EVEX_W_XS_KZ_B,      4,  "requires EVEX_B, EVEX_KZ, W, and XS prefix")     \ +  ENUM_ENTRY(IC_EVEX_W_XD_KZ_B,      4,  "requires EVEX_B, EVEX_KZ, W, and XD prefix")     \ +  ENUM_ENTRY(IC_EVEX_W_OPSIZE_KZ_B,  4,  "requires EVEX_B, EVEX_KZ, W, and OpSize")        \    ENUM_ENTRY(IC_EVEX_L_KZ_B,           3,  "requires EVEX_B, EVEX_KZ and the L prefix")       \    ENUM_ENTRY(IC_EVEX_L_XS_KZ_B,        4,  "requires EVEX_B, EVEX_KZ and the L and XS prefix")\    ENUM_ENTRY(IC_EVEX_L_XD_KZ_B,        4,  "requires EVEX_B, EVEX_KZ and the L and XD prefix")\ @@ -269,62 +265,52 @@ enum attributeBits {    ENUM_ENTRY(IC_EVEX_L2_W_KZ,        3,  "requires EVEX_KZ, L2 and W")               \    ENUM_ENTRY(IC_EVEX_L2_W_XS_KZ,     4,  "requires EVEX_KZ, L2, W and XS prefix")    \    ENUM_ENTRY(IC_EVEX_L2_W_XD_KZ,     4,  "requires EVEX_KZ, L2, W and XD prefix")    \ -  ENUM_ENTRY(IC_EVEX_L2_W_OPSIZE_KZ, 4,  "requires EVEX_KZ, L2, W and OpSize")      +  ENUM_ENTRY(IC_EVEX_L2_W_OPSIZE_KZ, 4,  "requires EVEX_KZ, L2, W and OpSize")  #define ENUM_ENTRY(n, r, d) n, -typedef enum { +enum InstructionContext {    INSTRUCTION_CONTEXTS    IC_max -} InstructionContext; +};  #undef ENUM_ENTRY -/* - * Opcode types, which determine which decode table to use, both in the Intel - * manual and also for the decoder. - */ -typedef enum { +// Opcode types, which determine which decode table to use, both in the Intel +// manual and also for the decoder. 
+enum OpcodeType {    ONEBYTE       = 0,    TWOBYTE       = 1,    THREEBYTE_38  = 2,    THREEBYTE_3A  = 3, -  THREEBYTE_A6  = 4, -  THREEBYTE_A7  = 5, -  XOP8_MAP      = 6, -  XOP9_MAP      = 7, -  XOPA_MAP      = 8 -} OpcodeType; - -/* - * The following structs are used for the hierarchical decode table.  After - * determining the instruction's class (i.e., which IC_* constant applies to - * it), the decoder reads the opcode.  Some instructions require specific - * values of the ModR/M byte, so the ModR/M byte indexes into the final table. - * - * If a ModR/M byte is not required, "required" is left unset, and the values - * for each instructionID are identical. - */ +  XOP8_MAP      = 4, +  XOP9_MAP      = 5, +  XOPA_MAP      = 6 +}; +// The following structs are used for the hierarchical decode table.  After +// determining the instruction's class (i.e., which IC_* constant applies to +// it), the decoder reads the opcode.  Some instructions require specific +// values of the ModR/M byte, so the ModR/M byte indexes into the final table. +// +// If a ModR/M byte is not required, "required" is left unset, and the values +// for each instructionID are identical.  typedef uint16_t InstrUID; -/* - * ModRMDecisionType - describes the type of ModR/M decision, allowing the - * consumer to determine the number of entries in it. - * - * MODRM_ONEENTRY - No matter what the value of the ModR/M byte is, the decoded - *                  instruction is the same. - * MODRM_SPLITRM  - If the ModR/M byte is between 0x00 and 0xbf, the opcode - *                  corresponds to one instruction; otherwise, it corresponds to - *                  a different instruction. - * MODRM_SPLITMISC- If the ModR/M byte is between 0x00 and 0xbf, ModR/M byte - *                  divided by 8 is used to select instruction; otherwise, each - *                  value of the ModR/M byte could correspond to a different - *                  instruction. 
- * MODRM_SPLITREG - ModR/M byte divided by 8 is used to select instruction. This -                    corresponds to instructions that use reg field as opcode - * MODRM_FULL     - Potentially, each value of the ModR/M byte could correspond - *                  to a different instruction. - */ - +// ModRMDecisionType - describes the type of ModR/M decision, allowing the +// consumer to determine the number of entries in it. +// +// MODRM_ONEENTRY - No matter what the value of the ModR/M byte is, the decoded +//                  instruction is the same. +// MODRM_SPLITRM  - If the ModR/M byte is between 0x00 and 0xbf, the opcode +//                  corresponds to one instruction; otherwise, it corresponds to +//                  a different instruction. +// MODRM_SPLITMISC- If the ModR/M byte is between 0x00 and 0xbf, ModR/M byte +//                  divided by 8 is used to select instruction; otherwise, each +//                  value of the ModR/M byte could correspond to a different +//                  instruction. +// MODRM_SPLITREG - ModR/M byte divided by 8 is used to select instruction. This +//                  corresponds to instructions that use reg field as opcode +// MODRM_FULL     - Potentially, each value of the ModR/M byte could correspond +//                  to a different instruction.  #define MODRMTYPES            \    ENUM_ENTRY(MODRM_ONEENTRY)  \    ENUM_ENTRY(MODRM_SPLITRM)   \ @@ -333,51 +319,32 @@ typedef uint16_t InstrUID;    ENUM_ENTRY(MODRM_FULL)  #define ENUM_ENTRY(n) n, -typedef enum { +enum ModRMDecisionType {    MODRMTYPES    MODRM_max -} ModRMDecisionType; -#undef ENUM_ENTRY - -/* - * ModRMDecision - Specifies whether a ModR/M byte is needed and (if so) which - *  instruction each possible value of the ModR/M byte corresponds to.  Once - *  this information is known, we have narrowed down to a single instruction. 
- */ -struct ModRMDecision { -  uint8_t     modrm_type; - -  /* The macro below must be defined wherever this file is included. */ -  INSTRUCTION_IDS -}; - -/* - * OpcodeDecision - Specifies which set of ModR/M->instruction tables to look at - *   given a particular opcode. - */ -struct OpcodeDecision { -  struct ModRMDecision modRMDecisions[256]; -}; - -/* - * ContextDecision - Specifies which opcode->instruction tables to look at given - *   a particular context (set of attributes).  Since there are many possible - *   contexts, the decoder first uses CONTEXTS_SYM to determine which context - *   applies given a specific set of attributes.  Hence there are only IC_max - *   entries in this table, rather than 2^(ATTR_max). - */ -struct ContextDecision { -  struct OpcodeDecision opcodeDecisions[IC_max];  }; +#undef ENUM_ENTRY -/* - * Physical encodings of instruction operands. - */ +#define CASE_ENCODING_RM     \ +    case ENCODING_RM:        \ +    case ENCODING_RM_CD2:    \ +    case ENCODING_RM_CD4:    \ +    case ENCODING_RM_CD8:    \ +    case ENCODING_RM_CD16:   \ +    case ENCODING_RM_CD32:   \ +    case ENCODING_RM_CD64 +// Physical encodings of instruction operands.  
#define ENCODINGS                                                              \    ENUM_ENTRY(ENCODING_NONE,   "")                                              \    ENUM_ENTRY(ENCODING_REG,    "Register operand in ModR/M byte.")              \    ENUM_ENTRY(ENCODING_RM,     "R/M operand in ModR/M byte.")                   \ +  ENUM_ENTRY(ENCODING_RM_CD2, "R/M operand with CDisp scaling of 2")           \ +  ENUM_ENTRY(ENCODING_RM_CD4, "R/M operand with CDisp scaling of 4")           \ +  ENUM_ENTRY(ENCODING_RM_CD8, "R/M operand with CDisp scaling of 8")           \ +  ENUM_ENTRY(ENCODING_RM_CD16,"R/M operand with CDisp scaling of 16")          \ +  ENUM_ENTRY(ENCODING_RM_CD32,"R/M operand with CDisp scaling of 32")          \ +  ENUM_ENTRY(ENCODING_RM_CD64,"R/M operand with CDisp scaling of 64")          \    ENUM_ENTRY(ENCODING_VVVV,   "Register operand in VEX.vvvv byte.")            \    ENUM_ENTRY(ENCODING_WRITEMASK, "Register operand in EVEX.aaa byte.")         \    ENUM_ENTRY(ENCODING_CB,     "1-byte code offset (possible new CS value)")    \ @@ -395,27 +362,26 @@ struct ContextDecision {    ENUM_ENTRY(ENCODING_RW,     "(AX..DI, R8W..R15W)")                           \    ENUM_ENTRY(ENCODING_RD,     "(EAX..EDI, R8D..R15D)")                         \    ENUM_ENTRY(ENCODING_RO,     "(RAX..RDI, R8..R15)")                           \ -  ENUM_ENTRY(ENCODING_I,      "Position on floating-point stack added to the " \ -                              "opcode byte")                                   \ +  ENUM_ENTRY(ENCODING_FP,     "Position on floating-point stack in ModR/M "    \ +                              "byte.")                                         \                                                                                 \    ENUM_ENTRY(ENCODING_Iv,     "Immediate of operand size")                     \    ENUM_ENTRY(ENCODING_Ia,     "Immediate of address size")                     \    ENUM_ENTRY(ENCODING_Rv,     "Register code of operand size added 
to the "    \                                "opcode byte")                                   \    ENUM_ENTRY(ENCODING_DUP,    "Duplicate of another operand; ID is encoded "   \ -                              "in type") +                              "in type")                                       \ +  ENUM_ENTRY(ENCODING_SI,     "Source index; encoded in OpSize/Adsize prefix") \ +  ENUM_ENTRY(ENCODING_DI,     "Destination index; encoded in prefixes")  #define ENUM_ENTRY(n, d) n, -  typedef enum { -    ENCODINGS -    ENCODING_max -  } OperandEncoding; +enum OperandEncoding { +  ENCODINGS +  ENCODING_max +};  #undef ENUM_ENTRY -/* - * Semantic interpretations of instruction operands. - */ - +// Semantic interpretations of instruction operands.  #define TYPES                                                                  \    ENUM_ENTRY(TYPE_NONE,       "")                                              \    ENUM_ENTRY(TYPE_REL8,       "1-byte immediate address")                      \ @@ -454,6 +420,14 @@ struct ContextDecision {    ENUM_ENTRY(TYPE_M16_16,     "2+2-byte (BOUND)")                              \    ENUM_ENTRY(TYPE_M32_32,     "4+4-byte (BOUND)")                              \    ENUM_ENTRY(TYPE_M16_64,     "2+8-byte (LIDT, LGDT)")                         \ +  ENUM_ENTRY(TYPE_SRCIDX8,    "1-byte memory at source index")                 \ +  ENUM_ENTRY(TYPE_SRCIDX16,   "2-byte memory at source index")                 \ +  ENUM_ENTRY(TYPE_SRCIDX32,   "4-byte memory at source index")                 \ +  ENUM_ENTRY(TYPE_SRCIDX64,   "8-byte memory at source index")                 \ +  ENUM_ENTRY(TYPE_DSTIDX8,    "1-byte memory at destination index")            \ +  ENUM_ENTRY(TYPE_DSTIDX16,   "2-byte memory at destination index")            \ +  ENUM_ENTRY(TYPE_DSTIDX32,   "4-byte memory at destination index")            \ +  ENUM_ENTRY(TYPE_DSTIDX64,   "8-byte memory at destination index")            \    ENUM_ENTRY(TYPE_MOFFS8,     "1-byte memory 
offset (relative to segment "     \                                "base)")                                         \    ENUM_ENTRY(TYPE_MOFFS16,    "2-byte")                                        \ @@ -478,8 +452,13 @@ struct ContextDecision {    ENUM_ENTRY(TYPE_XMM128,     "16-byte")                                       \    ENUM_ENTRY(TYPE_XMM256,     "32-byte")                                       \    ENUM_ENTRY(TYPE_XMM512,     "64-byte")                                       \ +  ENUM_ENTRY(TYPE_VK1,        "1-bit")                                         \ +  ENUM_ENTRY(TYPE_VK2,        "2-bit")                                         \ +  ENUM_ENTRY(TYPE_VK4,        "4-bit")                                         \    ENUM_ENTRY(TYPE_VK8,        "8-bit")                                         \    ENUM_ENTRY(TYPE_VK16,       "16-bit")                                        \ +  ENUM_ENTRY(TYPE_VK32,       "32-bit")                                        \ +  ENUM_ENTRY(TYPE_VK64,       "64-bit")                                        \    ENUM_ENTRY(TYPE_XMM0,       "Implicit use of XMM0")                          \    ENUM_ENTRY(TYPE_SEGMENTREG, "Segment register operand")                      \    ENUM_ENTRY(TYPE_DEBUGREG,   "Debug register operand")                        \ @@ -497,61 +476,42 @@ struct ContextDecision {    ENUM_ENTRY(TYPE_M512,       "512-bit FPU/MMX/XMM/MXCSR state")  #define ENUM_ENTRY(n, d) n, -typedef enum { +enum OperandType {    TYPES    TYPE_max -} OperandType; +};  #undef ENUM_ENTRY -/* - * OperandSpecifier - The specification for how to extract and interpret one - *   operand. - */ +/// \brief The specification for how to extract and interpret one operand.  struct OperandSpecifier {    uint8_t encoding;    uint8_t type;  }; -/* - * Indicates where the opcode modifier (if any) is to be found.  Extended - * opcodes with AddRegFrm have the opcode modifier in the ModR/M byte. 
- */ - +// Indicates where the opcode modifier (if any) is to be found.  Extended +// opcodes with AddRegFrm have the opcode modifier in the ModR/M byte.  #define MODIFIER_TYPES        \ -  ENUM_ENTRY(MODIFIER_NONE)   \ -  ENUM_ENTRY(MODIFIER_OPCODE) \ -  ENUM_ENTRY(MODIFIER_MODRM) +  ENUM_ENTRY(MODIFIER_NONE)  #define ENUM_ENTRY(n) n, -typedef enum { +enum ModifierType {    MODIFIER_TYPES    MODIFIER_max -} ModifierType; +};  #undef ENUM_ENTRY -#define X86_MAX_OPERANDS 5 - -/* - * The specification for how to extract and interpret a full instruction and - * its operands. - */ -struct InstructionSpecifier { -  uint8_t modifierType; -  uint8_t modifierBase; - -  /* The macro below must be defined wherever this file is included. */ -  INSTRUCTION_SPECIFIER_FIELDS -}; +static const unsigned X86_MAX_OPERANDS = 5; -/* - * Decoding mode for the Intel disassembler.  16-bit, 32-bit, and 64-bit mode - * are supported, and represent real mode, IA-32e, and IA-32e in 64-bit mode, - * respectively. - */ -typedef enum { +/// Decoding mode for the Intel disassembler.  16-bit, 32-bit, and 64-bit mode +/// are supported, and represent real mode, IA-32e, and IA-32e in 64-bit mode, +/// respectively. +enum DisassemblerMode {    MODE_16BIT,    MODE_32BIT,    MODE_64BIT -} DisassemblerMode; +}; + +} // namespace X86Disassembler +} // namespace llvm  #endif  | 
