diff options
Diffstat (limited to 'llvm/lib/Target/X86/Disassembler')
| -rw-r--r-- | llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp | 853 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp | 1938 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h | 695 | 
3 files changed, 3486 insertions, 0 deletions
| diff --git a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp new file mode 100644 index 000000000000..9a635bbe5f85 --- /dev/null +++ b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -0,0 +1,853 @@ +//===-- X86Disassembler.cpp - Disassembler for x86 and x86_64 -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is part of the X86 Disassembler. +// It contains code to translate the data produced by the decoder into +//  MCInsts. +// +// +// The X86 disassembler is a table-driven disassembler for the 16-, 32-, and +// 64-bit X86 instruction sets.  The main decode sequence for an assembly +// instruction in this disassembler is: +// +// 1. Read the prefix bytes and determine the attributes of the instruction. +//    These attributes, recorded in enum attributeBits +//    (X86DisassemblerDecoderCommon.h), form a bitmask.  The table CONTEXTS_SYM +//    provides a mapping from bitmasks to contexts, which are represented by +//    enum InstructionContext (ibid.). +// +// 2. Read the opcode, and determine what kind of opcode it is.  The +//    disassembler distinguishes four kinds of opcodes, which are enumerated in +//    OpcodeType (X86DisassemblerDecoderCommon.h): one-byte (0xnn), two-byte +//    (0x0f 0xnn), three-byte-38 (0x0f 0x38 0xnn), or three-byte-3a +//    (0x0f 0x3a 0xnn).  Mandatory prefixes are treated as part of the context. +// +// 3. Depending on the opcode type, look in one of four ClassDecision structures +//    (X86DisassemblerDecoderCommon.h).  Use the opcode class to determine which +//    OpcodeDecision (ibid.) to look the opcode in.  Look up the opcode, to get +//    a ModRMDecision (ibid.). +// +// 4. 
Some instructions, such as escape opcodes or extended opcodes, or even +//    instructions that have ModRM*Reg / ModRM*Mem forms in LLVM, need the +//    ModR/M byte to complete decode.  The ModRMDecision's type is an entry from +//    ModRMDecisionType (X86DisassemblerDecoderCommon.h) that indicates if the +//    ModR/M byte is required and how to interpret it. +// +// 5. After resolving the ModRMDecision, the disassembler has a unique ID +//    of type InstrUID (X86DisassemblerDecoderCommon.h).  Looking this ID up in +//    INSTRUCTIONS_SYM yields the name of the instruction and the encodings and +//    meanings of its operands. +// +// 6. For each operand, its encoding is an entry from OperandEncoding +//    (X86DisassemblerDecoderCommon.h) and its type is an entry from +//    OperandType (ibid.).  The encoding indicates how to read it from the +//    instruction; the type indicates how to interpret the value once it has +//    been read.  For example, a register operand could be stored in the R/M +//    field of the ModR/M byte, the REG field of the ModR/M byte, or added to +//    the main opcode.  This is orthogonal from its meaning (an GPR or an XMM +//    register, for instance).  Given this information, the operands can be +//    extracted and interpreted. +// +// 7. As the last step, the disassembler translates the instruction information +//    and operands into a format understandable by the client - in this case, an +//    MCInst for use by the MC infrastructure. +// +// The disassembler is broken broadly into two parts: the table emitter that +// emits the instruction decode tables discussed above during compilation, and +// the disassembler itself.  The table emitter is documented in more detail in +// utils/TableGen/X86DisassemblerEmitter.h. +// +// X86Disassembler.cpp contains the code responsible for step 7, and for +//   invoking the decoder to execute steps 1-6. 
+// X86DisassemblerDecoderCommon.h contains the definitions needed by both the +//   table emitter and the disassembler. +// X86DisassemblerDecoder.h contains the public interface of the decoder, +//   factored out into C for possible use by other projects. +// X86DisassemblerDecoder.c contains the source code of the decoder, which is +//   responsible for steps 1-6. +// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/X86BaseInfo.h" +#include "MCTargetDesc/X86MCTargetDesc.h" +#include "TargetInfo/X86TargetInfo.h" +#include "X86DisassemblerDecoder.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDisassembler/MCDisassembler.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; +using namespace llvm::X86Disassembler; + +#define DEBUG_TYPE "x86-disassembler" + +void llvm::X86Disassembler::Debug(const char *file, unsigned line, +                                  const char *s) { +  dbgs() << file << ":" << line << ": " << s; +} + +StringRef llvm::X86Disassembler::GetInstrName(unsigned Opcode, +                                                const void *mii) { +  const MCInstrInfo *MII = static_cast<const MCInstrInfo *>(mii); +  return MII->getName(Opcode); +} + +#define debug(s) LLVM_DEBUG(Debug(__FILE__, __LINE__, s)); + +namespace llvm { + +// Fill-ins to make the compiler happy.  These constants are never actually +//   assigned; they are just filler to make an automatically-generated switch +//   statement work. 
+namespace X86 { +  enum { +    BX_SI = 500, +    BX_DI = 501, +    BP_SI = 502, +    BP_DI = 503, +    sib   = 504, +    sib64 = 505 +  }; +} + +} + +static bool translateInstruction(MCInst &target, +                                InternalInstruction &source, +                                const MCDisassembler *Dis); + +namespace { + +/// Generic disassembler for all X86 platforms. All each platform class should +/// have to do is subclass the constructor, and provide a different +/// disassemblerMode value. +class X86GenericDisassembler : public MCDisassembler { +  std::unique_ptr<const MCInstrInfo> MII; +public: +  X86GenericDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, +                         std::unique_ptr<const MCInstrInfo> MII); +public: +  DecodeStatus getInstruction(MCInst &instr, uint64_t &size, +                              ArrayRef<uint8_t> Bytes, uint64_t Address, +                              raw_ostream &vStream, +                              raw_ostream &cStream) const override; + +private: +  DisassemblerMode              fMode; +}; + +} + +X86GenericDisassembler::X86GenericDisassembler( +                                         const MCSubtargetInfo &STI, +                                         MCContext &Ctx, +                                         std::unique_ptr<const MCInstrInfo> MII) +  : MCDisassembler(STI, Ctx), MII(std::move(MII)) { +  const FeatureBitset &FB = STI.getFeatureBits(); +  if (FB[X86::Mode16Bit]) { +    fMode = MODE_16BIT; +    return; +  } else if (FB[X86::Mode32Bit]) { +    fMode = MODE_32BIT; +    return; +  } else if (FB[X86::Mode64Bit]) { +    fMode = MODE_64BIT; +    return; +  } + +  llvm_unreachable("Invalid CPU mode"); +} + +namespace { +struct Region { +  ArrayRef<uint8_t> Bytes; +  uint64_t Base; +  Region(ArrayRef<uint8_t> Bytes, uint64_t Base) : Bytes(Bytes), Base(Base) {} +}; +} // end anonymous namespace + +/// A callback function that wraps the readByte method from Region. 
+/// +/// @param Arg      - The generic callback parameter.  In this case, this should +///                   be a pointer to a Region. +/// @param Byte     - A pointer to the byte to be read. +/// @param Address  - The address to be read. +static int regionReader(const void *Arg, uint8_t *Byte, uint64_t Address) { +  auto *R = static_cast<const Region *>(Arg); +  ArrayRef<uint8_t> Bytes = R->Bytes; +  unsigned Index = Address - R->Base; +  if (Bytes.size() <= Index) +    return -1; +  *Byte = Bytes[Index]; +  return 0; +} + +/// logger - a callback function that wraps the operator<< method from +///   raw_ostream. +/// +/// @param arg      - The generic callback parameter.  This should be a pointe +///                   to a raw_ostream. +/// @param log      - A string to be logged.  logger() adds a newline. +static void logger(void* arg, const char* log) { +  if (!arg) +    return; + +  raw_ostream &vStream = *(static_cast<raw_ostream*>(arg)); +  vStream << log << "\n"; +} + +// +// Public interface for the disassembler +// + +MCDisassembler::DecodeStatus X86GenericDisassembler::getInstruction( +    MCInst &Instr, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address, +    raw_ostream &VStream, raw_ostream &CStream) const { +  CommentStream = &CStream; + +  InternalInstruction InternalInstr; + +  dlog_t LoggerFn = logger; +  if (&VStream == &nulls()) +    LoggerFn = nullptr; // Disable logging completely if it's going to nulls(). 
+ +  Region R(Bytes, Address); + +  int Ret = decodeInstruction(&InternalInstr, regionReader, (const void *)&R, +                              LoggerFn, (void *)&VStream, +                              (const void *)MII.get(), Address, fMode); + +  if (Ret) { +    Size = InternalInstr.readerCursor - Address; +    return Fail; +  } else { +    Size = InternalInstr.length; +    bool Ret = translateInstruction(Instr, InternalInstr, this); +    if (!Ret) { +      unsigned Flags = X86::IP_NO_PREFIX; +      if (InternalInstr.hasAdSize) +        Flags |= X86::IP_HAS_AD_SIZE; +      if (!InternalInstr.mandatoryPrefix) { +        if (InternalInstr.hasOpSize) +          Flags |= X86::IP_HAS_OP_SIZE; +        if (InternalInstr.repeatPrefix == 0xf2) +          Flags |= X86::IP_HAS_REPEAT_NE; +        else if (InternalInstr.repeatPrefix == 0xf3 && +                 // It should not be 'pause' f3 90 +                 InternalInstr.opcode != 0x90) +          Flags |= X86::IP_HAS_REPEAT; +        if (InternalInstr.hasLockPrefix) +          Flags |= X86::IP_HAS_LOCK; +      } +      Instr.setFlags(Flags); +    } +    return (!Ret) ? Success : Fail; +  } +} + +// +// Private code that translates from struct InternalInstructions to MCInsts. +// + +/// translateRegister - Translates an internal register to the appropriate LLVM +///   register, and appends it as an operand to an MCInst. +/// +/// @param mcInst     - The MCInst to append to. +/// @param reg        - The Reg to append. +static void translateRegister(MCInst &mcInst, Reg reg) { +#define ENTRY(x) X86::x, +  static constexpr MCPhysReg llvmRegnums[] = {ALL_REGS}; +#undef ENTRY + +  MCPhysReg llvmRegnum = llvmRegnums[reg]; +  mcInst.addOperand(MCOperand::createReg(llvmRegnum)); +} + +/// tryAddingSymbolicOperand - trys to add a symbolic operand in place of the +/// immediate Value in the MCInst. +/// +/// @param Value      - The immediate Value, has had any PC adjustment made by +///                     the caller. 
+/// @param isBranch   - If the instruction is a branch instruction +/// @param Address    - The starting address of the instruction +/// @param Offset     - The byte offset to this immediate in the instruction +/// @param Width      - The byte width of this immediate in the instruction +/// +/// If the getOpInfo() function was set when setupForSymbolicDisassembly() was +/// called then that function is called to get any symbolic information for the +/// immediate in the instruction using the Address, Offset and Width.  If that +/// returns non-zero then the symbolic information it returns is used to create +/// an MCExpr and that is added as an operand to the MCInst.  If getOpInfo() +/// returns zero and isBranch is true then a symbol look up for immediate Value +/// is done and if a symbol is found an MCExpr is created with that, else +/// an MCExpr with the immediate Value is created.  This function returns true +/// if it adds an operand to the MCInst and false otherwise. +static bool tryAddingSymbolicOperand(int64_t Value, bool isBranch, +                                     uint64_t Address, uint64_t Offset, +                                     uint64_t Width, MCInst &MI, +                                     const MCDisassembler *Dis) { +  return Dis->tryAddingSymbolicOperand(MI, Value, Address, isBranch, +                                       Offset, Width); +} + +/// tryAddingPcLoadReferenceComment - trys to add a comment as to what is being +/// referenced by a load instruction with the base register that is the rip. +/// These can often be addresses in a literal pool.  The Address of the +/// instruction and its immediate Value are used to determine the address +/// being referenced in the literal pool entry.  The SymbolLookUp call back will +/// return a pointer to a literal 'C' string if the referenced address is an +/// address into a section with 'C' string literals. 
+static void tryAddingPcLoadReferenceComment(uint64_t Address, uint64_t Value, +                                            const void *Decoder) { +  const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder); +  Dis->tryAddingPcLoadReferenceComment(Value, Address); +} + +static const uint8_t segmentRegnums[SEG_OVERRIDE_max] = { +  0,        // SEG_OVERRIDE_NONE +  X86::CS, +  X86::SS, +  X86::DS, +  X86::ES, +  X86::FS, +  X86::GS +}; + +/// translateSrcIndex   - Appends a source index operand to an MCInst. +/// +/// @param mcInst       - The MCInst to append to. +/// @param insn         - The internal instruction. +static bool translateSrcIndex(MCInst &mcInst, InternalInstruction &insn) { +  unsigned baseRegNo; + +  if (insn.mode == MODE_64BIT) +    baseRegNo = insn.hasAdSize ? X86::ESI : X86::RSI; +  else if (insn.mode == MODE_32BIT) +    baseRegNo = insn.hasAdSize ? X86::SI : X86::ESI; +  else { +    assert(insn.mode == MODE_16BIT); +    baseRegNo = insn.hasAdSize ? X86::ESI : X86::SI; +  } +  MCOperand baseReg = MCOperand::createReg(baseRegNo); +  mcInst.addOperand(baseReg); + +  MCOperand segmentReg; +  segmentReg = MCOperand::createReg(segmentRegnums[insn.segmentOverride]); +  mcInst.addOperand(segmentReg); +  return false; +} + +/// translateDstIndex   - Appends a destination index operand to an MCInst. +/// +/// @param mcInst       - The MCInst to append to. +/// @param insn         - The internal instruction. + +static bool translateDstIndex(MCInst &mcInst, InternalInstruction &insn) { +  unsigned baseRegNo; + +  if (insn.mode == MODE_64BIT) +    baseRegNo = insn.hasAdSize ? X86::EDI : X86::RDI; +  else if (insn.mode == MODE_32BIT) +    baseRegNo = insn.hasAdSize ? X86::DI : X86::EDI; +  else { +    assert(insn.mode == MODE_16BIT); +    baseRegNo = insn.hasAdSize ? 
X86::EDI : X86::DI; +  } +  MCOperand baseReg = MCOperand::createReg(baseRegNo); +  mcInst.addOperand(baseReg); +  return false; +} + +/// translateImmediate  - Appends an immediate operand to an MCInst. +/// +/// @param mcInst       - The MCInst to append to. +/// @param immediate    - The immediate value to append. +/// @param operand      - The operand, as stored in the descriptor table. +/// @param insn         - The internal instruction. +static void translateImmediate(MCInst &mcInst, uint64_t immediate, +                               const OperandSpecifier &operand, +                               InternalInstruction &insn, +                               const MCDisassembler *Dis) { +  // Sign-extend the immediate if necessary. + +  OperandType type = (OperandType)operand.type; + +  bool isBranch = false; +  uint64_t pcrel = 0; +  if (type == TYPE_REL) { +    isBranch = true; +    pcrel = insn.startLocation + +            insn.immediateOffset + insn.immediateSize; +    switch (operand.encoding) { +    default: +      break; +    case ENCODING_Iv: +      switch (insn.displacementSize) { +      default: +        break; +      case 1: +        if(immediate & 0x80) +          immediate |= ~(0xffull); +        break; +      case 2: +        if(immediate & 0x8000) +          immediate |= ~(0xffffull); +        break; +      case 4: +        if(immediate & 0x80000000) +          immediate |= ~(0xffffffffull); +        break; +      case 8: +        break; +      } +      break; +    case ENCODING_IB: +      if(immediate & 0x80) +        immediate |= ~(0xffull); +      break; +    case ENCODING_IW: +      if(immediate & 0x8000) +        immediate |= ~(0xffffull); +      break; +    case ENCODING_ID: +      if(immediate & 0x80000000) +        immediate |= ~(0xffffffffull); +      break; +    } +  } +  // By default sign-extend all X86 immediates based on their encoding. 
+  else if (type == TYPE_IMM) { +    switch (operand.encoding) { +    default: +      break; +    case ENCODING_IB: +      if(immediate & 0x80) +        immediate |= ~(0xffull); +      break; +    case ENCODING_IW: +      if(immediate & 0x8000) +        immediate |= ~(0xffffull); +      break; +    case ENCODING_ID: +      if(immediate & 0x80000000) +        immediate |= ~(0xffffffffull); +      break; +    case ENCODING_IO: +      break; +    } +  } + +  switch (type) { +  case TYPE_XMM: +    mcInst.addOperand(MCOperand::createReg(X86::XMM0 + (immediate >> 4))); +    return; +  case TYPE_YMM: +    mcInst.addOperand(MCOperand::createReg(X86::YMM0 + (immediate >> 4))); +    return; +  case TYPE_ZMM: +    mcInst.addOperand(MCOperand::createReg(X86::ZMM0 + (immediate >> 4))); +    return; +  default: +    // operand is 64 bits wide.  Do nothing. +    break; +  } + +  if(!tryAddingSymbolicOperand(immediate + pcrel, isBranch, insn.startLocation, +                               insn.immediateOffset, insn.immediateSize, +                               mcInst, Dis)) +    mcInst.addOperand(MCOperand::createImm(immediate)); + +  if (type == TYPE_MOFFS) { +    MCOperand segmentReg; +    segmentReg = MCOperand::createReg(segmentRegnums[insn.segmentOverride]); +    mcInst.addOperand(segmentReg); +  } +} + +/// translateRMRegister - Translates a register stored in the R/M field of the +///   ModR/M byte to its LLVM equivalent and appends it to an MCInst. +/// @param mcInst       - The MCInst to append to. +/// @param insn         - The internal instruction to extract the R/M field +///                       from. 
+/// @return             - 0 on success; -1 otherwise +static bool translateRMRegister(MCInst &mcInst, +                                InternalInstruction &insn) { +  if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) { +    debug("A R/M register operand may not have a SIB byte"); +    return true; +  } + +  switch (insn.eaBase) { +  default: +    debug("Unexpected EA base register"); +    return true; +  case EA_BASE_NONE: +    debug("EA_BASE_NONE for ModR/M base"); +    return true; +#define ENTRY(x) case EA_BASE_##x: +  ALL_EA_BASES +#undef ENTRY +    debug("A R/M register operand may not have a base; " +          "the operand must be a register."); +    return true; +#define ENTRY(x)                                                      \ +  case EA_REG_##x:                                                    \ +    mcInst.addOperand(MCOperand::createReg(X86::x)); break; +  ALL_REGS +#undef ENTRY +  } + +  return false; +} + +/// translateRMMemory - Translates a memory operand stored in the Mod and R/M +///   fields of an internal instruction (and possibly its SIB byte) to a memory +///   operand in LLVM's format, and appends it to an MCInst. +/// +/// @param mcInst       - The MCInst to append to. +/// @param insn         - The instruction to extract Mod, R/M, and SIB fields +///                       from. +/// @return             - 0 on success; nonzero otherwise +static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn, +                              const MCDisassembler *Dis) { +  // Addresses in an MCInst are represented as five operands: +  //   1. basereg       (register)  The R/M base, or (if there is a SIB) the +  //                                SIB base +  //   2. scaleamount   (immediate) 1, or (if there is a SIB) the specified +  //                                scale amount +  //   3. 
indexreg      (register)  x86_registerNONE, or (if there is a SIB) +  //                                the index (which is multiplied by the +  //                                scale amount) +  //   4. displacement  (immediate) 0, or the displacement if there is one +  //   5. segmentreg    (register)  x86_registerNONE for now, but could be set +  //                                if we have segment overrides + +  MCOperand baseReg; +  MCOperand scaleAmount; +  MCOperand indexReg; +  MCOperand displacement; +  MCOperand segmentReg; +  uint64_t pcrel = 0; + +  if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) { +    if (insn.sibBase != SIB_BASE_NONE) { +      switch (insn.sibBase) { +      default: +        debug("Unexpected sibBase"); +        return true; +#define ENTRY(x)                                          \ +      case SIB_BASE_##x:                                  \ +        baseReg = MCOperand::createReg(X86::x); break; +      ALL_SIB_BASES +#undef ENTRY +      } +    } else { +      baseReg = MCOperand::createReg(X86::NoRegister); +    } + +    if (insn.sibIndex != SIB_INDEX_NONE) { +      switch (insn.sibIndex) { +      default: +        debug("Unexpected sibIndex"); +        return true; +#define ENTRY(x)                                          \ +      case SIB_INDEX_##x:                                 \ +        indexReg = MCOperand::createReg(X86::x); break; +      EA_BASES_32BIT +      EA_BASES_64BIT +      REGS_XMM +      REGS_YMM +      REGS_ZMM +#undef ENTRY +      } +    } else { +      // Use EIZ/RIZ for a few ambiguous cases where the SIB byte is present, +      // but no index is used and modrm alone should have been enough. +      // -No base register in 32-bit mode. In 64-bit mode this is used to +      //  avoid rip-relative addressing. +      // -Any base register used other than ESP/RSP/R12D/R12. Using these as a +      //  base always requires a SIB byte. +      // -A scale other than 1 is used. 
+      if (insn.sibScale != 1 || +          (insn.sibBase == SIB_BASE_NONE && insn.mode != MODE_64BIT) || +          (insn.sibBase != SIB_BASE_NONE && +           insn.sibBase != SIB_BASE_ESP && insn.sibBase != SIB_BASE_RSP && +           insn.sibBase != SIB_BASE_R12D && insn.sibBase != SIB_BASE_R12)) { +        indexReg = MCOperand::createReg(insn.addressSize == 4 ? X86::EIZ : +                                                                X86::RIZ); +      } else +        indexReg = MCOperand::createReg(X86::NoRegister); +    } + +    scaleAmount = MCOperand::createImm(insn.sibScale); +  } else { +    switch (insn.eaBase) { +    case EA_BASE_NONE: +      if (insn.eaDisplacement == EA_DISP_NONE) { +        debug("EA_BASE_NONE and EA_DISP_NONE for ModR/M base"); +        return true; +      } +      if (insn.mode == MODE_64BIT){ +        pcrel = insn.startLocation + +                insn.displacementOffset + insn.displacementSize; +        tryAddingPcLoadReferenceComment(insn.startLocation + +                                        insn.displacementOffset, +                                        insn.displacement + pcrel, Dis); +        // Section 2.2.1.6 +        baseReg = MCOperand::createReg(insn.addressSize == 4 ? 
X86::EIP : +                                                               X86::RIP); +      } +      else +        baseReg = MCOperand::createReg(X86::NoRegister); + +      indexReg = MCOperand::createReg(X86::NoRegister); +      break; +    case EA_BASE_BX_SI: +      baseReg = MCOperand::createReg(X86::BX); +      indexReg = MCOperand::createReg(X86::SI); +      break; +    case EA_BASE_BX_DI: +      baseReg = MCOperand::createReg(X86::BX); +      indexReg = MCOperand::createReg(X86::DI); +      break; +    case EA_BASE_BP_SI: +      baseReg = MCOperand::createReg(X86::BP); +      indexReg = MCOperand::createReg(X86::SI); +      break; +    case EA_BASE_BP_DI: +      baseReg = MCOperand::createReg(X86::BP); +      indexReg = MCOperand::createReg(X86::DI); +      break; +    default: +      indexReg = MCOperand::createReg(X86::NoRegister); +      switch (insn.eaBase) { +      default: +        debug("Unexpected eaBase"); +        return true; +        // Here, we will use the fill-ins defined above.  However, +        //   BX_SI, BX_DI, BP_SI, and BP_DI are all handled above and +        //   sib and sib64 were handled in the top-level if, so they're only +        //   placeholders to keep the compiler happy. 
+#define ENTRY(x)                                        \ +      case EA_BASE_##x:                                 \ +        baseReg = MCOperand::createReg(X86::x); break; +      ALL_EA_BASES +#undef ENTRY +#define ENTRY(x) case EA_REG_##x: +      ALL_REGS +#undef ENTRY +        debug("A R/M memory operand may not be a register; " +              "the base field must be a base."); +        return true; +      } +    } + +    scaleAmount = MCOperand::createImm(1); +  } + +  displacement = MCOperand::createImm(insn.displacement); + +  segmentReg = MCOperand::createReg(segmentRegnums[insn.segmentOverride]); + +  mcInst.addOperand(baseReg); +  mcInst.addOperand(scaleAmount); +  mcInst.addOperand(indexReg); +  if(!tryAddingSymbolicOperand(insn.displacement + pcrel, false, +                               insn.startLocation, insn.displacementOffset, +                               insn.displacementSize, mcInst, Dis)) +    mcInst.addOperand(displacement); +  mcInst.addOperand(segmentReg); +  return false; +} + +/// translateRM - Translates an operand stored in the R/M (and possibly SIB) +///   byte of an instruction to LLVM form, and appends it to an MCInst. +/// +/// @param mcInst       - The MCInst to append to. +/// @param operand      - The operand, as stored in the descriptor table. +/// @param insn         - The instruction to extract Mod, R/M, and SIB fields +///                       from. 
+/// @return             - 0 on success; nonzero otherwise +static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand, +                        InternalInstruction &insn, const MCDisassembler *Dis) { +  switch (operand.type) { +  default: +    debug("Unexpected type for a R/M operand"); +    return true; +  case TYPE_R8: +  case TYPE_R16: +  case TYPE_R32: +  case TYPE_R64: +  case TYPE_Rv: +  case TYPE_MM64: +  case TYPE_XMM: +  case TYPE_YMM: +  case TYPE_ZMM: +  case TYPE_VK_PAIR: +  case TYPE_VK: +  case TYPE_DEBUGREG: +  case TYPE_CONTROLREG: +  case TYPE_BNDR: +    return translateRMRegister(mcInst, insn); +  case TYPE_M: +  case TYPE_MVSIBX: +  case TYPE_MVSIBY: +  case TYPE_MVSIBZ: +    return translateRMMemory(mcInst, insn, Dis); +  } +} + +/// translateFPRegister - Translates a stack position on the FPU stack to its +///   LLVM form, and appends it to an MCInst. +/// +/// @param mcInst       - The MCInst to append to. +/// @param stackPos     - The stack position to translate. +static void translateFPRegister(MCInst &mcInst, +                                uint8_t stackPos) { +  mcInst.addOperand(MCOperand::createReg(X86::ST0 + stackPos)); +} + +/// translateMaskRegister - Translates a 3-bit mask register number to +///   LLVM form, and appends it to an MCInst. +/// +/// @param mcInst       - The MCInst to append to. +/// @param maskRegNum   - Number of mask register from 0 to 7. +/// @return             - false on success; true otherwise. +static bool translateMaskRegister(MCInst &mcInst, +                                uint8_t maskRegNum) { +  if (maskRegNum >= 8) { +    debug("Invalid mask register number"); +    return true; +  } + +  mcInst.addOperand(MCOperand::createReg(X86::K0 + maskRegNum)); +  return false; +} + +/// translateOperand - Translates an operand stored in an internal instruction +///   to LLVM's format and appends it to an MCInst. +/// +/// @param mcInst       - The MCInst to append to. 
+/// @param operand      - The operand, as stored in the descriptor table. +/// @param insn         - The internal instruction. +/// @return             - false on success; true otherwise. +static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand, +                             InternalInstruction &insn, +                             const MCDisassembler *Dis) { +  switch (operand.encoding) { +  default: +    debug("Unhandled operand encoding during translation"); +    return true; +  case ENCODING_REG: +    translateRegister(mcInst, insn.reg); +    return false; +  case ENCODING_WRITEMASK: +    return translateMaskRegister(mcInst, insn.writemask); +  CASE_ENCODING_RM: +  CASE_ENCODING_VSIB: +    return translateRM(mcInst, operand, insn, Dis); +  case ENCODING_IB: +  case ENCODING_IW: +  case ENCODING_ID: +  case ENCODING_IO: +  case ENCODING_Iv: +  case ENCODING_Ia: +    translateImmediate(mcInst, +                       insn.immediates[insn.numImmediatesTranslated++], +                       operand, +                       insn, +                       Dis); +    return false; +  case ENCODING_IRC: +    mcInst.addOperand(MCOperand::createImm(insn.RC)); +    return false; +  case ENCODING_SI: +    return translateSrcIndex(mcInst, insn); +  case ENCODING_DI: +    return translateDstIndex(mcInst, insn); +  case ENCODING_RB: +  case ENCODING_RW: +  case ENCODING_RD: +  case ENCODING_RO: +  case ENCODING_Rv: +    translateRegister(mcInst, insn.opcodeRegister); +    return false; +  case ENCODING_CC: +    mcInst.addOperand(MCOperand::createImm(insn.immediates[1])); +    return false; +  case ENCODING_FP: +    translateFPRegister(mcInst, insn.modRM & 7); +    return false; +  case ENCODING_VVVV: +    translateRegister(mcInst, insn.vvvv); +    return false; +  case ENCODING_DUP: +    return translateOperand(mcInst, insn.operands[operand.type - TYPE_DUP0], +                            insn, Dis); +  } +} + +/// translateInstruction - Translates an 
/// translateInstruction - Translates an internal instruction and all its
///   operands to an MCInst.
///
/// @param mcInst       - The MCInst to populate with the instruction's data.
/// @param insn         - The internal instruction.
/// @param Dis          - Forwarded unchanged to translateOperand().
/// @return             - false on success; true otherwise.
static bool translateInstruction(MCInst &mcInst,
                                InternalInstruction &insn,
                                const MCDisassembler *Dis) {
  // Without a specification nothing can be translated.
  if (!insn.spec) {
    debug("Instruction has no specification");
    return true;
  }

  // Start from a clean MCInst so operands are appended to an empty list.
  mcInst.clear();
  mcInst.setOpcode(insn.instructionID);
  // If when reading the prefix bytes we determined the overlapping 0xf2 or 0xf3
  // prefix bytes should be disassembled as xrelease and xacquire then set the
  // opcode to those instead of the rep and repne opcodes.
  if (insn.xAcquireRelease) {
    if(mcInst.getOpcode() == X86::REP_PREFIX)
      mcInst.setOpcode(X86::XRELEASE_PREFIX);
    else if(mcInst.getOpcode() == X86::REPNE_PREFIX)
      mcInst.setOpcode(X86::XACQUIRE_PREFIX);
  }

  // translateOperand() post-increments this counter for every immediate it
  // emits, so it must restart at zero for each instruction.
  insn.numImmediatesTranslated = 0;

  // Translate every operand slot that is in use; stop at the first failure.
  for (const auto &Op : insn.operands) {
    if (Op.encoding != ENCODING_NONE) {
      if (translateOperand(mcInst, Op, insn, Dis)) {
        return true;
      }
    }
  }

  return false;
}

/// Factory passed to TargetRegistry::RegisterMCDisassembler().  Builds an
/// X86GenericDisassembler from the subtarget info, the context, and the
/// MCInstrInfo obtained from the target.  The object is created with a plain
/// `new` and returned as a raw pointer; releasing it is up to the caller.
static MCDisassembler *createX86Disassembler(const Target &T,
                                             const MCSubtargetInfo &STI,
                                             MCContext &Ctx) {
  std::unique_ptr<const MCInstrInfo> MII(T.createMCInstrInfo());
  return new X86GenericDisassembler(STI, Ctx, std::move(MII));
}

/// Registers createX86Disassembler for both the 32-bit and the 64-bit X86
/// targets.
extern "C" void LLVMInitializeX86Disassembler() {
  // Register the disassembler.
  TargetRegistry::RegisterMCDisassembler(getTheX86_32Target(),
                                         createX86Disassembler);
  TargetRegistry::RegisterMCDisassembler(getTheX86_64Target(),
                                         createX86Disassembler);
}
diff --git a/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp b/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
new file mode 100644
index 000000000000..e287f6625115
--- /dev/null
+++ b/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
@@ -0,0 +1,1938 @@
//===-- X86DisassemblerDecoder.cpp - Disassembler decoder -----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is part of the X86 Disassembler.
// It contains the implementation of the instruction decoder.
// Documentation for the disassembler can be found in X86Disassembler.h.
//
//===----------------------------------------------------------------------===//

#include "X86DisassemblerDecoder.h"
#include "llvm/ADT/StringRef.h"

#include <cstdarg> /* for va_*()       */
#include <cstdio>  /* for vsnprintf()  */
#include <cstdlib> /* for exit()       */
#include <cstring> /* for memset()     */

using namespace llvm::X86Disassembler;

/// Specifies whether a ModR/M byte is needed and (if so) which
/// instruction each possible value of the ModR/M byte corresponds to.  Once
/// this information is known, we have narrowed down to a single instruction.
struct ModRMDecision {
  // Selects how decode() interprets the ModR/M byte (compared there against
  // the MODRM_* constants).
  uint8_t modrm_type;
  // Base index into modRMTable; decode() adds a ModR/M-derived offset to it.
  uint16_t instructionIDs;
};

/// Specifies which set of ModR/M->instruction tables to look at
/// given a particular opcode.
struct OpcodeDecision {
  // One entry per possible value of the final opcode byte.
  ModRMDecision modRMDecisions[256];
};

/// Specifies which opcode->instruction tables to look at given
/// a particular context (set of attributes).  Since there are many possible
/// contexts, the decoder first uses CONTEXTS_SYM to determine which context
/// applies given a specific set of attributes.  Hence there are only IC_max
/// entries in this table, rather than 2^(ATTR_max).
struct ContextDecision {
  // Indexed by InstructionContext.
  OpcodeDecision opcodeDecisions[IC_max];
};

// The generated tables are included only after the three structs above have
// been declared.
#include "X86GenDisassemblerTables.inc"

// debug(s) forwards the message together with the current file and line to
// Debug() in builds without NDEBUG; with NDEBUG it expands to an empty
// statement.
#ifndef NDEBUG
#define debug(s) do { Debug(__FILE__, __LINE__, s); } while (0)
#else
#define debug(s) do { } while (0)
#endif

/*
 * contextForAttrs - Client for the instruction context table.  Takes a set of
 *   attributes and returns the appropriate decode context.
 *
 * @param attrMask  - Attributes, from the enumeration attributeBits.  Used
 *                    directly as an index into CONTEXTS_SYM; no bounds check
 *                    is performed here.
 * @return          - The InstructionContext to use when looking up an
 *                    instruction with these attributes.
 */
static InstructionContext contextForAttrs(uint16_t attrMask) {
  return static_cast<InstructionContext>(CONTEXTS_SYM[attrMask]);
}
+ */ +static int modRMRequired(OpcodeType type, +                         InstructionContext insnContext, +                         uint16_t opcode) { +  const struct ContextDecision* decision = nullptr; + +  switch (type) { +  case ONEBYTE: +    decision = &ONEBYTE_SYM; +    break; +  case TWOBYTE: +    decision = &TWOBYTE_SYM; +    break; +  case THREEBYTE_38: +    decision = &THREEBYTE38_SYM; +    break; +  case THREEBYTE_3A: +    decision = &THREEBYTE3A_SYM; +    break; +  case XOP8_MAP: +    decision = &XOP8_MAP_SYM; +    break; +  case XOP9_MAP: +    decision = &XOP9_MAP_SYM; +    break; +  case XOPA_MAP: +    decision = &XOPA_MAP_SYM; +    break; +  case THREEDNOW_MAP: +    decision = &THREEDNOW_MAP_SYM; +    break; +  } + +  return decision->opcodeDecisions[insnContext].modRMDecisions[opcode]. +    modrm_type != MODRM_ONEENTRY; +} + +/* + * decode - Reads the appropriate instruction table to obtain the unique ID of + *   an instruction. + * + * @param type        - See modRMRequired(). + * @param insnContext - See modRMRequired(). + * @param opcode      - See modRMRequired(). + * @param modRM       - The ModR/M byte if required, or any value if not. + * @return            - The UID of the instruction, or 0 on failure. 
+ */ +static InstrUID decode(OpcodeType type, +                       InstructionContext insnContext, +                       uint8_t opcode, +                       uint8_t modRM) { +  const struct ModRMDecision* dec = nullptr; + +  switch (type) { +  case ONEBYTE: +    dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; +    break; +  case TWOBYTE: +    dec = &TWOBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; +    break; +  case THREEBYTE_38: +    dec = &THREEBYTE38_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; +    break; +  case THREEBYTE_3A: +    dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; +    break; +  case XOP8_MAP: +    dec = &XOP8_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; +    break; +  case XOP9_MAP: +    dec = &XOP9_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; +    break; +  case XOPA_MAP: +    dec = &XOPA_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; +    break; +  case THREEDNOW_MAP: +    dec = &THREEDNOW_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; +    break; +  } + +  switch (dec->modrm_type) { +  default: +    debug("Corrupt table!  
Unknown modrm_type"); +    return 0; +  case MODRM_ONEENTRY: +    return modRMTable[dec->instructionIDs]; +  case MODRM_SPLITRM: +    if (modFromModRM(modRM) == 0x3) +      return modRMTable[dec->instructionIDs+1]; +    return modRMTable[dec->instructionIDs]; +  case MODRM_SPLITREG: +    if (modFromModRM(modRM) == 0x3) +      return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)+8]; +    return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)]; +  case MODRM_SPLITMISC: +    if (modFromModRM(modRM) == 0x3) +      return modRMTable[dec->instructionIDs+(modRM & 0x3f)+8]; +    return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)]; +  case MODRM_FULL: +    return modRMTable[dec->instructionIDs+modRM]; +  } +} + +/* + * specifierForUID - Given a UID, returns the name and operand specification for + *   that instruction. + * + * @param uid - The unique ID for the instruction.  This should be returned by + *              decode(); specifierForUID will not check bounds. + * @return    - A pointer to the specification for that instruction. + */ +static const struct InstructionSpecifier *specifierForUID(InstrUID uid) { +  return &INSTRUCTIONS_SYM[uid]; +} + +/* + * consumeByte - Uses the reader function provided by the user to consume one + *   byte from the instruction's memory and advance the cursor. + * + * @param insn  - The instruction with the reader function to use.  The cursor + *                for this instruction is advanced. + * @param byte  - A pointer to a pre-allocated memory buffer to be populated + *                with the data read. + * @return      - 0 if the read was successful; nonzero otherwise. + */ +static int consumeByte(struct InternalInstruction* insn, uint8_t* byte) { +  int ret = insn->reader(insn->readerArg, byte, insn->readerCursor); + +  if (!ret) +    ++(insn->readerCursor); + +  return ret; +} + +/* + * lookAtByte - Like consumeByte, but does not advance the cursor. + * + * @param insn  - See consumeByte(). 
/*
 * lookAtByte - Like consumeByte, but does not advance the cursor.
 *
 * @param insn  - See consumeByte().
 * @param byte  - See consumeByte().
 * @return      - See consumeByte().
 */
static int lookAtByte(struct InternalInstruction* insn, uint8_t* byte) {
  return insn->reader(insn->readerArg, byte, insn->readerCursor);
}

/*
 * unconsumeByte - Moves the cursor back by one byte.  No check is made that
 *   a byte was actually consumed beforehand.
 *
 * @param insn  - The instruction whose cursor is decremented.
 */
static void unconsumeByte(struct InternalInstruction* insn) {
  insn->readerCursor--;
}

/*
 * CONSUME_FUNC - Defines a static reader function `name` that reads
 *   sizeof(type) bytes starting at the cursor, one reader call per byte.
 *   The byte at offset k lands in bits [8k, 8k+7] of the result, i.e. the
 *   first byte read is the least significant.  The cursor and *ptr are only
 *   updated after every byte has been read; on the first failing read the
 *   reader's nonzero result is returned and neither is modified.
 */
#define CONSUME_FUNC(name, type)                                  \
  static int name(struct InternalInstruction* insn, type* ptr) {  \
    type combined = 0;                                            \
    unsigned offset;                                              \
    for (offset = 0; offset < sizeof(type); ++offset) {           \
      uint8_t byte;                                               \
      int ret = insn->reader(insn->readerArg,                     \
                             &byte,                               \
                             insn->readerCursor + offset);        \
      if (ret)                                                    \
        return ret;                                               \
      combined = combined | ((uint64_t)byte << (offset * 8));     \
    }                                                             \
    *ptr = combined;                                              \
    insn->readerCursor += sizeof(type);                           \
    return 0;                                                     \
  }
+ */ +CONSUME_FUNC(consumeInt8, int8_t) +CONSUME_FUNC(consumeInt16, int16_t) +CONSUME_FUNC(consumeInt32, int32_t) +CONSUME_FUNC(consumeUInt16, uint16_t) +CONSUME_FUNC(consumeUInt32, uint32_t) +CONSUME_FUNC(consumeUInt64, uint64_t) + +/* + * dbgprintf - Uses the logging function provided by the user to log a single + *   message, typically without a carriage-return. + * + * @param insn    - The instruction containing the logging function. + * @param format  - See printf(). + * @param ...     - See printf(). + */ +static void dbgprintf(struct InternalInstruction* insn, +                      const char* format, +                      ...) { +  char buffer[256]; +  va_list ap; + +  if (!insn->dlog) +    return; + +  va_start(ap, format); +  (void)vsnprintf(buffer, sizeof(buffer), format, ap); +  va_end(ap); + +  insn->dlog(insn->dlogArg, buffer); +} + +static bool isREX(struct InternalInstruction *insn, uint8_t prefix) { +  if (insn->mode == MODE_64BIT) +    return prefix >= 0x40 && prefix <= 0x4f; +  return false; +} + +/* + * setPrefixPresent - Marks that a particular prefix is present as mandatory + * + * @param insn      - The instruction to be marked as having the prefix. + * @param prefix    - The prefix that is present. + */ +static void setPrefixPresent(struct InternalInstruction *insn, uint8_t prefix) { +  uint8_t nextByte; +  switch (prefix) { +  case 0xf0: +    insn->hasLockPrefix = true; +    break; +  case 0xf2: +  case 0xf3: +    if (lookAtByte(insn, &nextByte)) +      break; +    // TODO: +    //  1. There could be several 0x66 +    //  2. if (nextByte == 0x66) and nextNextByte != 0x0f then +    //      it's not mandatory prefix +    //  3. 
if (nextByte >= 0x40 && nextByte <= 0x4f) it's REX and we need +    //     0x0f exactly after it to be mandatory prefix +    if (isREX(insn, nextByte) || nextByte == 0x0f || nextByte == 0x66) +      // The last of 0xf2 /0xf3 is mandatory prefix +      insn->mandatoryPrefix = prefix; +    insn->repeatPrefix = prefix; +    break; +  case 0x66: +    if (lookAtByte(insn, &nextByte)) +      break; +    // 0x66 can't overwrite existing mandatory prefix and should be ignored +    if (!insn->mandatoryPrefix && (nextByte == 0x0f || isREX(insn, nextByte))) +      insn->mandatoryPrefix = prefix; +    break; +  } +} + +/* + * readPrefixes - Consumes all of an instruction's prefix bytes, and marks the + *   instruction as having them.  Also sets the instruction's default operand, + *   address, and other relevant data sizes to report operands correctly. + * + * @param insn  - The instruction whose prefixes are to be read. + * @return      - 0 if the instruction could be read until the end of the prefix + *                bytes, and no prefixes conflicted; nonzero otherwise. + */ +static int readPrefixes(struct InternalInstruction* insn) { +  bool isPrefix = true; +  uint8_t byte = 0; +  uint8_t nextByte; + +  dbgprintf(insn, "readPrefixes()"); + +  while (isPrefix) { +    /* If we fail reading prefixes, just stop here and let the opcode reader deal with it */ +    if (consumeByte(insn, &byte)) +      break; + +    /* +     * If the byte is a LOCK/REP/REPNE prefix and not a part of the opcode, then +     * break and let it be disassembled as a normal "instruction". 
+     */ +    if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0) // LOCK +      break; + +    if ((byte == 0xf2 || byte == 0xf3) && !lookAtByte(insn, &nextByte)) { +      /* +       * If the byte is 0xf2 or 0xf3, and any of the following conditions are +       * met: +       * - it is followed by a LOCK (0xf0) prefix +       * - it is followed by an xchg instruction +       * then it should be disassembled as a xacquire/xrelease not repne/rep. +       */ +      if (((nextByte == 0xf0) || +           ((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90))) { +        insn->xAcquireRelease = true; +        if (!(byte == 0xf3 && nextByte == 0x90)) // PAUSE instruction support +          break; +      } +      /* +       * Also if the byte is 0xf3, and the following condition is met: +       * - it is followed by a "mov mem, reg" (opcode 0x88/0x89) or +       *                       "mov mem, imm" (opcode 0xc6/0xc7) instructions. +       * then it should be disassembled as an xrelease not rep. 
+       */ +      if (byte == 0xf3 && (nextByte == 0x88 || nextByte == 0x89 || +                           nextByte == 0xc6 || nextByte == 0xc7)) { +        insn->xAcquireRelease = true; +        break; +      } +      if (isREX(insn, nextByte)) { +        uint8_t nnextByte; +        // Go to REX prefix after the current one +        if (consumeByte(insn, &nnextByte)) +          return -1; +        // We should be able to read next byte after REX prefix +        if (lookAtByte(insn, &nnextByte)) +          return -1; +        unconsumeByte(insn); +      } +    } + +    switch (byte) { +    case 0xf0:  /* LOCK */ +    case 0xf2:  /* REPNE/REPNZ */ +    case 0xf3:  /* REP or REPE/REPZ */ +      setPrefixPresent(insn, byte); +      break; +    case 0x2e:  /* CS segment override -OR- Branch not taken */ +    case 0x36:  /* SS segment override -OR- Branch taken */ +    case 0x3e:  /* DS segment override */ +    case 0x26:  /* ES segment override */ +    case 0x64:  /* FS segment override */ +    case 0x65:  /* GS segment override */ +      switch (byte) { +      case 0x2e: +        insn->segmentOverride = SEG_OVERRIDE_CS; +        break; +      case 0x36: +        insn->segmentOverride = SEG_OVERRIDE_SS; +        break; +      case 0x3e: +        insn->segmentOverride = SEG_OVERRIDE_DS; +        break; +      case 0x26: +        insn->segmentOverride = SEG_OVERRIDE_ES; +        break; +      case 0x64: +        insn->segmentOverride = SEG_OVERRIDE_FS; +        break; +      case 0x65: +        insn->segmentOverride = SEG_OVERRIDE_GS; +        break; +      default: +        debug("Unhandled override"); +        return -1; +      } +      setPrefixPresent(insn, byte); +      break; +    case 0x66:  /* Operand-size override */ +      insn->hasOpSize = true; +      setPrefixPresent(insn, byte); +      break; +    case 0x67:  /* Address-size override */ +      insn->hasAdSize = true; +      setPrefixPresent(insn, byte); +      break; +    default:    /* Not a prefix byte */ 
+      isPrefix = false; +      break; +    } + +    if (isPrefix) +      dbgprintf(insn, "Found prefix 0x%hhx", byte); +  } + +  insn->vectorExtensionType = TYPE_NO_VEX_XOP; + +  if (byte == 0x62) { +    uint8_t byte1, byte2; + +    if (consumeByte(insn, &byte1)) { +      dbgprintf(insn, "Couldn't read second byte of EVEX prefix"); +      return -1; +    } + +    if (lookAtByte(insn, &byte2)) { +      dbgprintf(insn, "Couldn't read third byte of EVEX prefix"); +      return -1; +    } + +    if ((insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) && +       ((~byte1 & 0xc) == 0xc) && ((byte2 & 0x4) == 0x4)) { +      insn->vectorExtensionType = TYPE_EVEX; +    } else { +      unconsumeByte(insn); /* unconsume byte1 */ +      unconsumeByte(insn); /* unconsume byte  */ +    } + +    if (insn->vectorExtensionType == TYPE_EVEX) { +      insn->vectorExtensionPrefix[0] = byte; +      insn->vectorExtensionPrefix[1] = byte1; +      if (consumeByte(insn, &insn->vectorExtensionPrefix[2])) { +        dbgprintf(insn, "Couldn't read third byte of EVEX prefix"); +        return -1; +      } +      if (consumeByte(insn, &insn->vectorExtensionPrefix[3])) { +        dbgprintf(insn, "Couldn't read fourth byte of EVEX prefix"); +        return -1; +      } + +      /* We simulate the REX prefix for simplicity's sake */ +      if (insn->mode == MODE_64BIT) { +        insn->rexPrefix = 0x40 +                        | (wFromEVEX3of4(insn->vectorExtensionPrefix[2]) << 3) +                        | (rFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 2) +                        | (xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 1) +                        | (bFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 0); +      } + +      dbgprintf(insn, "Found EVEX prefix 0x%hhx 0x%hhx 0x%hhx 0x%hhx", +              insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1], +              insn->vectorExtensionPrefix[2], insn->vectorExtensionPrefix[3]); +    } +  } else if (byte == 0xc4) { +    
uint8_t byte1; + +    if (lookAtByte(insn, &byte1)) { +      dbgprintf(insn, "Couldn't read second byte of VEX"); +      return -1; +    } + +    if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) +      insn->vectorExtensionType = TYPE_VEX_3B; +    else +      unconsumeByte(insn); + +    if (insn->vectorExtensionType == TYPE_VEX_3B) { +      insn->vectorExtensionPrefix[0] = byte; +      consumeByte(insn, &insn->vectorExtensionPrefix[1]); +      consumeByte(insn, &insn->vectorExtensionPrefix[2]); + +      /* We simulate the REX prefix for simplicity's sake */ + +      if (insn->mode == MODE_64BIT) +        insn->rexPrefix = 0x40 +                        | (wFromVEX3of3(insn->vectorExtensionPrefix[2]) << 3) +                        | (rFromVEX2of3(insn->vectorExtensionPrefix[1]) << 2) +                        | (xFromVEX2of3(insn->vectorExtensionPrefix[1]) << 1) +                        | (bFromVEX2of3(insn->vectorExtensionPrefix[1]) << 0); + +      dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx", +                insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1], +                insn->vectorExtensionPrefix[2]); +    } +  } else if (byte == 0xc5) { +    uint8_t byte1; + +    if (lookAtByte(insn, &byte1)) { +      dbgprintf(insn, "Couldn't read second byte of VEX"); +      return -1; +    } + +    if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) +      insn->vectorExtensionType = TYPE_VEX_2B; +    else +      unconsumeByte(insn); + +    if (insn->vectorExtensionType == TYPE_VEX_2B) { +      insn->vectorExtensionPrefix[0] = byte; +      consumeByte(insn, &insn->vectorExtensionPrefix[1]); + +      if (insn->mode == MODE_64BIT) +        insn->rexPrefix = 0x40 +                        | (rFromVEX2of2(insn->vectorExtensionPrefix[1]) << 2); + +      switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) { +      default: +        break; +      case VEX_PREFIX_66: +        insn->hasOpSize = true; +        break; +      } + +      
dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx", +                insn->vectorExtensionPrefix[0], +                insn->vectorExtensionPrefix[1]); +    } +  } else if (byte == 0x8f) { +    uint8_t byte1; + +    if (lookAtByte(insn, &byte1)) { +      dbgprintf(insn, "Couldn't read second byte of XOP"); +      return -1; +    } + +    if ((byte1 & 0x38) != 0x0) /* 0 in these 3 bits is a POP instruction. */ +      insn->vectorExtensionType = TYPE_XOP; +    else +      unconsumeByte(insn); + +    if (insn->vectorExtensionType == TYPE_XOP) { +      insn->vectorExtensionPrefix[0] = byte; +      consumeByte(insn, &insn->vectorExtensionPrefix[1]); +      consumeByte(insn, &insn->vectorExtensionPrefix[2]); + +      /* We simulate the REX prefix for simplicity's sake */ + +      if (insn->mode == MODE_64BIT) +        insn->rexPrefix = 0x40 +                        | (wFromXOP3of3(insn->vectorExtensionPrefix[2]) << 3) +                        | (rFromXOP2of3(insn->vectorExtensionPrefix[1]) << 2) +                        | (xFromXOP2of3(insn->vectorExtensionPrefix[1]) << 1) +                        | (bFromXOP2of3(insn->vectorExtensionPrefix[1]) << 0); + +      switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) { +      default: +        break; +      case VEX_PREFIX_66: +        insn->hasOpSize = true; +        break; +      } + +      dbgprintf(insn, "Found XOP prefix 0x%hhx 0x%hhx 0x%hhx", +                insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1], +                insn->vectorExtensionPrefix[2]); +    } +  } else if (isREX(insn, byte)) { +    if (lookAtByte(insn, &nextByte)) +      return -1; +    insn->rexPrefix = byte; +    dbgprintf(insn, "Found REX prefix 0x%hhx", byte); +  } else +    unconsumeByte(insn); + +  if (insn->mode == MODE_16BIT) { +    insn->registerSize = (insn->hasOpSize ? 4 : 2); +    insn->addressSize = (insn->hasAdSize ? 4 : 2); +    insn->displacementSize = (insn->hasAdSize ? 
4 : 2); +    insn->immediateSize = (insn->hasOpSize ? 4 : 2); +  } else if (insn->mode == MODE_32BIT) { +    insn->registerSize = (insn->hasOpSize ? 2 : 4); +    insn->addressSize = (insn->hasAdSize ? 2 : 4); +    insn->displacementSize = (insn->hasAdSize ? 2 : 4); +    insn->immediateSize = (insn->hasOpSize ? 2 : 4); +  } else if (insn->mode == MODE_64BIT) { +    if (insn->rexPrefix && wFromREX(insn->rexPrefix)) { +      insn->registerSize       = 8; +      insn->addressSize = (insn->hasAdSize ? 4 : 8); +      insn->displacementSize   = 4; +      insn->immediateSize      = 4; +    } else { +      insn->registerSize = (insn->hasOpSize ? 2 : 4); +      insn->addressSize = (insn->hasAdSize ? 4 : 8); +      insn->displacementSize = (insn->hasOpSize ? 2 : 4); +      insn->immediateSize = (insn->hasOpSize ? 2 : 4); +    } +  } + +  return 0; +} + +static int readModRM(struct InternalInstruction* insn); + +/* + * readOpcode - Reads the opcode (excepting the ModR/M byte in the case of + *   extended or escape opcodes). + * + * @param insn  - The instruction whose opcode is to be read. + * @return      - 0 if the opcode could be read successfully; nonzero otherwise. 
+ */ +static int readOpcode(struct InternalInstruction* insn) { +  /* Determine the length of the primary opcode */ + +  uint8_t current; + +  dbgprintf(insn, "readOpcode()"); + +  insn->opcodeType = ONEBYTE; + +  if (insn->vectorExtensionType == TYPE_EVEX) { +    switch (mmFromEVEX2of4(insn->vectorExtensionPrefix[1])) { +    default: +      dbgprintf(insn, "Unhandled mm field for instruction (0x%hhx)", +                mmFromEVEX2of4(insn->vectorExtensionPrefix[1])); +      return -1; +    case VEX_LOB_0F: +      insn->opcodeType = TWOBYTE; +      return consumeByte(insn, &insn->opcode); +    case VEX_LOB_0F38: +      insn->opcodeType = THREEBYTE_38; +      return consumeByte(insn, &insn->opcode); +    case VEX_LOB_0F3A: +      insn->opcodeType = THREEBYTE_3A; +      return consumeByte(insn, &insn->opcode); +    } +  } else if (insn->vectorExtensionType == TYPE_VEX_3B) { +    switch (mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])) { +    default: +      dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)", +                mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])); +      return -1; +    case VEX_LOB_0F: +      insn->opcodeType = TWOBYTE; +      return consumeByte(insn, &insn->opcode); +    case VEX_LOB_0F38: +      insn->opcodeType = THREEBYTE_38; +      return consumeByte(insn, &insn->opcode); +    case VEX_LOB_0F3A: +      insn->opcodeType = THREEBYTE_3A; +      return consumeByte(insn, &insn->opcode); +    } +  } else if (insn->vectorExtensionType == TYPE_VEX_2B) { +    insn->opcodeType = TWOBYTE; +    return consumeByte(insn, &insn->opcode); +  } else if (insn->vectorExtensionType == TYPE_XOP) { +    switch (mmmmmFromXOP2of3(insn->vectorExtensionPrefix[1])) { +    default: +      dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)", +                mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])); +      return -1; +    case XOP_MAP_SELECT_8: +      insn->opcodeType = XOP8_MAP; +      return consumeByte(insn, 
&insn->opcode); +    case XOP_MAP_SELECT_9: +      insn->opcodeType = XOP9_MAP; +      return consumeByte(insn, &insn->opcode); +    case XOP_MAP_SELECT_A: +      insn->opcodeType = XOPA_MAP; +      return consumeByte(insn, &insn->opcode); +    } +  } + +  if (consumeByte(insn, ¤t)) +    return -1; + +  if (current == 0x0f) { +    dbgprintf(insn, "Found a two-byte escape prefix (0x%hhx)", current); + +    if (consumeByte(insn, ¤t)) +      return -1; + +    if (current == 0x38) { +      dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); + +      if (consumeByte(insn, ¤t)) +        return -1; + +      insn->opcodeType = THREEBYTE_38; +    } else if (current == 0x3a) { +      dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); + +      if (consumeByte(insn, ¤t)) +        return -1; + +      insn->opcodeType = THREEBYTE_3A; +    } else if (current == 0x0f) { +      dbgprintf(insn, "Found a 3dnow escape prefix (0x%hhx)", current); + +      // Consume operands before the opcode to comply with the 3DNow encoding +      if (readModRM(insn)) +        return -1; + +      if (consumeByte(insn, ¤t)) +        return -1; + +      insn->opcodeType = THREEDNOW_MAP; +    } else { +      dbgprintf(insn, "Didn't find a three-byte escape prefix"); + +      insn->opcodeType = TWOBYTE; +    } +  } else if (insn->mandatoryPrefix) +    // The opcode with mandatory prefix must start with opcode escape. +    // If not it's legacy repeat prefix +    insn->mandatoryPrefix = 0; + +  /* +   * At this point we have consumed the full opcode. +   * Anything we consume from here on must be unconsumed. +   */ + +  insn->opcode = current; + +  return 0; +} + +/* + * getIDWithAttrMask - Determines the ID of an instruction, consuming + *   the ModR/M byte as appropriate for extended and escape opcodes, + *   and using a supplied attribute mask. 
+ * + * @param instructionID - A pointer whose target is filled in with the ID of the + *                        instruction. + * @param insn          - The instruction whose ID is to be determined. + * @param attrMask      - The attribute mask to search. + * @return              - 0 if the ModR/M could be read when needed or was not + *                        needed; nonzero otherwise. + */ +static int getIDWithAttrMask(uint16_t* instructionID, +                             struct InternalInstruction* insn, +                             uint16_t attrMask) { +  bool hasModRMExtension; + +  InstructionContext instructionClass = contextForAttrs(attrMask); + +  hasModRMExtension = modRMRequired(insn->opcodeType, +                                    instructionClass, +                                    insn->opcode); + +  if (hasModRMExtension) { +    if (readModRM(insn)) +      return -1; + +    *instructionID = decode(insn->opcodeType, +                            instructionClass, +                            insn->opcode, +                            insn->modRM); +  } else { +    *instructionID = decode(insn->opcodeType, +                            instructionClass, +                            insn->opcode, +                            0); +  } + +  return 0; +} + +/* + * is16BitEquivalent - Determines whether two instruction names refer to + * equivalent instructions but one is 16-bit whereas the other is not. 
/*
 * is16BitEquivalent - Determines whether two instruction names refer to
 * equivalent instructions but one is 16-bit whereas the other is not.
 *
 * The names are compared character by character.  Positions may differ only
 * in the following ways (orig -> equiv): 'Q' or 'L' -> 'W', '6' or '3' -> '1',
 * '4' or '2' -> '6'.  Identical names also compare as equivalent.
 *
 * @param orig  - The instruction that is not 16-bit
 * @param equiv - The instruction that is 16-bit
 * @return      - true if both names have the same length and every position
 *                either matches or is one of the substitutions above.
 */
static bool is16BitEquivalent(const char *orig, const char *equiv) {
  // std::size_t is the natural index type for a string; the file-offset type
  // off_t is not provided by any header this file includes directly.
  for (std::size_t i = 0;; ++i) {
    const char o = orig[i];
    const char e = equiv[i];

    if (o == '\0' && e == '\0')
      return true;
    // Only one of the names ended: lengths differ.
    if (o == '\0' || e == '\0')
      return false;
    if (o == e)
      continue;

    if ((o == 'Q' || o == 'L') && e == 'W')
      continue;
    if ((o == '6' || o == '3') && e == '1')
      continue;
    if ((o == '4' || o == '2') && e == '6')
      continue;
    return false;
  }
}

/*
 * is64Bit - Determines whether this instruction is a 64-bit instruction.
 *
 * @param name - The NUL-terminated instruction name to inspect.
 * @return     - true if the name contains the substring "64".
 */
static bool is64Bit(const char *name) {
  return std::strstr(name, "64") != nullptr;
}
ATTR_EVEX : ATTR_VEX; + +    if (insn->vectorExtensionType == TYPE_EVEX) { +      switch (ppFromEVEX3of4(insn->vectorExtensionPrefix[2])) { +      case VEX_PREFIX_66: +        attrMask |= ATTR_OPSIZE; +        break; +      case VEX_PREFIX_F3: +        attrMask |= ATTR_XS; +        break; +      case VEX_PREFIX_F2: +        attrMask |= ATTR_XD; +        break; +      } + +      if (zFromEVEX4of4(insn->vectorExtensionPrefix[3])) +        attrMask |= ATTR_EVEXKZ; +      if (bFromEVEX4of4(insn->vectorExtensionPrefix[3])) +        attrMask |= ATTR_EVEXB; +      if (aaaFromEVEX4of4(insn->vectorExtensionPrefix[3])) +        attrMask |= ATTR_EVEXK; +      if (lFromEVEX4of4(insn->vectorExtensionPrefix[3])) +        attrMask |= ATTR_VEXL; +      if (l2FromEVEX4of4(insn->vectorExtensionPrefix[3])) +        attrMask |= ATTR_EVEXL2; +    } else if (insn->vectorExtensionType == TYPE_VEX_3B) { +      switch (ppFromVEX3of3(insn->vectorExtensionPrefix[2])) { +      case VEX_PREFIX_66: +        attrMask |= ATTR_OPSIZE; +        break; +      case VEX_PREFIX_F3: +        attrMask |= ATTR_XS; +        break; +      case VEX_PREFIX_F2: +        attrMask |= ATTR_XD; +        break; +      } + +      if (lFromVEX3of3(insn->vectorExtensionPrefix[2])) +        attrMask |= ATTR_VEXL; +    } else if (insn->vectorExtensionType == TYPE_VEX_2B) { +      switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) { +      case VEX_PREFIX_66: +        attrMask |= ATTR_OPSIZE; +        break; +      case VEX_PREFIX_F3: +        attrMask |= ATTR_XS; +        break; +      case VEX_PREFIX_F2: +        attrMask |= ATTR_XD; +        break; +      } + +      if (lFromVEX2of2(insn->vectorExtensionPrefix[1])) +        attrMask |= ATTR_VEXL; +    } else if (insn->vectorExtensionType == TYPE_XOP) { +      switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) { +      case VEX_PREFIX_66: +        attrMask |= ATTR_OPSIZE; +        break; +      case VEX_PREFIX_F3: +        attrMask |= ATTR_XS; +        break; + 
     case VEX_PREFIX_F2: +        attrMask |= ATTR_XD; +        break; +      } + +      if (lFromXOP3of3(insn->vectorExtensionPrefix[2])) +        attrMask |= ATTR_VEXL; +    } else { +      return -1; +    } +  } else if (!insn->mandatoryPrefix) { +    // If we don't have mandatory prefix we should use legacy prefixes here +    if (insn->hasOpSize && (insn->mode != MODE_16BIT)) +      attrMask |= ATTR_OPSIZE; +    if (insn->hasAdSize) +      attrMask |= ATTR_ADSIZE; +    if (insn->opcodeType == ONEBYTE) { +      if (insn->repeatPrefix == 0xf3 && (insn->opcode == 0x90)) +        // Special support for PAUSE +        attrMask |= ATTR_XS; +    } else { +      if (insn->repeatPrefix == 0xf2) +        attrMask |= ATTR_XD; +      else if (insn->repeatPrefix == 0xf3) +        attrMask |= ATTR_XS; +    } +  } else { +    switch (insn->mandatoryPrefix) { +    case 0xf2: +      attrMask |= ATTR_XD; +      break; +    case 0xf3: +      attrMask |= ATTR_XS; +      break; +    case 0x66: +      if (insn->mode != MODE_16BIT) +        attrMask |= ATTR_OPSIZE; +      break; +    case 0x67: +      attrMask |= ATTR_ADSIZE; +      break; +    } + +  } + +  if (insn->rexPrefix & 0x08) { +    attrMask |= ATTR_REXW; +    attrMask &= ~ATTR_ADSIZE; +  } + +  /* +   * JCXZ/JECXZ need special handling for 16-bit mode because the meaning +   * of the AdSize prefix is inverted w.r.t. 32-bit mode. +   */ +  if (insn->mode == MODE_16BIT && insn->opcodeType == ONEBYTE && +      insn->opcode == 0xE3) +    attrMask ^= ATTR_ADSIZE; + +  // If we're in 16-bit mode and this is one of the relative jumps and opsize +  // prefix isn't present, we need to force the opsize attribute since the +  // prefix is inverted relative to 32-bit mode. 
+  if (insn->mode == MODE_16BIT && !insn->hasOpSize && +      insn->opcodeType == ONEBYTE && +      (insn->opcode == 0xE8 || insn->opcode == 0xE9)) +    attrMask |= ATTR_OPSIZE; + +  if (insn->mode == MODE_16BIT && !insn->hasOpSize && +      insn->opcodeType == TWOBYTE && +      insn->opcode >= 0x80 && insn->opcode <= 0x8F) +    attrMask |= ATTR_OPSIZE; + +  if (getIDWithAttrMask(&instructionID, insn, attrMask)) +    return -1; + +  /* The following clauses compensate for limitations of the tables. */ + +  if (insn->mode != MODE_64BIT && +      insn->vectorExtensionType != TYPE_NO_VEX_XOP) { +    /* +     * The tables can't distinquish between cases where the W-bit is used to +     * select register size and cases where its a required part of the opcode. +     */ +    if ((insn->vectorExtensionType == TYPE_EVEX && +         wFromEVEX3of4(insn->vectorExtensionPrefix[2])) || +        (insn->vectorExtensionType == TYPE_VEX_3B && +         wFromVEX3of3(insn->vectorExtensionPrefix[2])) || +        (insn->vectorExtensionType == TYPE_XOP && +         wFromXOP3of3(insn->vectorExtensionPrefix[2]))) { + +      uint16_t instructionIDWithREXW; +      if (getIDWithAttrMask(&instructionIDWithREXW, +                            insn, attrMask | ATTR_REXW)) { +        insn->instructionID = instructionID; +        insn->spec = specifierForUID(instructionID); +        return 0; +      } + +      auto SpecName = GetInstrName(instructionIDWithREXW, miiArg); +      // If not a 64-bit instruction. Switch the opcode. +      if (!is64Bit(SpecName.data())) { +        insn->instructionID = instructionIDWithREXW; +        insn->spec = specifierForUID(instructionIDWithREXW); +        return 0; +      } +    } +  } + +  /* +   * Absolute moves, umonitor, and movdir64b need special handling. +   * -For 16-bit mode because the meaning of the AdSize and OpSize prefixes are +   *  inverted w.r.t. +   * -For 32-bit mode we need to ensure the ADSIZE prefix is observed in +   *  any position. 
+   */ +  if ((insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0)) || +      (insn->opcodeType == TWOBYTE && (insn->opcode == 0xAE)) || +      (insn->opcodeType == THREEBYTE_38 && insn->opcode == 0xF8)) { +    /* Make sure we observed the prefixes in any position. */ +    if (insn->hasAdSize) +      attrMask |= ATTR_ADSIZE; +    if (insn->hasOpSize) +      attrMask |= ATTR_OPSIZE; + +    /* In 16-bit, invert the attributes. */ +    if (insn->mode == MODE_16BIT) { +      attrMask ^= ATTR_ADSIZE; + +      /* The OpSize attribute is only valid with the absolute moves. */ +      if (insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0)) +        attrMask ^= ATTR_OPSIZE; +    } + +    if (getIDWithAttrMask(&instructionID, insn, attrMask)) +      return -1; + +    insn->instructionID = instructionID; +    insn->spec = specifierForUID(instructionID); +    return 0; +  } + +  if ((insn->mode == MODE_16BIT || insn->hasOpSize) && +      !(attrMask & ATTR_OPSIZE)) { +    /* +     * The instruction tables make no distinction between instructions that +     * allow OpSize anywhere (i.e., 16-bit operations) and that need it in a +     * particular spot (i.e., many MMX operations).  In general we're +     * conservative, but in the specific case where OpSize is present but not +     * in the right place we check if there's a 16-bit operation. 
+     */ + +    const struct InstructionSpecifier *spec; +    uint16_t instructionIDWithOpsize; +    llvm::StringRef specName, specWithOpSizeName; + +    spec = specifierForUID(instructionID); + +    if (getIDWithAttrMask(&instructionIDWithOpsize, +                          insn, +                          attrMask | ATTR_OPSIZE)) { +      /* +       * ModRM required with OpSize but not present; give up and return version +       * without OpSize set +       */ + +      insn->instructionID = instructionID; +      insn->spec = spec; +      return 0; +    } + +    specName = GetInstrName(instructionID, miiArg); +    specWithOpSizeName = GetInstrName(instructionIDWithOpsize, miiArg); + +    if (is16BitEquivalent(specName.data(), specWithOpSizeName.data()) && +        (insn->mode == MODE_16BIT) ^ insn->hasOpSize) { +      insn->instructionID = instructionIDWithOpsize; +      insn->spec = specifierForUID(instructionIDWithOpsize); +    } else { +      insn->instructionID = instructionID; +      insn->spec = spec; +    } +    return 0; +  } + +  if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 && +      insn->rexPrefix & 0x01) { +    /* +     * NOOP shouldn't decode as NOOP if REX.b is set. Instead +     * it should decode as XCHG %r8, %eax. 
+     */ + +    const struct InstructionSpecifier *spec; +    uint16_t instructionIDWithNewOpcode; +    const struct InstructionSpecifier *specWithNewOpcode; + +    spec = specifierForUID(instructionID); + +    /* Borrow opcode from one of the other XCHGar opcodes */ +    insn->opcode = 0x91; + +    if (getIDWithAttrMask(&instructionIDWithNewOpcode, +                          insn, +                          attrMask)) { +      insn->opcode = 0x90; + +      insn->instructionID = instructionID; +      insn->spec = spec; +      return 0; +    } + +    specWithNewOpcode = specifierForUID(instructionIDWithNewOpcode); + +    /* Change back */ +    insn->opcode = 0x90; + +    insn->instructionID = instructionIDWithNewOpcode; +    insn->spec = specWithNewOpcode; + +    return 0; +  } + +  insn->instructionID = instructionID; +  insn->spec = specifierForUID(insn->instructionID); + +  return 0; +} + +/* + * readSIB - Consumes the SIB byte to determine addressing information for an + *   instruction. + * + * @param insn  - The instruction whose SIB byte is to be read. + * @return      - 0 if the SIB byte was successfully read; nonzero otherwise. 
+ */ +static int readSIB(struct InternalInstruction* insn) { +  SIBBase sibBaseBase = SIB_BASE_NONE; +  uint8_t index, base; + +  dbgprintf(insn, "readSIB()"); + +  if (insn->consumedSIB) +    return 0; + +  insn->consumedSIB = true; + +  switch (insn->addressSize) { +  case 2: +    dbgprintf(insn, "SIB-based addressing doesn't work in 16-bit mode"); +    return -1; +  case 4: +    insn->sibIndexBase = SIB_INDEX_EAX; +    sibBaseBase = SIB_BASE_EAX; +    break; +  case 8: +    insn->sibIndexBase = SIB_INDEX_RAX; +    sibBaseBase = SIB_BASE_RAX; +    break; +  } + +  if (consumeByte(insn, &insn->sib)) +    return -1; + +  index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3); + +  if (index == 0x4) { +    insn->sibIndex = SIB_INDEX_NONE; +  } else { +    insn->sibIndex = (SIBIndex)(insn->sibIndexBase + index); +  } + +  insn->sibScale = 1 << scaleFromSIB(insn->sib); + +  base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3); + +  switch (base) { +  case 0x5: +  case 0xd: +    switch (modFromModRM(insn->modRM)) { +    case 0x0: +      insn->eaDisplacement = EA_DISP_32; +      insn->sibBase = SIB_BASE_NONE; +      break; +    case 0x1: +      insn->eaDisplacement = EA_DISP_8; +      insn->sibBase = (SIBBase)(sibBaseBase + base); +      break; +    case 0x2: +      insn->eaDisplacement = EA_DISP_32; +      insn->sibBase = (SIBBase)(sibBaseBase + base); +      break; +    case 0x3: +      debug("Cannot have Mod = 0b11 and a SIB byte"); +      return -1; +    } +    break; +  default: +    insn->sibBase = (SIBBase)(sibBaseBase + base); +    break; +  } + +  return 0; +} + +/* + * readDisplacement - Consumes the displacement of an instruction. + * + * @param insn  - The instruction whose displacement is to be read. + * @return      - 0 if the displacement byte was successfully read; nonzero + *                otherwise. 
+ */ +static int readDisplacement(struct InternalInstruction* insn) { +  int8_t d8; +  int16_t d16; +  int32_t d32; + +  dbgprintf(insn, "readDisplacement()"); + +  if (insn->consumedDisplacement) +    return 0; + +  insn->consumedDisplacement = true; +  insn->displacementOffset = insn->readerCursor - insn->startLocation; + +  switch (insn->eaDisplacement) { +  case EA_DISP_NONE: +    insn->consumedDisplacement = false; +    break; +  case EA_DISP_8: +    if (consumeInt8(insn, &d8)) +      return -1; +    insn->displacement = d8; +    break; +  case EA_DISP_16: +    if (consumeInt16(insn, &d16)) +      return -1; +    insn->displacement = d16; +    break; +  case EA_DISP_32: +    if (consumeInt32(insn, &d32)) +      return -1; +    insn->displacement = d32; +    break; +  } + +  insn->consumedDisplacement = true; +  return 0; +} + +/* + * readModRM - Consumes all addressing information (ModR/M byte, SIB byte, and + *   displacement) for an instruction and interprets it. + * + * @param insn  - The instruction whose addressing information is to be read. + * @return      - 0 if the information was successfully read; nonzero otherwise. + */ +static int readModRM(struct InternalInstruction* insn) { +  uint8_t mod, rm, reg, evexrm; + +  dbgprintf(insn, "readModRM()"); + +  if (insn->consumedModRM) +    return 0; + +  if (consumeByte(insn, &insn->modRM)) +    return -1; +  insn->consumedModRM = true; + +  mod     = modFromModRM(insn->modRM); +  rm      = rmFromModRM(insn->modRM); +  reg     = regFromModRM(insn->modRM); + +  /* +   * This goes by insn->registerSize to pick the correct register, which messes +   * up if we're using (say) XMM or 8-bit register operands.  That gets fixed in +   * fixupReg(). 
+   */ +  switch (insn->registerSize) { +  case 2: +    insn->regBase = MODRM_REG_AX; +    insn->eaRegBase = EA_REG_AX; +    break; +  case 4: +    insn->regBase = MODRM_REG_EAX; +    insn->eaRegBase = EA_REG_EAX; +    break; +  case 8: +    insn->regBase = MODRM_REG_RAX; +    insn->eaRegBase = EA_REG_RAX; +    break; +  } + +  reg |= rFromREX(insn->rexPrefix) << 3; +  rm  |= bFromREX(insn->rexPrefix) << 3; + +  evexrm = 0; +  if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT) { +    reg |= r2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4; +    evexrm = xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4; +  } + +  insn->reg = (Reg)(insn->regBase + reg); + +  switch (insn->addressSize) { +  case 2: { +    EABase eaBaseBase = EA_BASE_BX_SI; + +    switch (mod) { +    case 0x0: +      if (rm == 0x6) { +        insn->eaBase = EA_BASE_NONE; +        insn->eaDisplacement = EA_DISP_16; +        if (readDisplacement(insn)) +          return -1; +      } else { +        insn->eaBase = (EABase)(eaBaseBase + rm); +        insn->eaDisplacement = EA_DISP_NONE; +      } +      break; +    case 0x1: +      insn->eaBase = (EABase)(eaBaseBase + rm); +      insn->eaDisplacement = EA_DISP_8; +      insn->displacementSize = 1; +      if (readDisplacement(insn)) +        return -1; +      break; +    case 0x2: +      insn->eaBase = (EABase)(eaBaseBase + rm); +      insn->eaDisplacement = EA_DISP_16; +      if (readDisplacement(insn)) +        return -1; +      break; +    case 0x3: +      insn->eaBase = (EABase)(insn->eaRegBase + rm); +      if (readDisplacement(insn)) +        return -1; +      break; +    } +    break; +  } +  case 4: +  case 8: { +    EABase eaBaseBase = (insn->addressSize == 4 ? 
EA_BASE_EAX : EA_BASE_RAX); + +    switch (mod) { +    case 0x0: +      insn->eaDisplacement = EA_DISP_NONE; /* readSIB may override this */ +      // In determining whether RIP-relative mode is used (rm=5), +      // or whether a SIB byte is present (rm=4), +      // the extension bits (REX.b and EVEX.x) are ignored. +      switch (rm & 7) { +      case 0x4: // SIB byte is present +        insn->eaBase = (insn->addressSize == 4 ? +                        EA_BASE_sib : EA_BASE_sib64); +        if (readSIB(insn) || readDisplacement(insn)) +          return -1; +        break; +      case 0x5: // RIP-relative +        insn->eaBase = EA_BASE_NONE; +        insn->eaDisplacement = EA_DISP_32; +        if (readDisplacement(insn)) +          return -1; +        break; +      default: +        insn->eaBase = (EABase)(eaBaseBase + rm); +        break; +      } +      break; +    case 0x1: +      insn->displacementSize = 1; +      LLVM_FALLTHROUGH; +    case 0x2: +      insn->eaDisplacement = (mod == 0x1 ? 
EA_DISP_8 : EA_DISP_32); +      switch (rm & 7) { +      case 0x4: // SIB byte is present +        insn->eaBase = EA_BASE_sib; +        if (readSIB(insn) || readDisplacement(insn)) +          return -1; +        break; +      default: +        insn->eaBase = (EABase)(eaBaseBase + rm); +        if (readDisplacement(insn)) +          return -1; +        break; +      } +      break; +    case 0x3: +      insn->eaDisplacement = EA_DISP_NONE; +      insn->eaBase = (EABase)(insn->eaRegBase + rm + evexrm); +      break; +    } +    break; +  } +  } /* switch (insn->addressSize) */ + +  return 0; +} + +#define GENERIC_FIXUP_FUNC(name, base, prefix, mask)      \ +  static uint16_t name(struct InternalInstruction *insn,  \ +                       OperandType type,                  \ +                       uint8_t index,                     \ +                       uint8_t *valid) {                  \ +    *valid = 1;                                           \ +    switch (type) {                                       \ +    default:                                              \ +      debug("Unhandled register type");                   \ +      *valid = 0;                                         \ +      return 0;                                           \ +    case TYPE_Rv:                                         \ +      return base + index;                                \ +    case TYPE_R8:                                         \ +      index &= mask;                                      \ +      if (index > 0xf)                                    \ +        *valid = 0;                                       \ +      if (insn->rexPrefix &&                              \ +         index >= 4 && index <= 7) {                      \ +        return prefix##_SPL + (index - 4);                \ +      } else {                                            \ +        return prefix##_AL + index;                       \ +      }                                                
   \ +    case TYPE_R16:                                        \ +      index &= mask;                                      \ +      if (index > 0xf)                                    \ +        *valid = 0;                                       \ +      return prefix##_AX + index;                         \ +    case TYPE_R32:                                        \ +      index &= mask;                                      \ +      if (index > 0xf)                                    \ +        *valid = 0;                                       \ +      return prefix##_EAX + index;                        \ +    case TYPE_R64:                                        \ +      index &= mask;                                      \ +      if (index > 0xf)                                    \ +        *valid = 0;                                       \ +      return prefix##_RAX + index;                        \ +    case TYPE_ZMM:                                        \ +      return prefix##_ZMM0 + index;                       \ +    case TYPE_YMM:                                        \ +      return prefix##_YMM0 + index;                       \ +    case TYPE_XMM:                                        \ +      return prefix##_XMM0 + index;                       \ +    case TYPE_VK:                                         \ +      index &= 0xf;                                       \ +      if (index > 7)                                      \ +        *valid = 0;                                       \ +      return prefix##_K0 + index;                         \ +    case TYPE_VK_PAIR:                                    \ +      if (index > 7)                                      \ +        *valid = 0;                                       \ +      return prefix##_K0_K1 + (index / 2);                \ +    case TYPE_MM64:                                       \ +      return prefix##_MM0 + (index & 0x7);                \ +    case TYPE_SEGMENTREG:                 
                \ +      if ((index & 7) > 5)                                \ +        *valid = 0;                                       \ +      return prefix##_ES + (index & 7);                   \ +    case TYPE_DEBUGREG:                                   \ +      return prefix##_DR0 + index;                        \ +    case TYPE_CONTROLREG:                                 \ +      return prefix##_CR0 + index;                        \ +    case TYPE_BNDR:                                       \ +      if (index > 3)                                      \ +        *valid = 0;                                       \ +      return prefix##_BND0 + index;                       \ +    case TYPE_MVSIBX:                                     \ +      return prefix##_XMM0 + index;                       \ +    case TYPE_MVSIBY:                                     \ +      return prefix##_YMM0 + index;                       \ +    case TYPE_MVSIBZ:                                     \ +      return prefix##_ZMM0 + index;                       \ +    }                                                     \ +  } + +/* + * fixup*Value - Consults an operand type to determine the meaning of the + *   reg or R/M field.  If the operand is an XMM operand, for example, an + *   operand would be XMM0 instead of AX, which readModRM() would otherwise + *   misinterpret it as. + * + * @param insn  - The instruction containing the operand. + * @param type  - The operand type. + * @param index - The existing value of the field as reported by readModRM(). + * @param valid - The address of a uint8_t.  The target is set to 1 if the + *                field is valid for the register class; 0 if not. + * @return      - The proper value. 
+ */ +GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase,    MODRM_REG, 0x1f) +GENERIC_FIXUP_FUNC(fixupRMValue,  insn->eaRegBase,  EA_REG,    0xf) + +/* + * fixupReg - Consults an operand specifier to determine which of the + *   fixup*Value functions to use in correcting readModRM()'ss interpretation. + * + * @param insn  - See fixup*Value(). + * @param op    - The operand specifier. + * @return      - 0 if fixup was successful; -1 if the register returned was + *                invalid for its class. + */ +static int fixupReg(struct InternalInstruction *insn, +                    const struct OperandSpecifier *op) { +  uint8_t valid; + +  dbgprintf(insn, "fixupReg()"); + +  switch ((OperandEncoding)op->encoding) { +  default: +    debug("Expected a REG or R/M encoding in fixupReg"); +    return -1; +  case ENCODING_VVVV: +    insn->vvvv = (Reg)fixupRegValue(insn, +                                    (OperandType)op->type, +                                    insn->vvvv, +                                    &valid); +    if (!valid) +      return -1; +    break; +  case ENCODING_REG: +    insn->reg = (Reg)fixupRegValue(insn, +                                   (OperandType)op->type, +                                   insn->reg - insn->regBase, +                                   &valid); +    if (!valid) +      return -1; +    break; +  CASE_ENCODING_RM: +    if (insn->eaBase >= insn->eaRegBase) { +      insn->eaBase = (EABase)fixupRMValue(insn, +                                          (OperandType)op->type, +                                          insn->eaBase - insn->eaRegBase, +                                          &valid); +      if (!valid) +        return -1; +    } +    break; +  } + +  return 0; +} + +/* + * readOpcodeRegister - Reads an operand from the opcode field of an + *   instruction and interprets it appropriately given the operand width. + *   Handles AddRegFrm instructions. 
+ * + * @param insn  - the instruction whose opcode field is to be read. + * @param size  - The width (in bytes) of the register being specified. + *                1 means AL and friends, 2 means AX, 4 means EAX, and 8 means + *                RAX. + * @return      - 0 on success; nonzero otherwise. + */ +static int readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) { +  dbgprintf(insn, "readOpcodeRegister()"); + +  if (size == 0) +    size = insn->registerSize; + +  switch (size) { +  case 1: +    insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3) +                                                  | (insn->opcode & 7))); +    if (insn->rexPrefix && +        insn->opcodeRegister >= MODRM_REG_AL + 0x4 && +        insn->opcodeRegister < MODRM_REG_AL + 0x8) { +      insn->opcodeRegister = (Reg)(MODRM_REG_SPL +                                   + (insn->opcodeRegister - MODRM_REG_AL - 4)); +    } + +    break; +  case 2: +    insn->opcodeRegister = (Reg)(MODRM_REG_AX +                                 + ((bFromREX(insn->rexPrefix) << 3) +                                    | (insn->opcode & 7))); +    break; +  case 4: +    insn->opcodeRegister = (Reg)(MODRM_REG_EAX +                                 + ((bFromREX(insn->rexPrefix) << 3) +                                    | (insn->opcode & 7))); +    break; +  case 8: +    insn->opcodeRegister = (Reg)(MODRM_REG_RAX +                                 + ((bFromREX(insn->rexPrefix) << 3) +                                    | (insn->opcode & 7))); +    break; +  } + +  return 0; +} + +/* + * readImmediate - Consumes an immediate operand from an instruction, given the + *   desired operand size. + * + * @param insn  - The instruction whose operand is to be read. + * @param size  - The width (in bytes) of the operand. + * @return      - 0 if the immediate was successfully consumed; nonzero + *                otherwise. 
+ */ +static int readImmediate(struct InternalInstruction* insn, uint8_t size) { +  uint8_t imm8; +  uint16_t imm16; +  uint32_t imm32; +  uint64_t imm64; + +  dbgprintf(insn, "readImmediate()"); + +  if (insn->numImmediatesConsumed == 2) { +    debug("Already consumed two immediates"); +    return -1; +  } + +  if (size == 0) +    size = insn->immediateSize; +  else +    insn->immediateSize = size; +  insn->immediateOffset = insn->readerCursor - insn->startLocation; + +  switch (size) { +  case 1: +    if (consumeByte(insn, &imm8)) +      return -1; +    insn->immediates[insn->numImmediatesConsumed] = imm8; +    break; +  case 2: +    if (consumeUInt16(insn, &imm16)) +      return -1; +    insn->immediates[insn->numImmediatesConsumed] = imm16; +    break; +  case 4: +    if (consumeUInt32(insn, &imm32)) +      return -1; +    insn->immediates[insn->numImmediatesConsumed] = imm32; +    break; +  case 8: +    if (consumeUInt64(insn, &imm64)) +      return -1; +    insn->immediates[insn->numImmediatesConsumed] = imm64; +    break; +  } + +  insn->numImmediatesConsumed++; + +  return 0; +} + +/* + * readVVVV - Consumes vvvv from an instruction if it has a VEX prefix. + * + * @param insn  - The instruction whose operand is to be read. + * @return      - 0 if the vvvv was successfully consumed; nonzero + *                otherwise. 
+ */ +static int readVVVV(struct InternalInstruction* insn) { +  dbgprintf(insn, "readVVVV()"); + +  int vvvv; +  if (insn->vectorExtensionType == TYPE_EVEX) +    vvvv = (v2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 4 | +            vvvvFromEVEX3of4(insn->vectorExtensionPrefix[2])); +  else if (insn->vectorExtensionType == TYPE_VEX_3B) +    vvvv = vvvvFromVEX3of3(insn->vectorExtensionPrefix[2]); +  else if (insn->vectorExtensionType == TYPE_VEX_2B) +    vvvv = vvvvFromVEX2of2(insn->vectorExtensionPrefix[1]); +  else if (insn->vectorExtensionType == TYPE_XOP) +    vvvv = vvvvFromXOP3of3(insn->vectorExtensionPrefix[2]); +  else +    return -1; + +  if (insn->mode != MODE_64BIT) +    vvvv &= 0xf; // Can only clear bit 4. Bit 3 must be cleared later. + +  insn->vvvv = static_cast<Reg>(vvvv); +  return 0; +} + +/* + * readMaskRegister - Reads an mask register from the opcode field of an + *   instruction. + * + * @param insn    - The instruction whose opcode field is to be read. + * @return        - 0 on success; nonzero otherwise. + */ +static int readMaskRegister(struct InternalInstruction* insn) { +  dbgprintf(insn, "readMaskRegister()"); + +  if (insn->vectorExtensionType != TYPE_EVEX) +    return -1; + +  insn->writemask = +      static_cast<Reg>(aaaFromEVEX4of4(insn->vectorExtensionPrefix[3])); +  return 0; +} + +/* + * readOperands - Consults the specifier for an instruction and consumes all + *   operands for that instruction, interpreting them as it goes. + * + * @param insn  - The instruction whose operands are to be read and interpreted. + * @return      - 0 if all operands could be read; nonzero otherwise. + */ +static int readOperands(struct InternalInstruction* insn) { +  int hasVVVV, needVVVV; +  int sawRegImm = 0; + +  dbgprintf(insn, "readOperands()"); + +  /* If non-zero vvvv specified, need to make sure one of the operands +     uses it. 
*/ +  hasVVVV = !readVVVV(insn); +  needVVVV = hasVVVV && (insn->vvvv != 0); + +  for (const auto &Op : x86OperandSets[insn->spec->operands]) { +    switch (Op.encoding) { +    case ENCODING_NONE: +    case ENCODING_SI: +    case ENCODING_DI: +      break; +    CASE_ENCODING_VSIB: +      // VSIB can use the V2 bit so check only the other bits. +      if (needVVVV) +        needVVVV = hasVVVV & ((insn->vvvv & 0xf) != 0); +      if (readModRM(insn)) +        return -1; + +      // Reject if SIB wasn't used. +      if (insn->eaBase != EA_BASE_sib && insn->eaBase != EA_BASE_sib64) +        return -1; + +      // If sibIndex was set to SIB_INDEX_NONE, index offset is 4. +      if (insn->sibIndex == SIB_INDEX_NONE) +        insn->sibIndex = (SIBIndex)(insn->sibIndexBase + 4); + +      // If EVEX.v2 is set this is one of the 16-31 registers. +      if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT && +          v2FromEVEX4of4(insn->vectorExtensionPrefix[3])) +        insn->sibIndex = (SIBIndex)(insn->sibIndex + 16); + +      // Adjust the index register to the correct size. +      switch ((OperandType)Op.type) { +      default: +        debug("Unhandled VSIB index type"); +        return -1; +      case TYPE_MVSIBX: +        insn->sibIndex = (SIBIndex)(SIB_INDEX_XMM0 + +                                    (insn->sibIndex - insn->sibIndexBase)); +        break; +      case TYPE_MVSIBY: +        insn->sibIndex = (SIBIndex)(SIB_INDEX_YMM0 + +                                    (insn->sibIndex - insn->sibIndexBase)); +        break; +      case TYPE_MVSIBZ: +        insn->sibIndex = (SIBIndex)(SIB_INDEX_ZMM0 + +                                    (insn->sibIndex - insn->sibIndexBase)); +        break; +      } + +      // Apply the AVX512 compressed displacement scaling factor. 
+      if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8) +        insn->displacement *= 1 << (Op.encoding - ENCODING_VSIB); +      break; +    case ENCODING_REG: +    CASE_ENCODING_RM: +      if (readModRM(insn)) +        return -1; +      if (fixupReg(insn, &Op)) +        return -1; +      // Apply the AVX512 compressed displacement scaling factor. +      if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8) +        insn->displacement *= 1 << (Op.encoding - ENCODING_RM); +      break; +    case ENCODING_IB: +      if (sawRegImm) { +        /* Saw a register immediate so don't read again and instead split the +           previous immediate.  FIXME: This is a hack. */ +        insn->immediates[insn->numImmediatesConsumed] = +          insn->immediates[insn->numImmediatesConsumed - 1] & 0xf; +        ++insn->numImmediatesConsumed; +        break; +      } +      if (readImmediate(insn, 1)) +        return -1; +      if (Op.type == TYPE_XMM || Op.type == TYPE_YMM) +        sawRegImm = 1; +      break; +    case ENCODING_IW: +      if (readImmediate(insn, 2)) +        return -1; +      break; +    case ENCODING_ID: +      if (readImmediate(insn, 4)) +        return -1; +      break; +    case ENCODING_IO: +      if (readImmediate(insn, 8)) +        return -1; +      break; +    case ENCODING_Iv: +      if (readImmediate(insn, insn->immediateSize)) +        return -1; +      break; +    case ENCODING_Ia: +      if (readImmediate(insn, insn->addressSize)) +        return -1; +      break; +    case ENCODING_IRC: +      insn->RC = (l2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 1) | +                 lFromEVEX4of4(insn->vectorExtensionPrefix[3]); +      break; +    case ENCODING_RB: +      if (readOpcodeRegister(insn, 1)) +        return -1; +      break; +    case ENCODING_RW: +      if (readOpcodeRegister(insn, 2)) +        return -1; +      break; +    case ENCODING_RD: +      if (readOpcodeRegister(insn, 4)) +        return -1; 
+      break; +    case ENCODING_RO: +      if (readOpcodeRegister(insn, 8)) +        return -1; +      break; +    case ENCODING_Rv: +      if (readOpcodeRegister(insn, 0)) +        return -1; +      break; +    case ENCODING_CC: +      insn->immediates[1] = insn->opcode & 0xf; +      break; +    case ENCODING_FP: +      break; +    case ENCODING_VVVV: +      needVVVV = 0; /* Mark that we have found a VVVV operand. */ +      if (!hasVVVV) +        return -1; +      if (insn->mode != MODE_64BIT) +        insn->vvvv = static_cast<Reg>(insn->vvvv & 0x7); +      if (fixupReg(insn, &Op)) +        return -1; +      break; +    case ENCODING_WRITEMASK: +      if (readMaskRegister(insn)) +        return -1; +      break; +    case ENCODING_DUP: +      break; +    default: +      dbgprintf(insn, "Encountered an operand with an unknown encoding."); +      return -1; +    } +  } + +  /* If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail */ +  if (needVVVV) return -1; + +  return 0; +} + +/* + * decodeInstruction - Reads and interprets a full instruction provided by the + *   user. + * + * @param insn      - A pointer to the instruction to be populated.  Must be + *                    pre-allocated. + * @param reader    - The function to be used to read the instruction's bytes. + * @param readerArg - A generic argument to be passed to the reader to store + *                    any internal state. + * @param logger    - If non-NULL, the function to be used to write log messages + *                    and warnings. + * @param loggerArg - A generic argument to be passed to the logger to store + *                    any internal state. + * @param miiArg    - An opaque pointer that is handed unchanged to getID(). + * @param startLoc  - The address (in the reader's address space) of the first + *                    byte in the instruction. + * @param mode      - The mode (real mode, IA-32e, or IA-32e in 64-bit mode) to + *                    decode the instruction in. 
+ * @return          - 0 if the instruction was decoded; -1 if readPrefixes(), + *                    readOpcode(), getID() or readOperands() returns nonzero, + *                    or if the decoded instruction ID is 0.  An encoding + *                    longer than 15 bytes is only logged, not rejected. + */ +int llvm::X86Disassembler::decodeInstruction( +    struct InternalInstruction *insn, byteReader_t reader, +    const void *readerArg, dlog_t logger, void *loggerArg, const void *miiArg, +    uint64_t startLoc, DisassemblerMode mode) { +  /* Start from an all-zero instruction, then record the caller's hooks. */ +  memset(insn, 0, sizeof(struct InternalInstruction)); + +  insn->reader = reader; +  insn->readerArg = readerArg; +  insn->dlog = logger; +  insn->dlogArg = loggerArg; +  insn->startLocation = startLoc; +  insn->readerCursor = startLoc; +  insn->mode = mode; +  insn->numImmediatesConsumed = 0; + +  /* The stages run in order and stop at the first one that fails. */ +  if (readPrefixes(insn)       || +      readOpcode(insn)         || +      getID(insn, miiArg)      || +      insn->instructionID == 0 || +      readOperands(insn)) +    return -1; + +  insn->operands = x86OperandSets[insn->spec->operands]; + +  insn->length = insn->readerCursor - insn->startLocation; + +  /* %llx takes an unsigned long long, which uint64_t need not be. */ +  dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %zu", +            static_cast<unsigned long long>(startLoc), +            static_cast<unsigned long long>(insn->readerCursor), +            insn->length); + +  if (insn->length > 15) +    dbgprintf(insn, "Instruction exceeds 15-byte limit"); + +  return 0; +} diff --git a/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h new file mode 100644 index 000000000000..7c0a42c019e3 --- /dev/null +++ b/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h @@ -0,0 +1,695 @@ +//===-- X86DisassemblerDecoderInternal.h - Disassembler decoder -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is part of the X86 Disassembler. +// It contains the public interface of the instruction decoder. 
+// Documentation for the disassembler can be found in X86Disassembler.h. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_X86_DISASSEMBLER_X86DISASSEMBLERDECODER_H +#define LLVM_LIB_TARGET_X86_DISASSEMBLER_X86DISASSEMBLERDECODER_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/Support/X86DisassemblerDecoderCommon.h" + +namespace llvm { +namespace X86Disassembler { + +// Accessor functions for various fields of an Intel instruction.  Each macro +// parenthesizes its argument so that it may be applied to any expression; +// macros that apply ~ return the complement of the stored bit(s). +#define modFromModRM(modRM)  (((modRM) & 0xc0) >> 6) +#define regFromModRM(modRM)  (((modRM) & 0x38) >> 3) +#define rmFromModRM(modRM)   ((modRM) & 0x7) +#define scaleFromSIB(sib)    (((sib) & 0xc0) >> 6) +#define indexFromSIB(sib)    (((sib) & 0x38) >> 3) +#define baseFromSIB(sib)     ((sib) & 0x7) +#define wFromREX(rex)        (((rex) & 0x8) >> 3) +#define rFromREX(rex)        (((rex) & 0x4) >> 2) +#define xFromREX(rex)        (((rex) & 0x2) >> 1) +#define bFromREX(rex)        ((rex) & 0x1) + +#define rFromEVEX2of4(evex)     (((~(evex)) & 0x80) >> 7) +#define xFromEVEX2of4(evex)     (((~(evex)) & 0x40) >> 6) +#define bFromEVEX2of4(evex)     (((~(evex)) & 0x20) >> 5) +#define r2FromEVEX2of4(evex)    (((~(evex)) & 0x10) >> 4) +#define mmFromEVEX2of4(evex)    ((evex) & 0x3) +#define wFromEVEX3of4(evex)     (((evex) & 0x80) >> 7) +#define vvvvFromEVEX3of4(evex)  (((~(evex)) & 0x78) >> 3) +#define ppFromEVEX3of4(evex)    ((evex) & 0x3) +#define zFromEVEX4of4(evex)     (((evex) & 0x80) >> 7) +#define l2FromEVEX4of4(evex)    (((evex) & 0x40) >> 6) +#define lFromEVEX4of4(evex)     (((evex) & 0x20) >> 5) +#define bFromEVEX4of4(evex)     (((evex) & 0x10) >> 4) +#define v2FromEVEX4of4(evex)    (((~(evex)) & 0x8) >> 3) +#define aaaFromEVEX4of4(evex)   ((evex) & 0x7) + +#define rFromVEX2of3(vex)       (((~(vex)) & 0x80) >> 7) +#define xFromVEX2of3(vex)       (((~(vex)) & 0x40) >> 6) +#define bFromVEX2of3(vex)       (((~(vex)) & 0x20) >> 5) +#define mmmmmFromVEX2of3(vex)   ((vex) & 
0x1f) +#define wFromVEX3of3(vex)       (((vex) & 0x80) >> 7) +#define vvvvFromVEX3of3(vex)    (((~(vex)) & 0x78) >> 3) +#define lFromVEX3of3(vex)       (((vex) & 0x4) >> 2) +#define ppFromVEX3of3(vex)      ((vex) & 0x3) + +#define rFromVEX2of2(vex)       (((~(vex)) & 0x80) >> 7) +#define vvvvFromVEX2of2(vex)    (((~(vex)) & 0x78) >> 3) +#define lFromVEX2of2(vex)       (((vex) & 0x4) >> 2) +#define ppFromVEX2of2(vex)      ((vex) & 0x3) + +#define rFromXOP2of3(xop)       (((~(xop)) & 0x80) >> 7) +#define xFromXOP2of3(xop)       (((~(xop)) & 0x40) >> 6) +#define bFromXOP2of3(xop)       (((~(xop)) & 0x20) >> 5) +#define mmmmmFromXOP2of3(xop)   ((xop) & 0x1f) +#define wFromXOP3of3(xop)       (((xop) & 0x80) >> 7) +#define vvvvFromXOP3of3(xop)    (((~(xop)) & 0x78) >> 3) +#define lFromXOP3of3(xop)       (((xop) & 0x4) >> 2) +#define ppFromXOP3of3(xop)      ((xop) & 0x3) + +// These lists name the Intel registers for use by the decoder.  Each list is a +// sequence of ENTRY(name) items; a user defines ENTRY, names the list, and then +// undefines ENTRY again (see enum EABase, SIBIndex, SIBBase and Reg). +#define REGS_8BIT     \ +  ENTRY(AL)           \ +  ENTRY(CL)           \ +  ENTRY(DL)           \ +  ENTRY(BL)           \ +  ENTRY(AH)           \ +  ENTRY(CH)           \ +  ENTRY(DH)           \ +  ENTRY(BH)           \ +  ENTRY(R8B)          \ +  ENTRY(R9B)          \ +  ENTRY(R10B)         \ +  ENTRY(R11B)         \ +  ENTRY(R12B)         \ +  ENTRY(R13B)         \ +  ENTRY(R14B)         \ +  ENTRY(R15B)         \ +  ENTRY(SPL)          \ +  ENTRY(BPL)          \ +  ENTRY(SIL)          \ +  ENTRY(DIL) + +#define EA_BASES_16BIT  \ +  ENTRY(BX_SI)          \ +  ENTRY(BX_DI)          \ +  ENTRY(BP_SI)          \ +  ENTRY(BP_DI)          \ +  ENTRY(SI)             \ +  ENTRY(DI)             \ +  ENTRY(BP)             \ +  ENTRY(BX)             \ +  ENTRY(R8W)            \ +  ENTRY(R9W)            \ +  ENTRY(R10W)           \ +  ENTRY(R11W)           \ +  ENTRY(R12W)           \ +  ENTRY(R13W)           \ +  ENTRY(R14W)           \ +  ENTRY(R15W) + +#define REGS_16BIT    \ +  ENTRY(AX)           \ +  ENTRY(CX)           \ +  ENTRY(DX)   
        \ +  ENTRY(BX)           \ +  ENTRY(SP)           \ +  ENTRY(BP)           \ +  ENTRY(SI)           \ +  ENTRY(DI)           \ +  ENTRY(R8W)          \ +  ENTRY(R9W)          \ +  ENTRY(R10W)         \ +  ENTRY(R11W)         \ +  ENTRY(R12W)         \ +  ENTRY(R13W)         \ +  ENTRY(R14W)         \ +  ENTRY(R15W) + +#define EA_BASES_32BIT  \ +  ENTRY(EAX)            \ +  ENTRY(ECX)            \ +  ENTRY(EDX)            \ +  ENTRY(EBX)            \ +  ENTRY(sib)            \ +  ENTRY(EBP)            \ +  ENTRY(ESI)            \ +  ENTRY(EDI)            \ +  ENTRY(R8D)            \ +  ENTRY(R9D)            \ +  ENTRY(R10D)           \ +  ENTRY(R11D)           \ +  ENTRY(R12D)           \ +  ENTRY(R13D)           \ +  ENTRY(R14D)           \ +  ENTRY(R15D) + +#define REGS_32BIT  \ +  ENTRY(EAX)        \ +  ENTRY(ECX)        \ +  ENTRY(EDX)        \ +  ENTRY(EBX)        \ +  ENTRY(ESP)        \ +  ENTRY(EBP)        \ +  ENTRY(ESI)        \ +  ENTRY(EDI)        \ +  ENTRY(R8D)        \ +  ENTRY(R9D)        \ +  ENTRY(R10D)       \ +  ENTRY(R11D)       \ +  ENTRY(R12D)       \ +  ENTRY(R13D)       \ +  ENTRY(R14D)       \ +  ENTRY(R15D) + +#define EA_BASES_64BIT  \ +  ENTRY(RAX)            \ +  ENTRY(RCX)            \ +  ENTRY(RDX)            \ +  ENTRY(RBX)            \ +  ENTRY(sib64)          \ +  ENTRY(RBP)            \ +  ENTRY(RSI)            \ +  ENTRY(RDI)            \ +  ENTRY(R8)             \ +  ENTRY(R9)             \ +  ENTRY(R10)            \ +  ENTRY(R11)            \ +  ENTRY(R12)            \ +  ENTRY(R13)            \ +  ENTRY(R14)            \ +  ENTRY(R15) + +#define REGS_64BIT  \ +  ENTRY(RAX)        \ +  ENTRY(RCX)        \ +  ENTRY(RDX)        \ +  ENTRY(RBX)        \ +  ENTRY(RSP)        \ +  ENTRY(RBP)        \ +  ENTRY(RSI)        \ +  ENTRY(RDI)        \ +  ENTRY(R8)         \ +  ENTRY(R9)         \ +  ENTRY(R10)        \ +  ENTRY(R11)        \ +  ENTRY(R12)        \ +  ENTRY(R13)        \ +  ENTRY(R14)        \ +  ENTRY(R15) + 
+#define REGS_MMX  \ +  ENTRY(MM0)      \ +  ENTRY(MM1)      \ +  ENTRY(MM2)      \ +  ENTRY(MM3)      \ +  ENTRY(MM4)      \ +  ENTRY(MM5)      \ +  ENTRY(MM6)      \ +  ENTRY(MM7) + +#define REGS_XMM  \ +  ENTRY(XMM0)     \ +  ENTRY(XMM1)     \ +  ENTRY(XMM2)     \ +  ENTRY(XMM3)     \ +  ENTRY(XMM4)     \ +  ENTRY(XMM5)     \ +  ENTRY(XMM6)     \ +  ENTRY(XMM7)     \ +  ENTRY(XMM8)     \ +  ENTRY(XMM9)     \ +  ENTRY(XMM10)    \ +  ENTRY(XMM11)    \ +  ENTRY(XMM12)    \ +  ENTRY(XMM13)    \ +  ENTRY(XMM14)    \ +  ENTRY(XMM15)    \ +  ENTRY(XMM16)    \ +  ENTRY(XMM17)    \ +  ENTRY(XMM18)    \ +  ENTRY(XMM19)    \ +  ENTRY(XMM20)    \ +  ENTRY(XMM21)    \ +  ENTRY(XMM22)    \ +  ENTRY(XMM23)    \ +  ENTRY(XMM24)    \ +  ENTRY(XMM25)    \ +  ENTRY(XMM26)    \ +  ENTRY(XMM27)    \ +  ENTRY(XMM28)    \ +  ENTRY(XMM29)    \ +  ENTRY(XMM30)    \ +  ENTRY(XMM31) + +#define REGS_YMM  \ +  ENTRY(YMM0)     \ +  ENTRY(YMM1)     \ +  ENTRY(YMM2)     \ +  ENTRY(YMM3)     \ +  ENTRY(YMM4)     \ +  ENTRY(YMM5)     \ +  ENTRY(YMM6)     \ +  ENTRY(YMM7)     \ +  ENTRY(YMM8)     \ +  ENTRY(YMM9)     \ +  ENTRY(YMM10)    \ +  ENTRY(YMM11)    \ +  ENTRY(YMM12)    \ +  ENTRY(YMM13)    \ +  ENTRY(YMM14)    \ +  ENTRY(YMM15)    \ +  ENTRY(YMM16)    \ +  ENTRY(YMM17)    \ +  ENTRY(YMM18)    \ +  ENTRY(YMM19)    \ +  ENTRY(YMM20)    \ +  ENTRY(YMM21)    \ +  ENTRY(YMM22)    \ +  ENTRY(YMM23)    \ +  ENTRY(YMM24)    \ +  ENTRY(YMM25)    \ +  ENTRY(YMM26)    \ +  ENTRY(YMM27)    \ +  ENTRY(YMM28)    \ +  ENTRY(YMM29)    \ +  ENTRY(YMM30)    \ +  ENTRY(YMM31) + +#define REGS_ZMM  \ +  ENTRY(ZMM0)     \ +  ENTRY(ZMM1)     \ +  ENTRY(ZMM2)     \ +  ENTRY(ZMM3)     \ +  ENTRY(ZMM4)     \ +  ENTRY(ZMM5)     \ +  ENTRY(ZMM6)     \ +  ENTRY(ZMM7)     \ +  ENTRY(ZMM8)     \ +  ENTRY(ZMM9)     \ +  ENTRY(ZMM10)    \ +  ENTRY(ZMM11)    \ +  ENTRY(ZMM12)    \ +  ENTRY(ZMM13)    \ +  ENTRY(ZMM14)    \ +  ENTRY(ZMM15)    \ +  ENTRY(ZMM16)    \ +  ENTRY(ZMM17)    \ +  ENTRY(ZMM18)    \ +  ENTRY(ZMM19) 
   \ +  ENTRY(ZMM20)    \ +  ENTRY(ZMM21)    \ +  ENTRY(ZMM22)    \ +  ENTRY(ZMM23)    \ +  ENTRY(ZMM24)    \ +  ENTRY(ZMM25)    \ +  ENTRY(ZMM26)    \ +  ENTRY(ZMM27)    \ +  ENTRY(ZMM28)    \ +  ENTRY(ZMM29)    \ +  ENTRY(ZMM30)    \ +  ENTRY(ZMM31) + +#define REGS_MASKS \ +  ENTRY(K0)        \ +  ENTRY(K1)        \ +  ENTRY(K2)        \ +  ENTRY(K3)        \ +  ENTRY(K4)        \ +  ENTRY(K5)        \ +  ENTRY(K6)        \ +  ENTRY(K7) + +#define REGS_MASK_PAIRS \ +  ENTRY(K0_K1)     \ +  ENTRY(K2_K3)     \ +  ENTRY(K4_K5)     \ +  ENTRY(K6_K7) + +#define REGS_SEGMENT \ +  ENTRY(ES)          \ +  ENTRY(CS)          \ +  ENTRY(SS)          \ +  ENTRY(DS)          \ +  ENTRY(FS)          \ +  ENTRY(GS) + +#define REGS_DEBUG  \ +  ENTRY(DR0)        \ +  ENTRY(DR1)        \ +  ENTRY(DR2)        \ +  ENTRY(DR3)        \ +  ENTRY(DR4)        \ +  ENTRY(DR5)        \ +  ENTRY(DR6)        \ +  ENTRY(DR7)        \ +  ENTRY(DR8)        \ +  ENTRY(DR9)        \ +  ENTRY(DR10)       \ +  ENTRY(DR11)       \ +  ENTRY(DR12)       \ +  ENTRY(DR13)       \ +  ENTRY(DR14)       \ +  ENTRY(DR15) + +#define REGS_CONTROL  \ +  ENTRY(CR0)          \ +  ENTRY(CR1)          \ +  ENTRY(CR2)          \ +  ENTRY(CR3)          \ +  ENTRY(CR4)          \ +  ENTRY(CR5)          \ +  ENTRY(CR6)          \ +  ENTRY(CR7)          \ +  ENTRY(CR8)          \ +  ENTRY(CR9)          \ +  ENTRY(CR10)         \ +  ENTRY(CR11)         \ +  ENTRY(CR12)         \ +  ENTRY(CR13)         \ +  ENTRY(CR14)         \ +  ENTRY(CR15) + +#define REGS_BOUND    \ +  ENTRY(BND0)         \ +  ENTRY(BND1)         \ +  ENTRY(BND2)         \ +  ENTRY(BND3) + +#define ALL_EA_BASES  \ +  EA_BASES_16BIT      \ +  EA_BASES_32BIT      \ +  EA_BASES_64BIT + +#define ALL_SIB_BASES \ +  REGS_32BIT          \ +  REGS_64BIT + +#define ALL_REGS      \ +  REGS_8BIT           \ +  REGS_16BIT          \ +  REGS_32BIT          \ +  REGS_64BIT          \ +  REGS_MMX            \ +  REGS_XMM            \ +  REGS_YMM            \ +  
REGS_ZMM            \ +  REGS_MASKS          \ +  REGS_MASK_PAIRS     \ +  REGS_SEGMENT        \ +  REGS_DEBUG          \ +  REGS_CONTROL        \ +  REGS_BOUND          \ +  ENTRY(RIP) + +/// All possible values of the base field for effective-address +/// computations, a.k.a. the Mod and R/M fields of the ModR/M byte. +/// We distinguish between bases (EA_BASE_*) and registers that just happen +/// to be referred to when Mod == 0b11 (EA_REG_*). +enum EABase { +  EA_BASE_NONE, +#define ENTRY(x) EA_BASE_##x, +  ALL_EA_BASES +#undef ENTRY +#define ENTRY(x) EA_REG_##x, +  ALL_REGS +#undef ENTRY +  EA_max +}; + +/// All possible values of the SIB index field. +/// Borrows entries from ALL_EA_BASES with the special case that +/// sib is synonymous with NONE. +/// Vector SIB: the index can also be an XMM, YMM or ZMM register. +enum SIBIndex { +  SIB_INDEX_NONE, +#define ENTRY(x) SIB_INDEX_##x, +  ALL_EA_BASES +  REGS_XMM +  REGS_YMM +  REGS_ZMM +#undef ENTRY +  SIB_INDEX_max +}; + +/// All possible values of the SIB base field. +enum SIBBase { +  SIB_BASE_NONE, +#define ENTRY(x) SIB_BASE_##x, +  ALL_SIB_BASES +#undef ENTRY +  SIB_BASE_max +}; + +/// Possible displacement types for effective-address computations. +typedef enum { +  EA_DISP_NONE, +  EA_DISP_8, +  EA_DISP_16, +  EA_DISP_32 +} EADisplacement; + +/// All possible values of the reg field in the ModR/M byte. +enum Reg { +#define ENTRY(x) MODRM_REG_##x, +  ALL_REGS +#undef ENTRY +  MODRM_REG_max +}; + +/// All possible segment overrides. 
+enum SegmentOverride { +  SEG_OVERRIDE_NONE, +  SEG_OVERRIDE_CS, +  SEG_OVERRIDE_SS, +  SEG_OVERRIDE_DS, +  SEG_OVERRIDE_ES, +  SEG_OVERRIDE_FS, +  SEG_OVERRIDE_GS, +  SEG_OVERRIDE_max +}; + +/// Possible values for the VEX.m-mmmm field +enum VEXLeadingOpcodeByte { +  VEX_LOB_0F = 0x1, +  VEX_LOB_0F38 = 0x2, +  VEX_LOB_0F3A = 0x3 +}; + +/// Map-select values for XOP-encoded instructions. +/// NOTE(review): presumably compared against mmmmmFromXOP2of3() -- confirm. +enum XOPMapSelect { +  XOP_MAP_SELECT_8 = 0x8, +  XOP_MAP_SELECT_9 = 0x9, +  XOP_MAP_SELECT_A = 0xA +}; + +/// Possible values for the VEX.pp/EVEX.pp field +enum VEXPrefixCode { +  VEX_PREFIX_NONE = 0x0, +  VEX_PREFIX_66 = 0x1, +  VEX_PREFIX_F3 = 0x2, +  VEX_PREFIX_F2 = 0x3 +}; + +/// The type of the vector extension prefix of an instruction, as stored in +/// InternalInstruction::vectorExtensionType. +enum VectorExtensionType { +  TYPE_NO_VEX_XOP   = 0x0, +  TYPE_VEX_2B       = 0x1, +  TYPE_VEX_3B       = 0x2, +  TYPE_EVEX         = 0x3, +  TYPE_XOP          = 0x4 +}; + +/// Type for the byte reader that the consumer must provide to +/// the decoder. Reads a single byte from the instruction's address space. +/// \param arg     A baton that the consumer can associate with any internal +///                state that it needs. +/// \param byte    A pointer to a single byte in memory that should be set to +///                contain the value at address. +/// \param address The address in the instruction's address space that should +///                be read from. +/// \return        -1 if the byte cannot be read for any reason; 0 otherwise. +typedef int (*byteReader_t)(const void *arg, uint8_t *byte, uint64_t address); + +/// Type for the logging function that the consumer can provide to +/// get debugging output from the decoder. +/// \param arg A baton that the consumer can associate with any internal +///            state that it needs. +/// \param log A string that contains the message.  Will be reused after +///            the logger returns. +typedef void (*dlog_t)(void *arg, const char *log); + +/// The specification for how to extract and interpret a full instruction and +/// its operands. 
+struct InstructionSpecifier { +  // Index into x86OperandSets selecting the operand list of the instruction +  // (used by decodeInstruction()). +  uint16_t operands; +}; + +/// The x86 internal instruction, which is produced by the decoder. +struct InternalInstruction { +  // Reader interface (C) +  byteReader_t reader; +  // Opaque value passed to the reader +  const void* readerArg; +  // The address of the next byte to read via the reader +  uint64_t readerCursor; + +  // Logger interface (C) +  dlog_t dlog; +  // Opaque value passed to the logger +  void* dlogArg; + +  // General instruction information + +  // The mode to disassemble for (64-bit, protected, real) +  DisassemblerMode mode; +  // The start of the instruction, usable with the reader +  uint64_t startLocation; +  // The length of the instruction, in bytes +  size_t length; + +  // Prefix state + +  // The possible mandatory prefix +  uint8_t mandatoryPrefix; +  // The value of the vector extension prefix(EVEX/VEX/XOP), if present +  uint8_t vectorExtensionPrefix[4]; +  // The type of the vector extension prefix +  VectorExtensionType vectorExtensionType; +  // The value of the REX prefix, if present +  uint8_t rexPrefix; +  // The segment override type +  SegmentOverride segmentOverride; +  // True if the prefix byte, 0xf2 or 0xf3, is xacquire or xrelease +  bool xAcquireRelease; + +  // Address-size override +  bool hasAdSize; +  // Operand-size override +  bool hasOpSize; +  // Lock prefix +  bool hasLockPrefix; +  // The repeat prefix if any +  uint8_t repeatPrefix; + +  // Sizes of various critical pieces of data, in bytes +  uint8_t registerSize; +  uint8_t addressSize; +  uint8_t displacementSize; +  uint8_t immediateSize; + +  // Offsets from the start of the instruction to the pieces of data, which are +  // needed to find relocation entries for adding symbolic operands. 
+  uint8_t displacementOffset; +  uint8_t immediateOffset; + +  // opcode state + +  // The last byte of the opcode, not counting any ModR/M extension +  uint8_t opcode; + +  // decode state + +  // The type of opcode, used for indexing into the array of decode tables +  OpcodeType opcodeType; +  // The instruction ID, extracted from the decode table.  decodeInstruction() +  // fails if this is still 0 after the ID lookup. +  uint16_t instructionID; +  // The specifier for the instruction, from the instruction info table +  const InstructionSpecifier *spec; + +  // state for additional bytes, consumed during operand decode.  Pattern: +  // consumed___ indicates that the byte was already consumed and does not +  // need to be consumed again. + +  // The VEX.vvvv field, which contains a third register operand for some AVX +  // instructions. +  Reg                           vvvv; + +  // The writemask for AVX-512 instructions which is contained in EVEX.aaa +  Reg                           writemask; + +  // The ModR/M byte, which contains most register operands and some portion of +  // all memory operands. +  bool                          consumedModRM; +  uint8_t                       modRM; + +  // The SIB byte, used for more complex 32- or 64-bit memory operands +  bool                          consumedSIB; +  uint8_t                       sib; + +  // The displacement, used for memory operands +  bool                          consumedDisplacement; +  int32_t                       displacement; + +  // Immediates.  There can be two in some cases +  uint8_t                       numImmediatesConsumed; +  uint8_t                       numImmediatesTranslated; +  uint64_t                      immediates[2]; + +  // A register or immediate operand encoded into the opcode +  Reg                           opcodeRegister; + +  // Portions of the ModR/M byte + +  // These fields determine the allowable values for the ModR/M fields, which +  // depend on operand and address widths. 
+  EABase                        eaRegBase; +  Reg                           regBase; + +  // The Mod and R/M fields can encode a base for an effective address, or a +  // register.  These are separated into two fields here. +  EABase                        eaBase; +  EADisplacement                eaDisplacement; +  // The reg field always encodes a register +  Reg                           reg; + +  // SIB state +  SIBIndex                      sibIndexBase; +  SIBIndex                      sibIndex; +  uint8_t                       sibScale; +  SIBBase                       sibBase; + +  // Embedded rounding control. +  uint8_t                       RC; + +  // The operand specifiers of the decoded instruction; assigned by +  // decodeInstruction() from x86OperandSets. +  ArrayRef<OperandSpecifier> operands; +}; + +/// Decode one instruction and store the decoding results in +/// a buffer provided by the consumer. +/// \param insn      The buffer to store the instruction in.  Allocated by the +///                  consumer. +/// \param reader    The byteReader_t for the bytes to be read. +/// \param readerArg An argument to pass to the reader for storing context +///                  specific to the consumer.  May be NULL. +/// \param logger    The dlog_t to be used in printing status messages from the +///                  disassembler.  May be NULL. +/// \param loggerArg An argument to pass to the logger for storing context +///                  specific to the logger.  May be NULL. +/// \param miiArg    An opaque pointer that the decoder passes on unchanged +///                  to its instruction-ID lookup. +/// \param startLoc  The address (in the reader's address space) of the first +///                  byte in the instruction. +/// \param mode      The mode (16-bit, 32-bit, 64-bit) to decode in. +/// \return          Nonzero if there was an error during decode, 0 otherwise. 
+int decodeInstruction(InternalInstruction *insn, +                      byteReader_t reader, +                      const void *readerArg, +                      dlog_t logger, +                      void *loggerArg, +                      const void *miiArg, +                      uint64_t startLoc, +                      DisassemblerMode mode); + +/// Print a message to debugs() +/// \param file The name of the file printing the debug message. +/// \param line The line number that printed the debug message. +/// \param s    The message to print. +void Debug(const char *file, unsigned line, const char *s); + +StringRef GetInstrName(unsigned Opcode, const void *mii); + +} // namespace X86Disassembler +} // namespace llvm + +#endif | 
