diff options
Diffstat (limited to 'lib/Target/X86/Disassembler')
-rw-r--r-- | lib/Target/X86/Disassembler/LLVMBuild.txt | 2 | ||||
-rw-r--r-- | lib/Target/X86/Disassembler/X86Disassembler.cpp | 211 | ||||
-rw-r--r-- | lib/Target/X86/Disassembler/X86Disassembler.h | 16 | ||||
-rw-r--r-- | lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp | 171 | ||||
-rw-r--r-- | lib/Target/X86/Disassembler/X86DisassemblerDecoder.h | 23 | ||||
-rw-r--r-- | lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h | 35 |
6 files changed, 268 insertions, 190 deletions
diff --git a/lib/Target/X86/Disassembler/LLVMBuild.txt b/lib/Target/X86/Disassembler/LLVMBuild.txt index cac7adff4922..e003fc9f996e 100644 --- a/lib/Target/X86/Disassembler/LLVMBuild.txt +++ b/lib/Target/X86/Disassembler/LLVMBuild.txt @@ -19,5 +19,5 @@ type = Library name = X86Disassembler parent = X86 -required_libraries = MC Support X86Info +required_libraries = MCDisassembler Support X86Info add_to_library_groups = X86 diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp index 521bd21b81c6..1c5618288e75 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -23,7 +23,6 @@ #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/MemoryObject.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" @@ -52,8 +51,8 @@ const char *llvm::X86Disassembler::GetInstrName(unsigned Opcode, #define debug(s) DEBUG(Debug(__FILE__, __LINE__, s)); -namespace llvm { - +namespace llvm { + // Fill-ins to make the compiler happy. These constants are never actually // assigned; they are just filler to make an automatically-generated switch // statement work. @@ -97,16 +96,26 @@ X86GenericDisassembler::X86GenericDisassembler( } } -/// regionReader - a callback function that wraps the readByte method from -/// MemoryObject. +struct Region { + ArrayRef<uint8_t> Bytes; + uint64_t Base; + Region(ArrayRef<uint8_t> Bytes, uint64_t Base) : Bytes(Bytes), Base(Base) {} +}; + +/// A callback function that wraps the readByte method from Region. /// -/// @param arg - The generic callback parameter. In this case, this should -/// be a pointer to a MemoryObject. -/// @param byte - A pointer to the byte to be read. -/// @param address - The address to be read. 
-static int regionReader(const void* arg, uint8_t* byte, uint64_t address) { - const MemoryObject* region = static_cast<const MemoryObject*>(arg); - return region->readByte(address, byte); +/// @param Arg - The generic callback parameter. In this case, this should +/// be a pointer to a Region. +/// @param Byte - A pointer to the byte to be read. +/// @param Address - The address to be read. +static int regionReader(const void *Arg, uint8_t *Byte, uint64_t Address) { + auto *R = static_cast<const Region *>(Arg); + ArrayRef<uint8_t> Bytes = R->Bytes; + unsigned Index = Address - R->Base; + if (Bytes.size() <= Index) + return -1; + *Byte = Bytes[Index]; + return 0; } /// logger - a callback function that wraps the operator<< method from @@ -118,47 +127,38 @@ static int regionReader(const void* arg, uint8_t* byte, uint64_t address) { static void logger(void* arg, const char* log) { if (!arg) return; - + raw_ostream &vStream = *(static_cast<raw_ostream*>(arg)); vStream << log << "\n"; -} - +} + // // Public interface for the disassembler // -MCDisassembler::DecodeStatus -X86GenericDisassembler::getInstruction(MCInst &instr, - uint64_t &size, - const MemoryObject &region, - uint64_t address, - raw_ostream &vStream, - raw_ostream &cStream) const { - CommentStream = &cStream; - - InternalInstruction internalInstr; - - dlog_t loggerFn = logger; - if (&vStream == &nulls()) - loggerFn = nullptr; // Disable logging completely if it's going to nulls(). 
- - int ret = decodeInstruction(&internalInstr, - regionReader, - (const void*)&region, - loggerFn, - (void*)&vStream, - (const void*)MII.get(), - address, - fMode); - - if (ret) { - size = internalInstr.readerCursor - address; +MCDisassembler::DecodeStatus X86GenericDisassembler::getInstruction( + MCInst &Instr, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address, + raw_ostream &VStream, raw_ostream &CStream) const { + CommentStream = &CStream; + + InternalInstruction InternalInstr; + + dlog_t LoggerFn = logger; + if (&VStream == &nulls()) + LoggerFn = nullptr; // Disable logging completely if it's going to nulls(). + + Region R(Bytes, Address); + + int Ret = decodeInstruction(&InternalInstr, regionReader, (const void *)&R, + LoggerFn, (void *)&VStream, + (const void *)MII.get(), Address, fMode); + + if (Ret) { + Size = InternalInstr.readerCursor - Address; return Fail; - } - else { - size = internalInstr.length; - return (!translateInstruction(instr, internalInstr, this)) ? - Success : Fail; + } else { + Size = InternalInstr.length; + return (!translateInstruction(Instr, InternalInstr, this)) ? Success : Fail; } } @@ -184,7 +184,7 @@ static void translateRegister(MCInst &mcInst, Reg reg) { } /// tryAddingSymbolicOperand - trys to add a symbolic operand in place of the -/// immediate Value in the MCInst. +/// immediate Value in the MCInst. /// /// @param Value - The immediate Value, has had any PC adjustment made by /// the caller. @@ -196,7 +196,7 @@ static void translateRegister(MCInst &mcInst, Reg reg) { /// If the getOpInfo() function was set when setupForSymbolicDisassembly() was /// called then that function is called to get any symbolic information for the /// immediate in the instruction using the Address, Offset and Width. If that -/// returns non-zero then the symbolic information it returns is used to create +/// returns non-zero then the symbolic information it returns is used to create /// an MCExpr and that is added as an operand to the MCInst. 
If getOpInfo() /// returns zero and isBranch is true then a symbol look up for immediate Value /// is done and if a symbol is found an MCExpr is created with that, else @@ -204,8 +204,8 @@ static void translateRegister(MCInst &mcInst, Reg reg) { /// if it adds an operand to the MCInst and false otherwise. static bool tryAddingSymbolicOperand(int64_t Value, bool isBranch, uint64_t Address, uint64_t Offset, - uint64_t Width, MCInst &MI, - const MCDisassembler *Dis) { + uint64_t Width, MCInst &MI, + const MCDisassembler *Dis) { return Dis->tryAddingSymbolicOperand(MI, Value, Address, isBranch, Offset, Width); } @@ -215,7 +215,7 @@ static bool tryAddingSymbolicOperand(int64_t Value, bool isBranch, /// These can often be addresses in a literal pool. The Address of the /// instruction and its immediate Value are used to determine the address /// being referenced in the literal pool entry. The SymbolLookUp call back will -/// return a pointer to a literal 'C' string if the referenced address is an +/// return a pointer to a literal 'C' string if the referenced address is an /// address into a section with 'C' string literals. static void tryAddingPcLoadReferenceComment(uint64_t Address, uint64_t Value, const void *Decoder) { @@ -287,7 +287,7 @@ static bool translateDstIndex(MCInst &mcInst, InternalInstruction &insn) { static void translateImmediate(MCInst &mcInst, uint64_t immediate, const OperandSpecifier &operand, InternalInstruction &insn, - const MCDisassembler *Dis) { + const MCDisassembler *Dis) { // Sign-extend the immediate if necessary. OperandType type = (OperandType)operand.type; @@ -350,6 +350,54 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate, case ENCODING_IO: break; } + } else if (type == TYPE_IMM3) { + // Check for immediates that printSSECC can't handle. 
+ if (immediate >= 8) { + unsigned NewOpc; + switch (mcInst.getOpcode()) { + default: llvm_unreachable("unexpected opcode"); + case X86::CMPPDrmi: NewOpc = X86::CMPPDrmi_alt; break; + case X86::CMPPDrri: NewOpc = X86::CMPPDrri_alt; break; + case X86::CMPPSrmi: NewOpc = X86::CMPPSrmi_alt; break; + case X86::CMPPSrri: NewOpc = X86::CMPPSrri_alt; break; + case X86::CMPSDrm: NewOpc = X86::CMPSDrm_alt; break; + case X86::CMPSDrr: NewOpc = X86::CMPSDrr_alt; break; + case X86::CMPSSrm: NewOpc = X86::CMPSSrm_alt; break; + case X86::CMPSSrr: NewOpc = X86::CMPSSrr_alt; break; + } + // Switch opcode to the one that doesn't get special printing. + mcInst.setOpcode(NewOpc); + } + } else if (type == TYPE_IMM5) { + // Check for immediates that printAVXCC can't handle. + if (immediate >= 32) { + unsigned NewOpc; + switch (mcInst.getOpcode()) { + default: llvm_unreachable("unexpected opcode"); + case X86::VCMPPDrmi: NewOpc = X86::VCMPPDrmi_alt; break; + case X86::VCMPPDrri: NewOpc = X86::VCMPPDrri_alt; break; + case X86::VCMPPSrmi: NewOpc = X86::VCMPPSrmi_alt; break; + case X86::VCMPPSrri: NewOpc = X86::VCMPPSrri_alt; break; + case X86::VCMPSDrm: NewOpc = X86::VCMPSDrm_alt; break; + case X86::VCMPSDrr: NewOpc = X86::VCMPSDrr_alt; break; + case X86::VCMPSSrm: NewOpc = X86::VCMPSSrm_alt; break; + case X86::VCMPSSrr: NewOpc = X86::VCMPSSrr_alt; break; + case X86::VCMPPDYrmi: NewOpc = X86::VCMPPDYrmi_alt; break; + case X86::VCMPPDYrri: NewOpc = X86::VCMPPDYrri_alt; break; + case X86::VCMPPSYrmi: NewOpc = X86::VCMPPSYrmi_alt; break; + case X86::VCMPPSYrri: NewOpc = X86::VCMPPSYrri_alt; break; + case X86::VCMPPDZrmi: NewOpc = X86::VCMPPDZrmi_alt; break; + case X86::VCMPPDZrri: NewOpc = X86::VCMPPDZrri_alt; break; + case X86::VCMPPSZrmi: NewOpc = X86::VCMPPSZrmi_alt; break; + case X86::VCMPPSZrri: NewOpc = X86::VCMPPSZrri_alt; break; + case X86::VCMPSDZrm: NewOpc = X86::VCMPSDZrmi_alt; break; + case X86::VCMPSDZrr: NewOpc = X86::VCMPSDZrri_alt; break; + case X86::VCMPSSZrm: NewOpc = 
X86::VCMPSSZrmi_alt; break; + case X86::VCMPSSZrr: NewOpc = X86::VCMPSSZrri_alt; break; + } + // Switch opcode to the one that doesn't get special printing. + mcInst.setOpcode(NewOpc); + } } switch (type) { @@ -407,7 +455,7 @@ static bool translateRMRegister(MCInst &mcInst, debug("A R/M register operand may not have a SIB byte"); return true; } - + switch (insn.eaBase) { default: debug("Unexpected EA base register"); @@ -427,7 +475,7 @@ static bool translateRMRegister(MCInst &mcInst, ALL_REGS #undef ENTRY } - + return false; } @@ -440,26 +488,26 @@ static bool translateRMRegister(MCInst &mcInst, /// from. /// @return - 0 on success; nonzero otherwise static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn, - const MCDisassembler *Dis) { + const MCDisassembler *Dis) { // Addresses in an MCInst are represented as five operands: - // 1. basereg (register) The R/M base, or (if there is a SIB) the + // 1. basereg (register) The R/M base, or (if there is a SIB) the // SIB base - // 2. scaleamount (immediate) 1, or (if there is a SIB) the specified + // 2. scaleamount (immediate) 1, or (if there is a SIB) the specified // scale amount // 3. indexreg (register) x86_registerNONE, or (if there is a SIB) - // the index (which is multiplied by the + // the index (which is multiplied by the // scale amount) // 4. displacement (immediate) 0, or the displacement if there is one // 5. segmentreg (register) x86_registerNONE for now, but could be set // if we have segment overrides - + MCOperand baseReg; MCOperand scaleAmount; MCOperand indexReg; MCOperand displacement; MCOperand segmentReg; uint64_t pcrel = 0; - + if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) { if (insn.sibBase != SIB_BASE_NONE) { switch (insn.sibBase) { @@ -512,7 +560,7 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn, (insn.addressSize == 8 ? SIB_INDEX_RAX:SIB_INDEX_EAX); SIBIndex IndexBase = IndexIs512 ? SIB_INDEX_ZMM0 : IndexIs256 ? 
SIB_INDEX_YMM0 : SIB_INDEX_XMM0; - insn.sibIndex = (SIBIndex)(IndexBase + + insn.sibIndex = (SIBIndex)(IndexBase + (insn.sibIndex == SIB_INDEX_NONE ? 4 : IndexOffset)); } @@ -534,7 +582,7 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn, } else { indexReg = MCOperand::CreateReg(0); } - + scaleAmount = MCOperand::CreateImm(insn.sibScale); } else { switch (insn.eaBase) { @@ -553,7 +601,7 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn, } else baseReg = MCOperand::CreateReg(0); - + indexReg = MCOperand::CreateReg(0); break; case EA_BASE_BX_SI: @@ -584,7 +632,7 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn, // placeholders to keep the compiler happy. #define ENTRY(x) \ case EA_BASE_##x: \ - baseReg = MCOperand::CreateReg(X86::x); break; + baseReg = MCOperand::CreateReg(X86::x); break; ALL_EA_BASES #undef ENTRY #define ENTRY(x) case EA_REG_##x: @@ -595,14 +643,14 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn, return true; } } - + scaleAmount = MCOperand::CreateImm(1); } - + displacement = MCOperand::CreateImm(insn.displacement); segmentReg = MCOperand::CreateReg(segmentRegnums[insn.segmentOverride]); - + mcInst.addOperand(baseReg); mcInst.addOperand(scaleAmount); mcInst.addOperand(indexReg); @@ -623,7 +671,7 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn, /// from. 
/// @return - 0 on success; nonzero otherwise static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand, - InternalInstruction &insn, const MCDisassembler *Dis) { + InternalInstruction &insn, const MCDisassembler *Dis) { switch (operand.type) { default: debug("Unexpected type for a R/M operand"); @@ -633,8 +681,6 @@ static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand, case TYPE_R32: case TYPE_R64: case TYPE_Rv: - case TYPE_MM: - case TYPE_MM32: case TYPE_MM64: case TYPE_XMM: case TYPE_XMM32: @@ -660,9 +706,6 @@ static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand, case TYPE_M32FP: case TYPE_M64FP: case TYPE_M80FP: - case TYPE_M16INT: - case TYPE_M32INT: - case TYPE_M64INT: case TYPE_M1616: case TYPE_M1632: case TYPE_M1664: @@ -670,7 +713,7 @@ static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand, return translateRMMemory(mcInst, insn, Dis); } } - + /// translateFPRegister - Translates a stack position on the FPU stack to its /// LLVM form, and appends it to an MCInst. /// @@ -698,7 +741,7 @@ static bool translateMaskRegister(MCInst &mcInst, return false; } -/// translateOperand - Translates an operand stored in an internal instruction +/// translateOperand - Translates an operand stored in an internal instruction /// to LLVM's format and appends it to an MCInst. /// /// @param mcInst - The MCInst to append to. @@ -707,7 +750,7 @@ static bool translateMaskRegister(MCInst &mcInst, /// @return - false on success; true otherwise. static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand, InternalInstruction &insn, - const MCDisassembler *Dis) { + const MCDisassembler *Dis) { switch (operand.encoding) { default: debug("Unhandled operand encoding during translation"); @@ -761,7 +804,7 @@ static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand, insn, Dis); } } - + /// translateInstruction - Translates an internal instruction and all its /// operands to an MCInst. 
/// @@ -770,12 +813,12 @@ static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand, /// @return - false on success; true otherwise. static bool translateInstruction(MCInst &mcInst, InternalInstruction &insn, - const MCDisassembler *Dis) { + const MCDisassembler *Dis) { if (!insn.spec) { debug("Instruction has no specification"); return true; } - + mcInst.setOpcode(insn.instructionID); // If when reading the prefix bytes we determined the overlapping 0xf2 or 0xf3 // prefix bytes should be disassembled as xrelease and xacquire then set the @@ -786,9 +829,9 @@ static bool translateInstruction(MCInst &mcInst, else if(mcInst.getOpcode() == X86::REPNE_PREFIX) mcInst.setOpcode(X86::XACQUIRE_PREFIX); } - + insn.numImmediatesTranslated = 0; - + for (const auto &Op : insn.operands) { if (Op.encoding != ENCODING_NONE) { if (translateOperand(mcInst, Op, insn, Dis)) { @@ -796,7 +839,7 @@ static bool translateInstruction(MCInst &mcInst, } } } - + return false; } @@ -807,9 +850,9 @@ static MCDisassembler *createX86Disassembler(const Target &T, return new X86Disassembler::X86GenericDisassembler(STI, Ctx, std::move(MII)); } -extern "C" void LLVMInitializeX86Disassembler() { +extern "C" void LLVMInitializeX86Disassembler() { // Register the disassembler. 
- TargetRegistry::RegisterMCDisassembler(TheX86_32Target, + TargetRegistry::RegisterMCDisassembler(TheX86_32Target, createX86Disassembler); TargetRegistry::RegisterMCDisassembler(TheX86_64Target, createX86Disassembler); diff --git a/lib/Target/X86/Disassembler/X86Disassembler.h b/lib/Target/X86/Disassembler/X86Disassembler.h index 4dc7c29078fc..d7f426b2641d 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.h +++ b/lib/Target/X86/Disassembler/X86Disassembler.h @@ -71,8 +71,8 @@ // //===----------------------------------------------------------------------===// -#ifndef X86DISASSEMBLER_H -#define X86DISASSEMBLER_H +#ifndef LLVM_LIB_TARGET_X86_DISASSEMBLER_X86DISASSEMBLER_H +#define LLVM_LIB_TARGET_X86_DISASSEMBLER_X86DISASSEMBLER_H #include "X86DisassemblerDecoderCommon.h" #include "llvm/MC/MCDisassembler.h" @@ -87,21 +87,17 @@ class raw_ostream; namespace X86Disassembler { -/// X86GenericDisassembler - Generic disassembler for all X86 platforms. -/// All each platform class should have to do is subclass the constructor, and -/// provide a different disassemblerMode value. +/// Generic disassembler for all X86 platforms. All each platform class should +/// have to do is subclass the constructor, and provide a different +/// disassemblerMode value. class X86GenericDisassembler : public MCDisassembler { std::unique_ptr<const MCInstrInfo> MII; public: - /// Constructor - Initializes the disassembler. - /// X86GenericDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, std::unique_ptr<const MCInstrInfo> MII); public: - - /// getInstruction - See MCDisassembler. 
DecodeStatus getInstruction(MCInst &instr, uint64_t &size, - const MemoryObject &region, uint64_t address, + ArrayRef<uint8_t> Bytes, uint64_t Address, raw_ostream &vStream, raw_ostream &cStream) const override; diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp index ab3d1f774bc7..619a0d4dd65e 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp @@ -1,4 +1,4 @@ -//===-- X86DisassemblerDecoder.c - Disassembler decoder -------------------===// +//===-- X86DisassemblerDecoder.cpp - Disassembler decoder -----------------===// // // The LLVM Compiler Infrastructure // @@ -13,10 +13,10 @@ // //===----------------------------------------------------------------------===// -#include <stdarg.h> /* for va_*() */ -#include <stdio.h> /* for vsnprintf() */ -#include <stdlib.h> /* for exit() */ -#include <string.h> /* for memset() */ +#include <cstdarg> /* for va_*() */ +#include <cstdio> /* for vsnprintf() */ +#include <cstdlib> /* for exit() */ +#include <cstring> /* for memset() */ #include "X86DisassemblerDecoder.h" @@ -472,8 +472,7 @@ static int readPrefixes(struct InternalInstruction* insn) { if ((insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) && ((~byte1 & 0xc) == 0xc) && ((byte2 & 0x4) == 0x4)) { insn->vectorExtensionType = TYPE_EVEX; - } - else { + } else { unconsumeByte(insn); /* unconsume byte1 */ unconsumeByte(insn); /* unconsume byte */ insn->necessaryPrefixLocation = insn->readerCursor - 2; @@ -504,8 +503,7 @@ static int readPrefixes(struct InternalInstruction* insn) { insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1], insn->vectorExtensionPrefix[2], insn->vectorExtensionPrefix[3]); } - } - else if (byte == 0xc4) { + } else if (byte == 0xc4) { uint8_t byte1; if (lookAtByte(insn, &byte1)) { @@ -516,8 +514,7 @@ static int readPrefixes(struct InternalInstruction* insn) { if (insn->mode == MODE_64BIT || 
(byte1 & 0xc0) == 0xc0) { insn->vectorExtensionType = TYPE_VEX_3B; insn->necessaryPrefixLocation = insn->readerCursor - 1; - } - else { + } else { unconsumeByte(insn); insn->necessaryPrefixLocation = insn->readerCursor - 1; } @@ -541,8 +538,7 @@ static int readPrefixes(struct InternalInstruction* insn) { insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1], insn->vectorExtensionPrefix[2]); } - } - else if (byte == 0xc5) { + } else if (byte == 0xc5) { uint8_t byte1; if (lookAtByte(insn, &byte1)) { @@ -552,8 +548,7 @@ static int readPrefixes(struct InternalInstruction* insn) { if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) { insn->vectorExtensionType = TYPE_VEX_2B; - } - else { + } else { unconsumeByte(insn); } @@ -566,8 +561,7 @@ static int readPrefixes(struct InternalInstruction* insn) { | (rFromVEX2of2(insn->vectorExtensionPrefix[1]) << 2); } - switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) - { + switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) { default: break; case VEX_PREFIX_66: @@ -579,8 +573,7 @@ static int readPrefixes(struct InternalInstruction* insn) { insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1]); } - } - else if (byte == 0x8f) { + } else if (byte == 0x8f) { uint8_t byte1; if (lookAtByte(insn, &byte1)) { @@ -591,8 +584,7 @@ static int readPrefixes(struct InternalInstruction* insn) { if ((byte1 & 0x38) != 0x0) { /* 0 in these 3 bits is a POP instruction. 
*/ insn->vectorExtensionType = TYPE_XOP; insn->necessaryPrefixLocation = insn->readerCursor - 1; - } - else { + } else { unconsumeByte(insn); insn->necessaryPrefixLocation = insn->readerCursor - 1; } @@ -612,8 +604,7 @@ static int readPrefixes(struct InternalInstruction* insn) { | (bFromXOP2of3(insn->vectorExtensionPrefix[1]) << 0); } - switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) - { + switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) { default: break; case VEX_PREFIX_66: @@ -625,8 +616,7 @@ static int readPrefixes(struct InternalInstruction* insn) { insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1], insn->vectorExtensionPrefix[2]); } - } - else { + } else { if (insn->mode == MODE_64BIT) { if ((byte & 0xf0) == 0x40) { uint8_t opcodeByte; @@ -698,8 +688,7 @@ static int readOpcode(struct InternalInstruction* insn) { insn->opcodeType = ONEBYTE; - if (insn->vectorExtensionType == TYPE_EVEX) - { + if (insn->vectorExtensionType == TYPE_EVEX) { switch (mmFromEVEX2of4(insn->vectorExtensionPrefix[1])) { default: dbgprintf(insn, "Unhandled mm field for instruction (0x%hhx)", @@ -715,8 +704,7 @@ static int readOpcode(struct InternalInstruction* insn) { insn->opcodeType = THREEBYTE_3A; return consumeByte(insn, &insn->opcode); } - } - else if (insn->vectorExtensionType == TYPE_VEX_3B) { + } else if (insn->vectorExtensionType == TYPE_VEX_3B) { switch (mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])) { default: dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)", @@ -732,12 +720,10 @@ static int readOpcode(struct InternalInstruction* insn) { insn->opcodeType = THREEBYTE_3A; return consumeByte(insn, &insn->opcode); } - } - else if (insn->vectorExtensionType == TYPE_VEX_2B) { + } else if (insn->vectorExtensionType == TYPE_VEX_2B) { insn->opcodeType = TWOBYTE; return consumeByte(insn, &insn->opcode); - } - else if (insn->vectorExtensionType == TYPE_XOP) { + } else if (insn->vectorExtensionType == TYPE_XOP) { switch 
(mmmmmFromXOP2of3(insn->vectorExtensionPrefix[1])) { default: dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)", @@ -866,6 +852,22 @@ static bool is16BitEquivalent(const char* orig, const char* equiv) { } /* + * is64Bit - Determines whether this instruction is a 64-bit instruction. + * + * @param name - The instruction that is not 16-bit + */ +static bool is64Bit(const char* name) { + off_t i; + + for (i = 0;; ++i) { + if (name[i] == '\0') + return false; + if (name[i] == '6' && name[i+1] == '4') + return true; + } +} + +/* * getID - Determines the ID of an instruction, consuming the ModR/M byte as * appropriate for extended and escape opcodes. Determines the attributes and * context for the instruction before doing so. @@ -911,8 +913,7 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) { attrMask |= ATTR_EVEXL; if (l2FromEVEX4of4(insn->vectorExtensionPrefix[3])) attrMask |= ATTR_EVEXL2; - } - else if (insn->vectorExtensionType == TYPE_VEX_3B) { + } else if (insn->vectorExtensionType == TYPE_VEX_3B) { switch (ppFromVEX3of3(insn->vectorExtensionPrefix[2])) { case VEX_PREFIX_66: attrMask |= ATTR_OPSIZE; @@ -927,8 +928,7 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) { if (lFromVEX3of3(insn->vectorExtensionPrefix[2])) attrMask |= ATTR_VEXL; - } - else if (insn->vectorExtensionType == TYPE_VEX_2B) { + } else if (insn->vectorExtensionType == TYPE_VEX_2B) { switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) { case VEX_PREFIX_66: attrMask |= ATTR_OPSIZE; @@ -943,8 +943,7 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) { if (lFromVEX2of2(insn->vectorExtensionPrefix[1])) attrMask |= ATTR_VEXL; - } - else if (insn->vectorExtensionType == TYPE_XOP) { + } else if (insn->vectorExtensionType == TYPE_XOP) { switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) { case VEX_PREFIX_66: attrMask |= ATTR_OPSIZE; @@ -959,12 +958,10 @@ static int getID(struct InternalInstruction* insn, const 
void *miiArg) { if (lFromXOP3of3(insn->vectorExtensionPrefix[2])) attrMask |= ATTR_VEXL; - } - else { + } else { return -1; } - } - else { + } else { if (insn->mode != MODE_16BIT && isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation)) attrMask |= ATTR_OPSIZE; else if (isPrefixAtLocation(insn, 0x67, insn->necessaryPrefixLocation)) @@ -978,29 +975,75 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) { if (insn->rexPrefix & 0x08) attrMask |= ATTR_REXW; - if (getIDWithAttrMask(&instructionID, insn, attrMask)) - return -1; - /* * JCXZ/JECXZ need special handling for 16-bit mode because the meaning * of the AdSize prefix is inverted w.r.t. 32-bit mode. */ - if (insn->mode == MODE_16BIT && insn->opcode == 0xE3) { - const struct InstructionSpecifier *spec; - spec = specifierForUID(instructionID); + if (insn->mode == MODE_16BIT && insn->opcodeType == ONEBYTE && + insn->opcode == 0xE3) + attrMask ^= ATTR_ADSIZE; + if (getIDWithAttrMask(&instructionID, insn, attrMask)) + return -1; + + /* The following clauses compensate for limitations of the tables. */ + + if (insn->mode != MODE_64BIT && + insn->vectorExtensionType != TYPE_NO_VEX_XOP) { /* - * Check for Ii8PCRel instructions. We could alternatively do a - * string-compare on the names, but this is probably cheaper. + * The tables can't distinquish between cases where the W-bit is used to + * select register size and cases where its a required part of the opcode. 
*/ - if (x86OperandSets[spec->operands][0].type == TYPE_REL8) { - attrMask ^= ATTR_ADSIZE; - if (getIDWithAttrMask(&instructionID, insn, attrMask)) - return -1; + if ((insn->vectorExtensionType == TYPE_EVEX && + wFromEVEX3of4(insn->vectorExtensionPrefix[2])) || + (insn->vectorExtensionType == TYPE_VEX_3B && + wFromVEX3of3(insn->vectorExtensionPrefix[2])) || + (insn->vectorExtensionType == TYPE_XOP && + wFromXOP3of3(insn->vectorExtensionPrefix[2]))) { + + uint16_t instructionIDWithREXW; + if (getIDWithAttrMask(&instructionIDWithREXW, + insn, attrMask | ATTR_REXW)) { + insn->instructionID = instructionID; + insn->spec = specifierForUID(instructionID); + return 0; + } + + const char *SpecName = GetInstrName(instructionIDWithREXW, miiArg); + // If not a 64-bit instruction. Switch the opcode. + if (!is64Bit(SpecName)) { + insn->instructionID = instructionIDWithREXW; + insn->spec = specifierForUID(instructionIDWithREXW); + return 0; + } } } - /* The following clauses compensate for limitations of the tables. */ + /* + * Absolute moves need special handling. + * -For 16-bit mode because the meaning of the AdSize and OpSize prefixes are + * inverted w.r.t. + * -For 32-bit mode we need to ensure the ADSIZE prefix is observed in + * any position. + */ + if (insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0)) { + /* Make sure we observed the prefixes in any position. */ + if (insn->prefixPresent[0x67]) + attrMask |= ATTR_ADSIZE; + if (insn->prefixPresent[0x66]) + attrMask |= ATTR_OPSIZE; + + /* In 16-bit, invert the attributes. 
*/ + if (insn->mode == MODE_16BIT) + attrMask ^= ATTR_ADSIZE | ATTR_OPSIZE; + + if (getIDWithAttrMask(&instructionID, insn, attrMask)) + return -1; + + insn->instructionID = instructionID; + insn->spec = specifierForUID(instructionID); + return 0; + } if ((insn->mode == MODE_16BIT || insn->prefixPresent[0x66]) && !(attrMask & ATTR_OPSIZE)) { @@ -1417,22 +1460,14 @@ static int readModRM(struct InternalInstruction* insn) { case TYPE_VK16: \ return prefix##_K0 + index; \ case TYPE_MM64: \ - case TYPE_MM32: \ - case TYPE_MM: \ - if (index > 7) \ - *valid = 0; \ - return prefix##_MM0 + index; \ + return prefix##_MM0 + (index & 0x7); \ case TYPE_SEGMENTREG: \ if (index > 5) \ *valid = 0; \ return prefix##_ES + index; \ case TYPE_DEBUGREG: \ - if (index > 7) \ - *valid = 0; \ return prefix##_DR0 + index; \ case TYPE_CONTROLREG: \ - if (index > 8) \ - *valid = 0; \ return prefix##_CR0 + index; \ } \ } @@ -1709,12 +1744,6 @@ static int readOperands(struct InternalInstruction* insn) { } if (readImmediate(insn, 1)) return -1; - if (Op.type == TYPE_IMM3 && - insn->immediates[insn->numImmediatesConsumed - 1] > 7) - return -1; - if (Op.type == TYPE_IMM5 && - insn->immediates[insn->numImmediatesConsumed - 1] > 31) - return -1; if (Op.type == TYPE_XMM128 || Op.type == TYPE_XMM256) sawRegImm = 1; diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h index 8c45402ab5e1..a79a923ac525 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h @@ -13,8 +13,8 @@ // //===----------------------------------------------------------------------===// -#ifndef X86DISASSEMBLERDECODER_H -#define X86DISASSEMBLERDECODER_H +#ifndef LLVM_LIB_TARGET_X86_DISASSEMBLER_X86DISASSEMBLERDECODER_H +#define LLVM_LIB_TARGET_X86_DISASSEMBLER_X86DISASSEMBLERDECODER_H #include "X86DisassemblerDecoderCommon.h" #include "llvm/ADT/ArrayRef.h" @@ -341,7 +341,15 @@ namespace 
X86Disassembler { ENTRY(DR4) \ ENTRY(DR5) \ ENTRY(DR6) \ - ENTRY(DR7) + ENTRY(DR7) \ + ENTRY(DR8) \ + ENTRY(DR9) \ + ENTRY(DR10) \ + ENTRY(DR11) \ + ENTRY(DR12) \ + ENTRY(DR13) \ + ENTRY(DR14) \ + ENTRY(DR15) #define REGS_CONTROL \ ENTRY(CR0) \ @@ -352,7 +360,14 @@ namespace X86Disassembler { ENTRY(CR5) \ ENTRY(CR6) \ ENTRY(CR7) \ - ENTRY(CR8) + ENTRY(CR8) \ + ENTRY(CR9) \ + ENTRY(CR10) \ + ENTRY(CR11) \ + ENTRY(CR12) \ + ENTRY(CR13) \ + ENTRY(CR14) \ + ENTRY(CR15) #define ALL_EA_BASES \ EA_BASES_16BIT \ diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h index 13a7b557b440..1f8f9da5250e 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h @@ -14,8 +14,8 @@ // //===----------------------------------------------------------------------===// -#ifndef X86DISASSEMBLERDECODERCOMMON_H -#define X86DISASSEMBLERDECODERCOMMON_H +#ifndef LLVM_LIB_TARGET_X86_DISASSEMBLER_X86DISASSEMBLERDECODERCOMMON_H +#define LLVM_LIB_TARGET_X86_DISASSEMBLER_X86DISASSEMBLERDECODERCOMMON_H #include "llvm/Support/DataTypes.h" @@ -82,6 +82,7 @@ enum attributeBits { "operands change width") \ ENUM_ENTRY(IC_ADSIZE, 3, "requires an ADSIZE prefix, so " \ "operands change width") \ + ENUM_ENTRY(IC_OPSIZE_ADSIZE, 4, "requires ADSIZE and OPSIZE prefixes") \ ENUM_ENTRY(IC_XD, 2, "may say something about the opcode " \ "but not the operands") \ ENUM_ENTRY(IC_XS, 2, "may say something about the opcode " \ @@ -90,20 +91,24 @@ enum attributeBits { "operands change width") \ ENUM_ENTRY(IC_XS_OPSIZE, 3, "requires an OPSIZE prefix, so " \ "operands change width") \ - ENUM_ENTRY(IC_64BIT_REXW, 4, "requires a REX.W prefix, so operands "\ + ENUM_ENTRY(IC_64BIT_REXW, 5, "requires a REX.W prefix, so operands "\ "change width; overrides IC_OPSIZE") \ + ENUM_ENTRY(IC_64BIT_REXW_ADSIZE, 6, "requires a REX.W prefix and 0x67 " \ + "prefix") \ 
ENUM_ENTRY(IC_64BIT_OPSIZE, 3, "Just as meaningful as IC_OPSIZE") \ ENUM_ENTRY(IC_64BIT_ADSIZE, 3, "Just as meaningful as IC_ADSIZE") \ - ENUM_ENTRY(IC_64BIT_XD, 5, "XD instructions are SSE; REX.W is " \ + ENUM_ENTRY(IC_64BIT_OPSIZE_ADSIZE, 4, "Just as meaningful as IC_OPSIZE/" \ + "IC_ADSIZE") \ + ENUM_ENTRY(IC_64BIT_XD, 6, "XD instructions are SSE; REX.W is " \ "secondary") \ - ENUM_ENTRY(IC_64BIT_XS, 5, "Just as meaningful as IC_64BIT_XD") \ + ENUM_ENTRY(IC_64BIT_XS, 6, "Just as meaningful as IC_64BIT_XD") \ ENUM_ENTRY(IC_64BIT_XD_OPSIZE, 3, "Just as meaningful as IC_XD_OPSIZE") \ ENUM_ENTRY(IC_64BIT_XS_OPSIZE, 3, "Just as meaningful as IC_XS_OPSIZE") \ - ENUM_ENTRY(IC_64BIT_REXW_XS, 6, "OPSIZE could mean a different " \ + ENUM_ENTRY(IC_64BIT_REXW_XS, 7, "OPSIZE could mean a different " \ "opcode") \ - ENUM_ENTRY(IC_64BIT_REXW_XD, 6, "Just as meaningful as " \ + ENUM_ENTRY(IC_64BIT_REXW_XD, 7, "Just as meaningful as " \ "IC_64BIT_REXW_XS") \ - ENUM_ENTRY(IC_64BIT_REXW_OPSIZE, 7, "The Dynamic Duo! Prefer over all " \ + ENUM_ENTRY(IC_64BIT_REXW_OPSIZE, 8, "The Dynamic Duo! 
Prefer over all " \ "else because this changes most " \ "operands' meaning") \ ENUM_ENTRY(IC_VEX, 1, "requires a VEX prefix") \ @@ -416,10 +421,6 @@ enum OperandEncoding { ENUM_ENTRY(TYPE_M1616, "2+2-byte segment+offset address") \ ENUM_ENTRY(TYPE_M1632, "2+4-byte") \ ENUM_ENTRY(TYPE_M1664, "2+8-byte") \ - ENUM_ENTRY(TYPE_M16_32, "2+4-byte two-part memory operand (LIDT, LGDT)") \ - ENUM_ENTRY(TYPE_M16_16, "2+2-byte (BOUND)") \ - ENUM_ENTRY(TYPE_M32_32, "4+4-byte (BOUND)") \ - ENUM_ENTRY(TYPE_M16_64, "2+8-byte (LIDT, LGDT)") \ ENUM_ENTRY(TYPE_SRCIDX8, "1-byte memory at source index") \ ENUM_ENTRY(TYPE_SRCIDX16, "2-byte memory at source index") \ ENUM_ENTRY(TYPE_SRCIDX32, "4-byte memory at source index") \ @@ -438,14 +439,8 @@ enum OperandEncoding { ENUM_ENTRY(TYPE_M32FP, "32-bit IEE754 memory floating-point operand") \ ENUM_ENTRY(TYPE_M64FP, "64-bit") \ ENUM_ENTRY(TYPE_M80FP, "80-bit extended") \ - ENUM_ENTRY(TYPE_M16INT, "2-byte memory integer operand for use in " \ - "floating-point instructions") \ - ENUM_ENTRY(TYPE_M32INT, "4-byte") \ - ENUM_ENTRY(TYPE_M64INT, "8-byte") \ ENUM_ENTRY(TYPE_ST, "Position on the floating-point stack") \ - ENUM_ENTRY(TYPE_MM, "MMX register operand") \ - ENUM_ENTRY(TYPE_MM32, "4-byte MMX register or memory operand") \ - ENUM_ENTRY(TYPE_MM64, "8-byte") \ + ENUM_ENTRY(TYPE_MM64, "8-byte MMX register") \ ENUM_ENTRY(TYPE_XMM, "XMM register operand") \ ENUM_ENTRY(TYPE_XMM32, "4-byte XMM register or memory operand") \ ENUM_ENTRY(TYPE_XMM64, "8-byte") \ @@ -500,7 +495,7 @@ enum ModifierType { }; #undef ENUM_ENTRY -static const unsigned X86_MAX_OPERANDS = 5; +static const unsigned X86_MAX_OPERANDS = 6; /// Decoding mode for the Intel disassembler. 16-bit, 32-bit, and 64-bit mode /// are supported, and represent real mode, IA-32e, and IA-32e in 64-bit mode, |