diff options
Diffstat (limited to 'lib/Target/X86/Disassembler')
-rw-r--r-- | lib/Target/X86/Disassembler/CMakeLists.txt | 6 | ||||
-rw-r--r-- | lib/Target/X86/Disassembler/LLVMBuild.txt | 23 | ||||
-rw-r--r-- | lib/Target/X86/Disassembler/X86Disassembler.cpp | 235 | ||||
-rw-r--r-- | lib/Target/X86/Disassembler/X86Disassembler.h | 46 | ||||
-rw-r--r-- | lib/Target/X86/Disassembler/X86DisassemblerDecoder.c | 80 | ||||
-rw-r--r-- | lib/Target/X86/Disassembler/X86DisassemblerDecoder.h | 15 | ||||
-rw-r--r-- | lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h | 23 |
7 files changed, 324 insertions, 104 deletions
diff --git a/lib/Target/X86/Disassembler/CMakeLists.txt b/lib/Target/X86/Disassembler/CMakeLists.txt index 4f570d56e60f..0cd6db96dabe 100644 --- a/lib/Target/X86/Disassembler/CMakeLists.txt +++ b/lib/Target/X86/Disassembler/CMakeLists.txt @@ -5,12 +5,6 @@ add_llvm_library(LLVMX86Disassembler X86DisassemblerDecoder.c ) -add_llvm_library_dependencies(LLVMX86Disassembler - LLVMMC - LLVMSupport - LLVMX86Info - ) - # workaround for hanging compilation on MSVC9 and 10 if( MSVC_VERSION EQUAL 1400 OR MSVC_VERSION EQUAL 1500 OR MSVC_VERSION EQUAL 1600 ) set_property( diff --git a/lib/Target/X86/Disassembler/LLVMBuild.txt b/lib/Target/X86/Disassembler/LLVMBuild.txt new file mode 100644 index 000000000000..0609f3c28de3 --- /dev/null +++ b/lib/Target/X86/Disassembler/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./lib/Target/X86/Disassembler/LLVMBuild.txt --------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = X86Disassembler +parent = X86 +required_libraries = MC Support X86Desc X86Info +add_to_library_groups = X86 diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp index 3aacb20e73df..8278bde7c218 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -1,4 +1,4 @@ -//===- X86Disassembler.cpp - Disassembler for x86 and x86_64 ----*- C++ -*-===// +//===-- X86Disassembler.cpp - Disassembler for x86 and x86_64 -------------===// // // The LLVM Compiler Infrastructure // @@ -18,9 +18,11 @@ #include "X86DisassemblerDecoder.h" #include "llvm/MC/EDInstInfo.h" -#include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MemoryObject.h" @@ -42,6 +44,11 @@ void x86DisassemblerDebug(const char *file, dbgs() << file << ":" << line << ": " << s; } +const char *x86DisassemblerGetInstrName(unsigned Opcode, void *mii) { + const MCInstrInfo *MII = static_cast<const MCInstrInfo *>(mii); + return MII->getName(Opcode); +} + #define debug(s) DEBUG(x86DisassemblerDebug(__FILE__, __LINE__, s)); namespace llvm { @@ -65,17 +72,19 @@ extern Target TheX86_32Target, TheX86_64Target; } static bool translateInstruction(MCInst &target, - InternalInstruction &source); + InternalInstruction &source, + const MCDisassembler *Dis); -X86GenericDisassembler::X86GenericDisassembler(const MCSubtargetInfo &STI, DisassemblerMode mode) : - MCDisassembler(STI), - fMode(mode) { -} +X86GenericDisassembler::X86GenericDisassembler(const MCSubtargetInfo &STI, + DisassemblerMode mode, + const MCInstrInfo *MII) + : MCDisassembler(STI), MII(MII), fMode(mode) {} X86GenericDisassembler::~X86GenericDisassembler() { + delete MII; } -EDInstInfo *X86GenericDisassembler::getEDInfo() const { +const EDInstInfo *X86GenericDisassembler::getEDInfo() const { return instInfoX86; } @@ -116,6 +125,8 @@ X86GenericDisassembler::getInstruction(MCInst &instr, uint64_t address, raw_ostream &vStream, raw_ostream &cStream) const { + CommentStream = &cStream; + InternalInstruction internalInstr; dlog_t loggerFn = logger; @@ -127,6 +138,7 @@ X86GenericDisassembler::getInstruction(MCInst &instr, (void*)®ion, loggerFn, (void*)&vStream, + (void*)MII, address, fMode); @@ -136,7 +148,8 @@ X86GenericDisassembler::getInstruction(MCInst &instr, } else { size = internalInstr.length; - return (!translateInstruction(instr, internalInstr)) ? Success : Fail; + return (!translateInstruction(instr, internalInstr, this)) ? + Success : Fail; } } @@ -161,6 +174,140 @@ static void translateRegister(MCInst &mcInst, Reg reg) { mcInst.addOperand(MCOperand::CreateReg(llvmRegnum)); } +/// tryAddingSymbolicOperand - trys to add a symbolic operand in place of the +/// immediate Value in the MCInst. +/// +/// @param Value - The immediate Value, has had any PC adjustment made by +/// the caller. +/// @param isBranch - If the instruction is a branch instruction +/// @param Address - The starting address of the instruction +/// @param Offset - The byte offset to this immediate in the instruction +/// @param Width - The byte width of this immediate in the instruction +/// +/// If the getOpInfo() function was set when setupForSymbolicDisassembly() was +/// called then that function is called to get any symbolic information for the +/// immediate in the instruction using the Address, Offset and Width. If that +/// returns non-zero then the symbolic information it returns is used to create +/// an MCExpr and that is added as an operand to the MCInst. If getOpInfo() +/// returns zero and isBranch is true then a symbol look up for immediate Value +/// is done and if a symbol is found an MCExpr is created with that, else +/// an MCExpr with the immediate Value is created. This function returns true +/// if it adds an operand to the MCInst and false otherwise. +static bool tryAddingSymbolicOperand(int64_t Value, bool isBranch, + uint64_t Address, uint64_t Offset, + uint64_t Width, MCInst &MI, + const MCDisassembler *Dis) { + LLVMOpInfoCallback getOpInfo = Dis->getLLVMOpInfoCallback(); + struct LLVMOpInfo1 SymbolicOp; + memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1)); + SymbolicOp.Value = Value; + void *DisInfo = Dis->getDisInfoBlock(); + + if (!getOpInfo || + !getOpInfo(DisInfo, Address, Offset, Width, 1, &SymbolicOp)) { + // Clear SymbolicOp.Value from above and also all other fields. + memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1)); + LLVMSymbolLookupCallback SymbolLookUp = Dis->getLLVMSymbolLookupCallback(); + if (!SymbolLookUp) + return false; + uint64_t ReferenceType; + if (isBranch) + ReferenceType = LLVMDisassembler_ReferenceType_In_Branch; + else + ReferenceType = LLVMDisassembler_ReferenceType_InOut_None; + const char *ReferenceName; + const char *Name = SymbolLookUp(DisInfo, Value, &ReferenceType, Address, + &ReferenceName); + if (Name) { + SymbolicOp.AddSymbol.Name = Name; + SymbolicOp.AddSymbol.Present = true; + } + // For branches always create an MCExpr so it gets printed as hex address. + else if (isBranch) { + SymbolicOp.Value = Value; + } + if(ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub) + (*Dis->CommentStream) << "symbol stub for: " << ReferenceName; + if (!Name && !isBranch) + return false; + } + + MCContext *Ctx = Dis->getMCContext(); + const MCExpr *Add = NULL; + if (SymbolicOp.AddSymbol.Present) { + if (SymbolicOp.AddSymbol.Name) { + StringRef Name(SymbolicOp.AddSymbol.Name); + MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name); + Add = MCSymbolRefExpr::Create(Sym, *Ctx); + } else { + Add = MCConstantExpr::Create((int)SymbolicOp.AddSymbol.Value, *Ctx); + } + } + + const MCExpr *Sub = NULL; + if (SymbolicOp.SubtractSymbol.Present) { + if (SymbolicOp.SubtractSymbol.Name) { + StringRef Name(SymbolicOp.SubtractSymbol.Name); + MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name); + Sub = MCSymbolRefExpr::Create(Sym, *Ctx); + } else { + Sub = MCConstantExpr::Create((int)SymbolicOp.SubtractSymbol.Value, *Ctx); + } + } + + const MCExpr *Off = NULL; + if (SymbolicOp.Value != 0) + Off = MCConstantExpr::Create(SymbolicOp.Value, *Ctx); + + const MCExpr *Expr; + if (Sub) { + const MCExpr *LHS; + if (Add) + LHS = MCBinaryExpr::CreateSub(Add, Sub, *Ctx); + else + LHS = MCUnaryExpr::CreateMinus(Sub, *Ctx); + if (Off != 0) + Expr = MCBinaryExpr::CreateAdd(LHS, Off, *Ctx); + else + Expr = LHS; + } else if (Add) { + if (Off != 0) + Expr = MCBinaryExpr::CreateAdd(Add, Off, *Ctx); + else + Expr = Add; + } else { + if (Off != 0) + Expr = Off; + else + Expr = MCConstantExpr::Create(0, *Ctx); + } + + MI.addOperand(MCOperand::CreateExpr(Expr)); + + return true; +} + +/// tryAddingPcLoadReferenceComment - trys to add a comment as to what is being +/// referenced by a load instruction with the base register that is the rip. +/// These can often be addresses in a literal pool. The Address of the +/// instruction and its immediate Value are used to determine the address +/// being referenced in the literal pool entry. The SymbolLookUp call back will +/// return a pointer to a literal 'C' string if the referenced address is an +/// address into a section with 'C' string literals. +static void tryAddingPcLoadReferenceComment(uint64_t Address, uint64_t Value, + const void *Decoder) { + const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder); + LLVMSymbolLookupCallback SymbolLookUp = Dis->getLLVMSymbolLookupCallback(); + if (SymbolLookUp) { + void *DisInfo = Dis->getDisInfoBlock(); + uint64_t ReferenceType = LLVMDisassembler_ReferenceType_In_PCrel_Load; + const char *ReferenceName; + (void)SymbolLookUp(DisInfo, Value, &ReferenceType, Address, &ReferenceName); + if(ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr) + (*Dis->CommentStream) << "literal pool for: " << ReferenceName; + } +} + /// translateImmediate - Appends an immediate operand to an MCInst. /// /// @param mcInst - The MCInst to append to. @@ -169,10 +316,11 @@ static void translateRegister(MCInst &mcInst, Reg reg) { /// @param insn - The internal instruction. static void translateImmediate(MCInst &mcInst, uint64_t immediate, const OperandSpecifier &operand, - InternalInstruction &insn) { + InternalInstruction &insn, + const MCDisassembler *Dis) { // Sign-extend the immediate if necessary. - OperandType type = operand.type; + OperandType type = (OperandType)operand.type; if (type == TYPE_RELv) { switch (insn.displacementSize) { @@ -225,6 +373,8 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate, } } + bool isBranch = false; + uint64_t pcrel = 0; switch (type) { case TYPE_XMM128: mcInst.addOperand(MCOperand::CreateReg(X86::XMM0 + (immediate >> 4))); @@ -232,8 +382,11 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate, case TYPE_XMM256: mcInst.addOperand(MCOperand::CreateReg(X86::YMM0 + (immediate >> 4))); return; - case TYPE_MOFFS8: case TYPE_REL8: + isBranch = true; + pcrel = insn.startLocation + insn.immediateOffset + insn.immediateSize; + // fall through to sign extend the immediate if needed. + case TYPE_MOFFS8: if(immediate & 0x80) immediate |= ~(0xffull); break; @@ -241,9 +394,12 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate, if(immediate & 0x8000) immediate |= ~(0xffffull); break; - case TYPE_MOFFS32: case TYPE_REL32: case TYPE_REL64: + isBranch = true; + pcrel = insn.startLocation + insn.immediateOffset + insn.immediateSize; + // fall through to sign extend the immediate if needed. + case TYPE_MOFFS32: if(immediate & 0x80000000) immediate |= ~(0xffffffffull); break; @@ -253,7 +409,10 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate, break; } - mcInst.addOperand(MCOperand::CreateImm(immediate)); + if(!tryAddingSymbolicOperand(immediate + pcrel, isBranch, insn.startLocation, + insn.immediateOffset, insn.immediateSize, + mcInst, Dis)) + mcInst.addOperand(MCOperand::CreateImm(immediate)); } /// translateRMRegister - Translates a register stored in the R/M field of the @@ -300,7 +459,8 @@ static bool translateRMRegister(MCInst &mcInst, /// @param insn - The instruction to extract Mod, R/M, and SIB fields /// from. /// @return - 0 on success; nonzero otherwise -static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn) { +static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn, + const MCDisassembler *Dis) { // Addresses in an MCInst are represented as five operands: // 1. basereg (register) The R/M base, or (if there is a SIB) the // SIB base @@ -318,6 +478,7 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn) { MCOperand indexReg; MCOperand displacement; MCOperand segmentReg; + uint64_t pcrel = 0; if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) { if (insn.sibBase != SIB_BASE_NONE) { @@ -359,8 +520,14 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn) { debug("EA_BASE_NONE and EA_DISP_NONE for ModR/M base"); return true; } - if (insn.mode == MODE_64BIT) + if (insn.mode == MODE_64BIT){ + pcrel = insn.startLocation + + insn.displacementOffset + insn.displacementSize; + tryAddingPcLoadReferenceComment(insn.startLocation + + insn.displacementOffset, + insn.displacement + pcrel, Dis); baseReg = MCOperand::CreateReg(X86::RIP); // Section 2.2.1.6 + } else baseReg = MCOperand::CreateReg(0); @@ -426,7 +593,10 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn) { mcInst.addOperand(baseReg); mcInst.addOperand(scaleAmount); mcInst.addOperand(indexReg); - mcInst.addOperand(displacement); + if(!tryAddingSymbolicOperand(insn.displacement + pcrel, false, + insn.startLocation, insn.displacementOffset, + insn.displacementSize, mcInst, Dis)) + mcInst.addOperand(displacement); mcInst.addOperand(segmentReg); return false; } @@ -440,7 +610,7 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn) { /// from. /// @return - 0 on success; nonzero otherwise static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand, - InternalInstruction &insn) { + InternalInstruction &insn, const MCDisassembler *Dis) { switch (operand.type) { default: debug("Unexpected type for a R/M operand"); @@ -480,7 +650,7 @@ static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand, case TYPE_M1632: case TYPE_M1664: case TYPE_LEA: - return translateRMMemory(mcInst, insn); + return translateRMMemory(mcInst, insn, Dis); } } @@ -510,7 +680,8 @@ static bool translateFPRegister(MCInst &mcInst, /// @param insn - The internal instruction. /// @return - false on success; true otherwise. static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand, - InternalInstruction &insn) { + InternalInstruction &insn, + const MCDisassembler *Dis) { switch (operand.encoding) { default: debug("Unhandled operand encoding during translation"); @@ -519,7 +690,7 @@ static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand, translateRegister(mcInst, insn.reg); return false; case ENCODING_RM: - return translateRM(mcInst, operand, insn); + return translateRM(mcInst, operand, insn, Dis); case ENCODING_CB: case ENCODING_CW: case ENCODING_CD: @@ -537,7 +708,8 @@ static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand, translateImmediate(mcInst, insn.immediates[insn.numImmediatesTranslated++], operand, - insn); + insn, + Dis); return false; case ENCODING_RB: case ENCODING_RW: @@ -556,7 +728,7 @@ static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand, case ENCODING_DUP: return translateOperand(mcInst, insn.spec->operands[operand.type - TYPE_DUP0], - insn); + insn, Dis); } } @@ -567,7 +739,8 @@ static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand, /// @param insn - The internal instruction. /// @return - false on success; true otherwise. static bool translateInstruction(MCInst &mcInst, - InternalInstruction &insn) { + InternalInstruction &insn, + const MCDisassembler *Dis) { if (!insn.spec) { debug("Instruction has no specification"); return true; @@ -581,7 +754,7 @@ static bool translateInstruction(MCInst &mcInst, for (index = 0; index < X86_MAX_OPERANDS; ++index) { if (insn.spec->operands[index].encoding != ENCODING_NONE) { - if (translateOperand(mcInst, insn.spec->operands[index], insn)) { + if (translateOperand(mcInst, insn.spec->operands[index], insn, Dis)) { return true; } } @@ -590,12 +763,16 @@ static bool translateInstruction(MCInst &mcInst, return false; } -static MCDisassembler *createX86_32Disassembler(const Target &T, const MCSubtargetInfo &STI) { - return new X86Disassembler::X86_32Disassembler(STI); +static MCDisassembler *createX86_32Disassembler(const Target &T, + const MCSubtargetInfo &STI) { + return new X86Disassembler::X86GenericDisassembler(STI, MODE_32BIT, + T.createMCInstrInfo()); } -static MCDisassembler *createX86_64Disassembler(const Target &T, const MCSubtargetInfo &STI) { - return new X86Disassembler::X86_64Disassembler(STI); +static MCDisassembler *createX86_64Disassembler(const Target &T, + const MCSubtargetInfo &STI) { + return new X86Disassembler::X86GenericDisassembler(STI, MODE_64BIT, + T.createMCInstrInfo()); } extern "C" void LLVMInitializeX86Disassembler() { diff --git a/lib/Target/X86/Disassembler/X86Disassembler.h b/lib/Target/X86/Disassembler/X86Disassembler.h index 6ac9a0ff1019..c11f51c6a9ac 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.h +++ b/lib/Target/X86/Disassembler/X86Disassembler.h @@ -1,4 +1,4 @@ -//===- X86Disassembler.h - Disassembler for x86 and x86_64 ------*- C++ -*-===// +//===-- X86Disassembler.h - Disassembler for x86 and x86_64 -----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -78,7 +78,7 @@ const char* name; #define INSTRUCTION_IDS \ - const InstrUID *instructionIDs; + unsigned instructionIDs; #include "X86DisassemblerDecoderCommon.h" @@ -87,11 +87,10 @@ #include "llvm/MC/MCDisassembler.h" -struct InternalInstruction; - namespace llvm { class MCInst; +class MCInstrInfo; class MCSubtargetInfo; class MemoryObject; class raw_ostream; @@ -104,13 +103,16 @@ namespace X86Disassembler { /// All each platform class should have to do is subclass the constructor, and /// provide a different disassemblerMode value. class X86GenericDisassembler : public MCDisassembler { -protected: + const MCInstrInfo *MII; +public: /// Constructor - Initializes the disassembler. /// /// @param mode - The X86 architecture mode to decode for. - X86GenericDisassembler(const MCSubtargetInfo &STI, DisassemblerMode mode); -public: + X86GenericDisassembler(const MCSubtargetInfo &STI, DisassemblerMode mode, + const MCInstrInfo *MII); +private: ~X86GenericDisassembler(); +public: /// getInstruction - See MCDisassembler. DecodeStatus getInstruction(MCInst &instr, @@ -121,37 +123,13 @@ public: raw_ostream &cStream) const; /// getEDInfo - See MCDisassembler. - EDInstInfo *getEDInfo() const; + const EDInstInfo *getEDInfo() const; private: DisassemblerMode fMode; }; -/// X86_16Disassembler - 16-bit X86 disassembler. -class X86_16Disassembler : public X86GenericDisassembler { -public: - X86_16Disassembler(const MCSubtargetInfo &STI) : - X86GenericDisassembler(STI, MODE_16BIT) { - } -}; - -/// X86_16Disassembler - 32-bit X86 disassembler. -class X86_32Disassembler : public X86GenericDisassembler { -public: - X86_32Disassembler(const MCSubtargetInfo &STI) : - X86GenericDisassembler(STI, MODE_32BIT) { - } -}; - -/// X86_16Disassembler - 64-bit X86 disassembler. -class X86_64Disassembler : public X86GenericDisassembler { -public: - X86_64Disassembler(const MCSubtargetInfo &STI) : - X86GenericDisassembler(STI, MODE_64BIT) { - } -}; - } // namespace X86Disassembler - + } // namespace llvm - + #endif diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c index f9b0fe5d51b9..602087756b23 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c @@ -1,4 +1,4 @@ -/*===- X86DisassemblerDecoder.c - Disassembler decoder -------------*- C -*-==* +/*===-- X86DisassemblerDecoder.c - Disassembler decoder ------------*- C -*-===* * * The LLVM Compiler Infrastructure * @@ -82,11 +82,9 @@ static int modRMRequired(OpcodeType type, decision = &THREEBYTEA7_SYM; break; } - + return decision->opcodeDecisions[insnContext].modRMDecisions[opcode]. modrm_type != MODRM_ONEENTRY; - - return 0; } /* @@ -103,12 +101,9 @@ static InstrUID decode(OpcodeType type, InstructionContext insnContext, uint8_t opcode, uint8_t modRM) { - const struct ModRMDecision* dec; + const struct ModRMDecision* dec = 0; switch (type) { - default: - debug("Unknown opcode type"); - return 0; case ONEBYTE: dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; break; @@ -134,14 +129,17 @@ static InstrUID decode(OpcodeType type, debug("Corrupt table! Unknown modrm_type"); return 0; case MODRM_ONEENTRY: - return dec->instructionIDs[0]; + return modRMTable[dec->instructionIDs]; case MODRM_SPLITRM: if (modFromModRM(modRM) == 0x3) - return dec->instructionIDs[1]; - else - return dec->instructionIDs[0]; + return modRMTable[dec->instructionIDs+1]; + return modRMTable[dec->instructionIDs]; + case MODRM_SPLITREG: + if (modFromModRM(modRM) == 0x3) + return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)+8]; + return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)]; case MODRM_FULL: - return dec->instructionIDs[modRM]; + return modRMTable[dec->instructionIDs+modRM]; } } @@ -314,6 +312,15 @@ static int readPrefixes(struct InternalInstruction* insn) { if (consumeByte(insn, &byte)) return -1; + + /* + * If the first byte is a LOCK prefix break and let it be disassembled + * as a lock "instruction", by creating an <MCInst #xxxx LOCK_PREFIX>. + * FIXME there is currently no way to get the disassembler to print the + * lock prefix if it is not the first byte. + */ + if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0) + break; switch (byte) { case 0xf0: /* LOCK */ @@ -712,7 +719,7 @@ static BOOL is16BitEquvalent(const char* orig, const char* equiv) { * @return - 0 if the ModR/M could be read when needed or was not needed; * nonzero otherwise. */ -static int getID(struct InternalInstruction* insn) { +static int getID(struct InternalInstruction* insn, void *miiArg) { uint8_t attrMask; uint16_t instructionID; @@ -765,6 +772,8 @@ static int getID(struct InternalInstruction* insn) { else { if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation)) attrMask |= ATTR_OPSIZE; + else if (isPrefixAtLocation(insn, 0x67, insn->necessaryPrefixLocation)) + attrMask |= ATTR_ADSIZE; else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation)) attrMask |= ATTR_XS; else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation)) @@ -773,17 +782,20 @@ static int getID(struct InternalInstruction* insn) { if (insn->rexPrefix & 0x08) attrMask |= ATTR_REXW; - + if (getIDWithAttrMask(&instructionID, insn, attrMask)) return -1; - + /* The following clauses compensate for limitations of the tables. */ - - if ((attrMask & ATTR_VEXL) && (attrMask & ATTR_REXW)) { + + if ((attrMask & ATTR_VEXL) && (attrMask & ATTR_REXW) && + !(attrMask & ATTR_OPSIZE)) { /* * Some VEX instructions ignore the L-bit, but use the W-bit. Normally L-bit * has precedence since there are no L-bit with W-bit entries in the tables. * So if the L-bit isn't significant we should use the W-bit instead. + * We only need to do this if the instruction doesn't specify OpSize since + * there is a VEX_L_W_OPSIZE table. */ const struct InstructionSpecifier *spec; @@ -823,7 +835,7 @@ static int getID(struct InternalInstruction* insn) { const struct InstructionSpecifier *spec; uint16_t instructionIDWithOpsize; - const struct InstructionSpecifier *specWithOpsize; + const char *specName, *specWithOpSizeName; spec = specifierForUID(instructionID); @@ -840,11 +852,13 @@ static int getID(struct InternalInstruction* insn) { return 0; } - specWithOpsize = specifierForUID(instructionIDWithOpsize); - - if (is16BitEquvalent(spec->name, specWithOpsize->name)) { + specName = x86DisassemblerGetInstrName(instructionID, miiArg); + specWithOpSizeName = + x86DisassemblerGetInstrName(instructionIDWithOpsize, miiArg); + + if (is16BitEquvalent(specName, specWithOpSizeName)) { insn->instructionID = instructionIDWithOpsize; - insn->spec = specWithOpsize; + insn->spec = specifierForUID(instructionIDWithOpsize); } else { insn->instructionID = instructionID; insn->spec = spec; @@ -1011,6 +1025,7 @@ static int readDisplacement(struct InternalInstruction* insn) { return 0; insn->consumedDisplacement = TRUE; + insn->displacementOffset = insn->readerCursor - insn->startLocation; switch (insn->eaDisplacement) { case EA_DISP_NONE: @@ -1407,6 +1422,7 @@ static int readImmediate(struct InternalInstruction* insn, uint8_t size) { size = insn->immediateSize; else insn->immediateSize = size; + insn->immediateOffset = insn->readerCursor - insn->startLocation; switch (size) { case 1: @@ -1469,6 +1485,7 @@ static int readVVVV(struct InternalInstruction* insn) { static int readOperands(struct InternalInstruction* insn) { int index; int hasVVVV, needVVVV; + int sawRegImm = 0; dbgprintf(insn, "readOperands()"); @@ -1497,11 +1514,25 @@ static int readOperands(struct InternalInstruction* insn) { dbgprintf(insn, "We currently don't hande code-offset encodings"); return -1; case ENCODING_IB: + if (sawRegImm) { + /* Saw a register immediate so don't read again and instead split the + previous immediate. FIXME: This is a hack. */ + insn->immediates[insn->numImmediatesConsumed] = + insn->immediates[insn->numImmediatesConsumed - 1] & 0xf; + ++insn->numImmediatesConsumed; + break; + } if (readImmediate(insn, 1)) return -1; if (insn->spec->operands[index].type == TYPE_IMM3 && insn->immediates[insn->numImmediatesConsumed - 1] > 7) return -1; + if (insn->spec->operands[index].type == TYPE_IMM5 && + insn->immediates[insn->numImmediatesConsumed - 1] > 31) + return -1; + if (insn->spec->operands[index].type == TYPE_XMM128 || + insn->spec->operands[index].type == TYPE_XMM256) + sawRegImm = 1; break; case ENCODING_IW: if (readImmediate(insn, 2)) @@ -1593,6 +1624,7 @@ int decodeInstruction(struct InternalInstruction* insn, void* readerArg, dlog_t logger, void* loggerArg, + void* miiArg, uint64_t startLoc, DisassemblerMode mode) { memset(insn, 0, sizeof(struct InternalInstruction)); @@ -1608,7 +1640,7 @@ int decodeInstruction(struct InternalInstruction* insn, if (readPrefixes(insn) || readOpcode(insn) || - getID(insn) || + getID(insn, miiArg) || insn->instructionID == 0 || readOperands(insn)) return -1; diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h index a9c90f8f9bda..fae309b45d02 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h @@ -1,4 +1,4 @@ -/*===- X86DisassemblerDecoderInternal.h - Disassembler decoder -----*- C -*-==* +/*===-- X86DisassemblerDecoderInternal.h - Disassembler decoder ---*- C -*-===* * * The LLVM Compiler Infrastructure * @@ -20,11 +20,10 @@ extern "C" { #endif -#define INSTRUCTION_SPECIFIER_FIELDS \ - const char* name; +#define INSTRUCTION_SPECIFIER_FIELDS #define INSTRUCTION_IDS \ - const InstrUID *instructionIDs; + unsigned instructionIDs; #include "X86DisassemblerDecoderCommon.h" @@ -460,6 +459,11 @@ struct InternalInstruction { uint8_t addressSize; uint8_t displacementSize; uint8_t immediateSize; + + /* Offsets from the start of the instruction to the pieces of data, which is + needed to find relocation entries for adding symbolic operands */ + uint8_t displacementOffset; + uint8_t immediateOffset; /* opcode state */ @@ -554,6 +558,7 @@ int decodeInstruction(struct InternalInstruction* insn, void* readerArg, dlog_t logger, void* loggerArg, + void* miiArg, uint64_t startLoc, DisassemblerMode mode); @@ -568,6 +573,8 @@ void x86DisassemblerDebug(const char *file, unsigned line, const char *s); +const char *x86DisassemblerGetInstrName(unsigned Opcode, void *mii); + #ifdef __cplusplus } #endif diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h index 8b7933545a56..13e113609bf3 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h @@ -1,4 +1,4 @@ -/*===- X86DisassemblerDecoderCommon.h - Disassembler decoder -------*- C -*-==* +/*===-- X86DisassemblerDecoderCommon.h - Disassembler decoder -----*- C -*-===* * * The LLVM Compiler Infrastructure * @@ -54,8 +54,9 @@ ENUM_ENTRY(ATTR_XD, 0x04) \ ENUM_ENTRY(ATTR_REXW, 0x08) \ ENUM_ENTRY(ATTR_OPSIZE, 0x10) \ - ENUM_ENTRY(ATTR_VEX, 0x20) \ - ENUM_ENTRY(ATTR_VEXL, 0x40) + ENUM_ENTRY(ATTR_ADSIZE, 0x20) \ + ENUM_ENTRY(ATTR_VEX, 0x40) \ + ENUM_ENTRY(ATTR_VEXL, 0x80) #define ENUM_ENTRY(n, v) n = v, enum attributeBits { @@ -77,6 +78,8 @@ enum attributeBits { "64-bit mode but no more") \ ENUM_ENTRY(IC_OPSIZE, 3, "requires an OPSIZE prefix, so " \ "operands change width") \ + ENUM_ENTRY(IC_ADSIZE, 3, "requires an ADSIZE prefix, so " \ + "operands change width") \ ENUM_ENTRY(IC_XD, 2, "may say something about the opcode " \ "but not the operands") \ ENUM_ENTRY(IC_XS, 2, "may say something about the opcode " \ @@ -88,6 +91,7 @@ enum attributeBits { ENUM_ENTRY(IC_64BIT_REXW, 4, "requires a REX.W prefix, so operands "\ "change width; overrides IC_OPSIZE") \ ENUM_ENTRY(IC_64BIT_OPSIZE, 3, "Just as meaningful as IC_OPSIZE") \ + ENUM_ENTRY(IC_64BIT_ADSIZE, 3, "Just as meaningful as IC_ADSIZE") \ ENUM_ENTRY(IC_64BIT_XD, 5, "XD instructions are SSE; REX.W is " \ "secondary") \ ENUM_ENTRY(IC_64BIT_XS, 5, "Just as meaningful as IC_64BIT_XD") \ @@ -111,7 +115,8 @@ enum attributeBits { ENUM_ENTRY(IC_VEX_L, 3, "requires VEX and the L prefix") \ ENUM_ENTRY(IC_VEX_L_XS, 4, "requires VEX and the L and XS prefix")\ ENUM_ENTRY(IC_VEX_L_XD, 4, "requires VEX and the L and XD prefix")\ - ENUM_ENTRY(IC_VEX_L_OPSIZE, 4, "requires VEX, L, and OpSize") + ENUM_ENTRY(IC_VEX_L_OPSIZE, 4, "requires VEX, L, and OpSize") \ + ENUM_ENTRY(IC_VEX_L_W_OPSIZE, 5, "requires VEX, L, W and OpSize") #define ENUM_ENTRY(n, r, d) n, @@ -155,6 +160,8 @@ typedef uint16_t InstrUID; * MODRM_SPLITRM - If the ModR/M byte is between 0x00 and 0xbf, the opcode * corresponds to one instruction; otherwise, it corresponds to * a different instruction. + * MODRM_SPLITREG - ModR/M byte divided by 8 is used to select instruction. This + corresponds to instructions that use reg field as opcode * MODRM_FULL - Potentially, each value of the ModR/M byte could correspond * to a different instruction. */ @@ -162,6 +169,7 @@ typedef uint16_t InstrUID; #define MODRMTYPES \ ENUM_ENTRY(MODRM_ONEENTRY) \ ENUM_ENTRY(MODRM_SPLITRM) \ + ENUM_ENTRY(MODRM_SPLITREG) \ ENUM_ENTRY(MODRM_FULL) #define ENUM_ENTRY(n) n, @@ -265,6 +273,7 @@ struct ContextDecision { ENUM_ENTRY(TYPE_IMM32, "4-byte") \ ENUM_ENTRY(TYPE_IMM64, "8-byte") \ ENUM_ENTRY(TYPE_IMM3, "1-byte immediate operand between 0 and 7") \ + ENUM_ENTRY(TYPE_IMM5, "1-byte immediate operand between 0 and 31") \ ENUM_ENTRY(TYPE_RM8, "1-byte register or memory operand") \ ENUM_ENTRY(TYPE_RM16, "2-byte") \ ENUM_ENTRY(TYPE_RM32, "4-byte") \ @@ -335,8 +344,8 @@ typedef enum { * operand. */ struct OperandSpecifier { - OperandEncoding encoding; - OperandType type; + uint8_t encoding; + uint8_t type; }; /* @@ -363,7 +372,7 @@ typedef enum { * its operands. */ struct InstructionSpecifier { - ModifierType modifierType; + uint8_t modifierType; uint8_t modifierBase; struct OperandSpecifier operands[X86_MAX_OPERANDS]; |