diff options
Diffstat (limited to 'llvm/lib/MC/MCDisassembler')
| -rw-r--r-- | llvm/lib/MC/MCDisassembler/Disassembler.cpp | 343 | ||||
| -rw-r--r-- | llvm/lib/MC/MCDisassembler/Disassembler.h | 124 | ||||
| -rw-r--r-- | llvm/lib/MC/MCDisassembler/MCDisassembler.cpp | 46 | ||||
| -rw-r--r-- | llvm/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp | 199 | ||||
| -rw-r--r-- | llvm/lib/MC/MCDisassembler/MCRelocationInfo.cpp | 30 | ||||
| -rw-r--r-- | llvm/lib/MC/MCDisassembler/MCSymbolizer.cpp | 13 | 
6 files changed, 755 insertions, 0 deletions
diff --git a/llvm/lib/MC/MCDisassembler/Disassembler.cpp b/llvm/lib/MC/MCDisassembler/Disassembler.cpp new file mode 100644 index 0000000000000..21bdc2eaea3e4 --- /dev/null +++ b/llvm/lib/MC/MCDisassembler/Disassembler.cpp @@ -0,0 +1,343 @@ +//===-- lib/MC/Disassembler.cpp - Disassembler Public C Interface ---------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "Disassembler.h" +#include "llvm-c/Disassembler.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Triple.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDisassembler/MCDisassembler.h" +#include "llvm/MC/MCDisassembler/MCRelocationInfo.h" +#include "llvm/MC/MCDisassembler/MCSymbolizer.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCInstrItineraries.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSchedule.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" +#include <cassert> +#include <cstddef> +#include <cstring> + +using namespace llvm; + +// LLVMCreateDisasm() creates a disassembler for the TripleName.  Symbolic +// disassembly is supported by passing a block of information in the DisInfo +// parameter and specifying the TagType and callback functions as described in +// the header llvm-c/Disassembler.h .  The pointer to the block and the +// functions can all be passed as NULL.  If successful, this returns a +// disassembler context.  If not, it returns NULL. 
+// +LLVMDisasmContextRef +LLVMCreateDisasmCPUFeatures(const char *TT, const char *CPU, +                            const char *Features, void *DisInfo, int TagType, +                            LLVMOpInfoCallback GetOpInfo, +                            LLVMSymbolLookupCallback SymbolLookUp) { +  // Get the target. +  std::string Error; +  const Target *TheTarget = TargetRegistry::lookupTarget(TT, Error); +  if (!TheTarget) +    return nullptr; + +  std::unique_ptr<const MCRegisterInfo> MRI(TheTarget->createMCRegInfo(TT)); +  if (!MRI) +    return nullptr; + +  // Get the assembler info needed to setup the MCContext. +  std::unique_ptr<const MCAsmInfo> MAI(TheTarget->createMCAsmInfo(*MRI, TT)); +  if (!MAI) +    return nullptr; + +  std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo()); +  if (!MII) +    return nullptr; + +  std::unique_ptr<const MCSubtargetInfo> STI( +      TheTarget->createMCSubtargetInfo(TT, CPU, Features)); +  if (!STI) +    return nullptr; + +  // Set up the MCContext for creating symbols and MCExpr's. +  std::unique_ptr<MCContext> Ctx(new MCContext(MAI.get(), MRI.get(), nullptr)); +  if (!Ctx) +    return nullptr; + +  // Set up disassembler. +  std::unique_ptr<MCDisassembler> DisAsm( +      TheTarget->createMCDisassembler(*STI, *Ctx)); +  if (!DisAsm) +    return nullptr; + +  std::unique_ptr<MCRelocationInfo> RelInfo( +      TheTarget->createMCRelocationInfo(TT, *Ctx)); +  if (!RelInfo) +    return nullptr; + +  std::unique_ptr<MCSymbolizer> Symbolizer(TheTarget->createMCSymbolizer( +      TT, GetOpInfo, SymbolLookUp, DisInfo, Ctx.get(), std::move(RelInfo))); +  DisAsm->setSymbolizer(std::move(Symbolizer)); + +  // Set up the instruction printer. 
+  int AsmPrinterVariant = MAI->getAssemblerDialect(); +  std::unique_ptr<MCInstPrinter> IP(TheTarget->createMCInstPrinter( +      Triple(TT), AsmPrinterVariant, *MAI, *MII, *MRI)); +  if (!IP) +    return nullptr; + +  LLVMDisasmContext *DC = new LLVMDisasmContext( +      TT, DisInfo, TagType, GetOpInfo, SymbolLookUp, TheTarget, std::move(MAI), +      std::move(MRI), std::move(STI), std::move(MII), std::move(Ctx), +      std::move(DisAsm), std::move(IP)); +  if (!DC) +    return nullptr; + +  DC->setCPU(CPU); +  return DC; +} + +LLVMDisasmContextRef +LLVMCreateDisasmCPU(const char *TT, const char *CPU, void *DisInfo, int TagType, +                    LLVMOpInfoCallback GetOpInfo, +                    LLVMSymbolLookupCallback SymbolLookUp) { +  return LLVMCreateDisasmCPUFeatures(TT, CPU, "", DisInfo, TagType, GetOpInfo, +                                     SymbolLookUp); +} + +LLVMDisasmContextRef LLVMCreateDisasm(const char *TT, void *DisInfo, +                                      int TagType, LLVMOpInfoCallback GetOpInfo, +                                      LLVMSymbolLookupCallback SymbolLookUp) { +  return LLVMCreateDisasmCPUFeatures(TT, "", "", DisInfo, TagType, GetOpInfo, +                                     SymbolLookUp); +} + +// +// LLVMDisasmDispose() disposes of the disassembler specified by the context. +// +void LLVMDisasmDispose(LLVMDisasmContextRef DCR){ +  LLVMDisasmContext *DC = static_cast<LLVMDisasmContext *>(DCR); +  delete DC; +} + +/// Emits the comments that are stored in \p DC comment stream. +/// Each comment in the comment stream must end with a newline. +static void emitComments(LLVMDisasmContext *DC, +                         formatted_raw_ostream &FormattedOS) { +  // Flush the stream before taking its content. +  StringRef Comments = DC->CommentsToEmit.str(); +  // Get the default information for printing a comment. 
+  const MCAsmInfo *MAI = DC->getAsmInfo(); +  StringRef CommentBegin = MAI->getCommentString(); +  unsigned CommentColumn = MAI->getCommentColumn(); +  bool IsFirst = true; +  while (!Comments.empty()) { +    if (!IsFirst) +      FormattedOS << '\n'; +    // Emit a line of comments. +    FormattedOS.PadToColumn(CommentColumn); +    size_t Position = Comments.find('\n'); +    FormattedOS << CommentBegin << ' ' << Comments.substr(0, Position); +    // Move after the newline character. +    Comments = Comments.substr(Position+1); +    IsFirst = false; +  } +  FormattedOS.flush(); + +  // Tell the comment stream that the vector changed underneath it. +  DC->CommentsToEmit.clear(); +} + +/// Gets latency information for \p Inst from the itinerary +/// scheduling model, based on \p DC information. +/// \return The maximum expected latency over all the operands or -1 +/// if no information is available. +static int getItineraryLatency(LLVMDisasmContext *DC, const MCInst &Inst) { +  const int NoInformationAvailable = -1; + +  // Check if we have a CPU to get the itinerary information. +  if (DC->getCPU().empty()) +    return NoInformationAvailable; + +  // Get itinerary information. +  const MCSubtargetInfo *STI = DC->getSubtargetInfo(); +  InstrItineraryData IID = STI->getInstrItineraryForCPU(DC->getCPU()); +  // Get the scheduling class of the requested instruction. +  const MCInstrDesc& Desc = DC->getInstrInfo()->get(Inst.getOpcode()); +  unsigned SCClass = Desc.getSchedClass(); + +  int Latency = 0; +  for (unsigned OpIdx = 0, OpIdxEnd = Inst.getNumOperands(); OpIdx != OpIdxEnd; +       ++OpIdx) +    Latency = std::max(Latency, IID.getOperandCycle(SCClass, OpIdx)); + +  return Latency; +} + +/// Gets latency information for \p Inst, based on \p DC information. +/// \return The maximum expected latency over all the definitions or -1 +/// if no information is available. 
+static int getLatency(LLVMDisasmContext *DC, const MCInst &Inst) { +  // Try to compute scheduling information. +  const MCSubtargetInfo *STI = DC->getSubtargetInfo(); +  const MCSchedModel SCModel = STI->getSchedModel(); +  const int NoInformationAvailable = -1; + +  // Check if we have a scheduling model for instructions. +  if (!SCModel.hasInstrSchedModel()) +    // Try to fall back to the itinerary model if the scheduling model doesn't +    // have a scheduling table.  Note the default does not have a table. +    return getItineraryLatency(DC, Inst); + +  // Get the scheduling class of the requested instruction. +  const MCInstrDesc& Desc = DC->getInstrInfo()->get(Inst.getOpcode()); +  unsigned SCClass = Desc.getSchedClass(); +  const MCSchedClassDesc *SCDesc = SCModel.getSchedClassDesc(SCClass); +  // Resolving the variant SchedClass requires an MI to pass to +  // SubTargetInfo::resolveSchedClass. +  if (!SCDesc || !SCDesc->isValid() || SCDesc->isVariant()) +    return NoInformationAvailable; + +  // Compute output latency. +  int16_t Latency = 0; +  for (unsigned DefIdx = 0, DefEnd = SCDesc->NumWriteLatencyEntries; +       DefIdx != DefEnd; ++DefIdx) { +    // Lookup the definition's write latency in SubtargetInfo. +    const MCWriteLatencyEntry *WLEntry = STI->getWriteLatencyEntry(SCDesc, +                                                                   DefIdx); +    Latency = std::max(Latency, WLEntry->Cycles); +  } + +  return Latency; +} + +/// Emits latency information in DC->CommentStream for \p Inst, based +/// on the information available in \p DC. +static void emitLatency(LLVMDisasmContext *DC, const MCInst &Inst) { +  int Latency = getLatency(DC, Inst); + +  // Report only interesting latencies. +  if (Latency < 2) +    return; + +  DC->CommentStream << "Latency: " << Latency << '\n'; +} + +// +// LLVMDisasmInstruction() disassembles a single instruction using the +// disassembler context specified in the parameter DC.  
The bytes of the +// instruction are specified in the parameter Bytes, and contains at least +// BytesSize number of bytes.  The instruction is at the address specified by +// the PC parameter.  If a valid instruction can be disassembled its string is +// returned indirectly in OutString which whos size is specified in the +// parameter OutStringSize.  This function returns the number of bytes in the +// instruction or zero if there was no valid instruction.  If this function +// returns zero the caller will have to pick how many bytes they want to step +// over by printing a .byte, .long etc. to continue. +// +size_t LLVMDisasmInstruction(LLVMDisasmContextRef DCR, uint8_t *Bytes, +                             uint64_t BytesSize, uint64_t PC, char *OutString, +                             size_t OutStringSize){ +  LLVMDisasmContext *DC = static_cast<LLVMDisasmContext *>(DCR); +  // Wrap the pointer to the Bytes, BytesSize and PC in a MemoryObject. +  ArrayRef<uint8_t> Data(Bytes, BytesSize); + +  uint64_t Size; +  MCInst Inst; +  const MCDisassembler *DisAsm = DC->getDisAsm(); +  MCInstPrinter *IP = DC->getIP(); +  MCDisassembler::DecodeStatus S; +  SmallVector<char, 64> InsnStr; +  raw_svector_ostream Annotations(InsnStr); +  S = DisAsm->getInstruction(Inst, Size, Data, PC, +                             /*REMOVE*/ nulls(), Annotations); +  switch (S) { +  case MCDisassembler::Fail: +  case MCDisassembler::SoftFail: +    // FIXME: Do something different for soft failure modes? 
+    return 0; + +  case MCDisassembler::Success: { +    StringRef AnnotationsStr = Annotations.str(); + +    SmallVector<char, 64> InsnStr; +    raw_svector_ostream OS(InsnStr); +    formatted_raw_ostream FormattedOS(OS); +    IP->printInst(&Inst, FormattedOS, AnnotationsStr, *DC->getSubtargetInfo()); + +    if (DC->getOptions() & LLVMDisassembler_Option_PrintLatency) +      emitLatency(DC, Inst); + +    emitComments(DC, FormattedOS); + +    assert(OutStringSize != 0 && "Output buffer cannot be zero size"); +    size_t OutputSize = std::min(OutStringSize-1, InsnStr.size()); +    std::memcpy(OutString, InsnStr.data(), OutputSize); +    OutString[OutputSize] = '\0'; // Terminate string. + +    return Size; +  } +  } +  llvm_unreachable("Invalid DecodeStatus!"); +} + +// +// LLVMSetDisasmOptions() sets the disassembler's options.  It returns 1 if it +// can set all the Options and 0 otherwise. +// +int LLVMSetDisasmOptions(LLVMDisasmContextRef DCR, uint64_t Options){ +  if (Options & LLVMDisassembler_Option_UseMarkup){ +      LLVMDisasmContext *DC = static_cast<LLVMDisasmContext *>(DCR); +      MCInstPrinter *IP = DC->getIP(); +      IP->setUseMarkup(true); +      DC->addOptions(LLVMDisassembler_Option_UseMarkup); +      Options &= ~LLVMDisassembler_Option_UseMarkup; +  } +  if (Options & LLVMDisassembler_Option_PrintImmHex){ +      LLVMDisasmContext *DC = static_cast<LLVMDisasmContext *>(DCR); +      MCInstPrinter *IP = DC->getIP(); +      IP->setPrintImmHex(true); +      DC->addOptions(LLVMDisassembler_Option_PrintImmHex); +      Options &= ~LLVMDisassembler_Option_PrintImmHex; +  } +  if (Options & LLVMDisassembler_Option_AsmPrinterVariant){ +      LLVMDisasmContext *DC = static_cast<LLVMDisasmContext *>(DCR); +      // Try to set up the new instruction printer. 
+      const MCAsmInfo *MAI = DC->getAsmInfo(); +      const MCInstrInfo *MII = DC->getInstrInfo(); +      const MCRegisterInfo *MRI = DC->getRegisterInfo(); +      int AsmPrinterVariant = MAI->getAssemblerDialect(); +      AsmPrinterVariant = AsmPrinterVariant == 0 ? 1 : 0; +      MCInstPrinter *IP = DC->getTarget()->createMCInstPrinter( +          Triple(DC->getTripleName()), AsmPrinterVariant, *MAI, *MII, *MRI); +      if (IP) { +        DC->setIP(IP); +        DC->addOptions(LLVMDisassembler_Option_AsmPrinterVariant); +        Options &= ~LLVMDisassembler_Option_AsmPrinterVariant; +      } +  } +  if (Options & LLVMDisassembler_Option_SetInstrComments) { +    LLVMDisasmContext *DC = static_cast<LLVMDisasmContext *>(DCR); +    MCInstPrinter *IP = DC->getIP(); +    IP->setCommentStream(DC->CommentStream); +    DC->addOptions(LLVMDisassembler_Option_SetInstrComments); +    Options &= ~LLVMDisassembler_Option_SetInstrComments; +  } +  if (Options & LLVMDisassembler_Option_PrintLatency) { +    LLVMDisasmContext *DC = static_cast<LLVMDisasmContext *>(DCR); +    DC->addOptions(LLVMDisassembler_Option_PrintLatency); +    Options &= ~LLVMDisassembler_Option_PrintLatency; +  } +  return (Options == 0); +} diff --git a/llvm/lib/MC/MCDisassembler/Disassembler.h b/llvm/lib/MC/MCDisassembler/Disassembler.h new file mode 100644 index 0000000000000..e5aab53a76136 --- /dev/null +++ b/llvm/lib/MC/MCDisassembler/Disassembler.h @@ -0,0 +1,124 @@ +//===------------- Disassembler.h - LLVM Disassembler -----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the interface for the Disassembly library's disassembler +// context.  
The disassembler is responsible for producing strings for +// individual instructions according to a given architecture and disassembly +// syntax. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_MC_MCDISASSEMBLER_DISASSEMBLER_H +#define LLVM_LIB_MC_MCDISASSEMBLER_DISASSEMBLER_H + +#include "llvm-c/Disassembler.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDisassembler/MCDisassembler.h" +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/raw_ostream.h" +#include <string> +#include <utility> + +namespace llvm { +class Target; + +// +// This is the disassembler context returned by LLVMCreateDisasm(). +// +class LLVMDisasmContext { +private: +  // +  // The passed parameters when the disassembler context is created. +  // +  // The TripleName for this disassembler. +  std::string TripleName; +  // The pointer to the caller's block of symbolic information. +  void *DisInfo; +  // The Triple specific symbolic information type returned by GetOpInfo. +  int TagType; +  // The function to get the symbolic information for operands. +  LLVMOpInfoCallback GetOpInfo; +  // The function to look up a symbol name. +  LLVMSymbolLookupCallback SymbolLookUp; +  // +  // The objects created and saved by LLVMCreateDisasm() then used by +  // LLVMDisasmInstruction(). +  // +  // The LLVM target corresponding to the disassembler. +  // FIXME: using std::unique_ptr<const llvm::Target> causes a malloc error +  //        when this LLVMDisasmContext is deleted. +  const Target *TheTarget; +  // The assembly information for the target architecture. +  std::unique_ptr<const llvm::MCAsmInfo> MAI; +  // The register information for the target architecture. 
+  std::unique_ptr<const llvm::MCRegisterInfo> MRI; +  // The subtarget information for the target architecture. +  std::unique_ptr<const llvm::MCSubtargetInfo> MSI; +  // The instruction information for the target architecture. +  std::unique_ptr<const llvm::MCInstrInfo> MII; +  // The assembly context for creating symbols and MCExprs. +  std::unique_ptr<const llvm::MCContext> Ctx; +  // The disassembler for the target architecture. +  std::unique_ptr<const llvm::MCDisassembler> DisAsm; +  // The instruction printer for the target architecture. +  std::unique_ptr<llvm::MCInstPrinter> IP; +  // The options used to set up the disassembler. +  uint64_t Options; +  // The CPU string. +  std::string CPU; + +public: +  // Comment stream and backing vector. +  SmallString<128> CommentsToEmit; +  raw_svector_ostream CommentStream; + +  LLVMDisasmContext(std::string TripleName, void *DisInfo, int TagType, +                    LLVMOpInfoCallback GetOpInfo, +                    LLVMSymbolLookupCallback SymbolLookUp, +                    const Target *TheTarget, +                    std::unique_ptr<const MCAsmInfo> &&MAI, +                    std::unique_ptr<const MCRegisterInfo> &&MRI, +                    std::unique_ptr<const MCSubtargetInfo> &&MSI, +                    std::unique_ptr<const MCInstrInfo> &&MII, +                    std::unique_ptr<const llvm::MCContext> &&Ctx, +                    std::unique_ptr<const MCDisassembler> &&DisAsm, +                    std::unique_ptr<MCInstPrinter> &&IP) +      : TripleName(std::move(TripleName)), DisInfo(DisInfo), TagType(TagType), +        GetOpInfo(GetOpInfo), SymbolLookUp(SymbolLookUp), TheTarget(TheTarget), +        MAI(std::move(MAI)), MRI(std::move(MRI)), MSI(std::move(MSI)), +        MII(std::move(MII)), Ctx(std::move(Ctx)), DisAsm(std::move(DisAsm)), +        IP(std::move(IP)), Options(0), CommentStream(CommentsToEmit) {} +  const std::string &getTripleName() const { return TripleName; } +  void *getDisInfo() const { 
return DisInfo; } +  int getTagType() const { return TagType; } +  LLVMOpInfoCallback getGetOpInfo() const { return GetOpInfo; } +  LLVMSymbolLookupCallback getSymbolLookupCallback() const { +    return SymbolLookUp; +  } +  const Target *getTarget() const { return TheTarget; } +  const MCDisassembler *getDisAsm() const { return DisAsm.get(); } +  const MCAsmInfo *getAsmInfo() const { return MAI.get(); } +  const MCInstrInfo *getInstrInfo() const { return MII.get(); } +  const MCRegisterInfo *getRegisterInfo() const { return MRI.get(); } +  const MCSubtargetInfo *getSubtargetInfo() const { return MSI.get(); } +  MCInstPrinter *getIP() { return IP.get(); } +  void setIP(MCInstPrinter *NewIP) { IP.reset(NewIP); } +  uint64_t getOptions() const { return Options; } +  void addOptions(uint64_t Options) { this->Options |= Options; } +  StringRef getCPU() const { return CPU; } +  void setCPU(const char *CPU) { this->CPU = CPU; } +}; + +} // namespace llvm + +#endif diff --git a/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp b/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp new file mode 100644 index 0000000000000..063f7e706024b --- /dev/null +++ b/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp @@ -0,0 +1,46 @@ +//===- MCDisassembler.cpp - Disassembler interface ------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCDisassembler/MCDisassembler.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> + +using namespace llvm; + +MCDisassembler::~MCDisassembler() = default; + +MCDisassembler::DecodeStatus MCDisassembler::onSymbolStart( +    StringRef Name, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address, +    raw_ostream &VStream, raw_ostream &CStream) const { +  Size = 0; +  return MCDisassembler::Success; +} + +bool MCDisassembler::tryAddingSymbolicOperand(MCInst &Inst, int64_t Value, +                                              uint64_t Address, bool IsBranch, +                                              uint64_t Offset, +                                              uint64_t InstSize) const { +  raw_ostream &cStream = CommentStream ? *CommentStream : nulls(); +  if (Symbolizer) +    return Symbolizer->tryAddingSymbolicOperand(Inst, cStream, Value, Address, +                                                IsBranch, Offset, InstSize); +  return false; +} + +void MCDisassembler::tryAddingPcLoadReferenceComment(int64_t Value, +                                                     uint64_t Address) const { +  raw_ostream &cStream = CommentStream ? 
*CommentStream : nulls(); +  if (Symbolizer) +    Symbolizer->tryAddingPcLoadReferenceComment(cStream, Value, Address); +} + +void MCDisassembler::setSymbolizer(std::unique_ptr<MCSymbolizer> Symzer) { +  Symbolizer = std::move(Symzer); +} diff --git a/llvm/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp b/llvm/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp new file mode 100644 index 0000000000000..7befef86303cb --- /dev/null +++ b/llvm/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp @@ -0,0 +1,199 @@ +//===-- MCExternalSymbolizer.cpp - External symbolizer --------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCDisassembler/MCExternalSymbolizer.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/Support/raw_ostream.h" +#include <cstring> + +using namespace llvm; + +namespace llvm { +class Triple; +} + +// This function tries to add a symbolic operand in place of the immediate +// Value in the MCInst. The immediate Value has had any PC adjustment made by +// the caller. If the instruction is a branch instruction then IsBranch is true, +// else false. If the getOpInfo() function was set as part of the +// setupForSymbolicDisassembly() call then that function is called to get any +// symbolic information at the Address for this instruction. If that returns +// non-zero then the symbolic information it returns is used to create an MCExpr +// and that is added as an operand to the MCInst. If getOpInfo() returns zero +// and IsBranch is true then a symbol look up for Value is done and if a symbol +// is found an MCExpr is created with that, else an MCExpr with Value is +// created. 
This function returns true if it adds an operand to the MCInst and +// false otherwise. +bool MCExternalSymbolizer::tryAddingSymbolicOperand(MCInst &MI, +                                                    raw_ostream &cStream, +                                                    int64_t Value, +                                                    uint64_t Address, +                                                    bool IsBranch, +                                                    uint64_t Offset, +                                                    uint64_t InstSize) { +  struct LLVMOpInfo1 SymbolicOp; +  std::memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1)); +  SymbolicOp.Value = Value; + +  if (!GetOpInfo || +      !GetOpInfo(DisInfo, Address, Offset, InstSize, 1, &SymbolicOp)) { +    // Clear SymbolicOp.Value from above and also all other fields. +    std::memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1)); + +    // At this point, GetOpInfo() did not find any relocation information about +    // this operand and we are left to use the SymbolLookUp() call back to guess +    // if the Value is the address of a symbol.  In the case this is a branch +    // that always makes sense to guess.  But in the case of an immediate it is +    // a bit more questionable if it is an address of a symbol or some other +    // reference.  So if the immediate Value comes from a width of 1 byte, +    // InstSize, we will not guess it is an address of a symbol.  Because in +    // object files assembled starting at address 0 this usually leads to +    // incorrect symbolication. 
+    if (!SymbolLookUp || (InstSize == 1 && !IsBranch)) +      return false; + +    uint64_t ReferenceType; +    if (IsBranch) +       ReferenceType = LLVMDisassembler_ReferenceType_In_Branch; +    else +       ReferenceType = LLVMDisassembler_ReferenceType_InOut_None; +    const char *ReferenceName; +    const char *Name = SymbolLookUp(DisInfo, Value, &ReferenceType, Address, +                                    &ReferenceName); +    if (Name) { +      SymbolicOp.AddSymbol.Name = Name; +      SymbolicOp.AddSymbol.Present = true; +      // If Name is a C++ symbol name put the human readable name in a comment. +      if(ReferenceType == LLVMDisassembler_ReferenceType_DeMangled_Name) +        cStream << ReferenceName; +    } +    // For branches always create an MCExpr so it gets printed as hex address. +    else if (IsBranch) { +      SymbolicOp.Value = Value; +    } +    if(ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub) +      cStream << "symbol stub for: " << ReferenceName; +    else if(ReferenceType == LLVMDisassembler_ReferenceType_Out_Objc_Message) +      cStream << "Objc message: " << ReferenceName; +    if (!Name && !IsBranch) +      return false; +  } + +  const MCExpr *Add = nullptr; +  if (SymbolicOp.AddSymbol.Present) { +    if (SymbolicOp.AddSymbol.Name) { +      StringRef Name(SymbolicOp.AddSymbol.Name); +      MCSymbol *Sym = Ctx.getOrCreateSymbol(Name); +      Add = MCSymbolRefExpr::create(Sym, Ctx); +    } else { +      Add = MCConstantExpr::create((int)SymbolicOp.AddSymbol.Value, Ctx); +    } +  } + +  const MCExpr *Sub = nullptr; +  if (SymbolicOp.SubtractSymbol.Present) { +      if (SymbolicOp.SubtractSymbol.Name) { +      StringRef Name(SymbolicOp.SubtractSymbol.Name); +      MCSymbol *Sym = Ctx.getOrCreateSymbol(Name); +      Sub = MCSymbolRefExpr::create(Sym, Ctx); +    } else { +      Sub = MCConstantExpr::create((int)SymbolicOp.SubtractSymbol.Value, Ctx); +    } +  } + +  const MCExpr *Off = nullptr; +  if (SymbolicOp.Value 
!= 0) +    Off = MCConstantExpr::create(SymbolicOp.Value, Ctx); + +  const MCExpr *Expr; +  if (Sub) { +    const MCExpr *LHS; +    if (Add) +      LHS = MCBinaryExpr::createSub(Add, Sub, Ctx); +    else +      LHS = MCUnaryExpr::createMinus(Sub, Ctx); +    if (Off) +      Expr = MCBinaryExpr::createAdd(LHS, Off, Ctx); +    else +      Expr = LHS; +  } else if (Add) { +    if (Off) +      Expr = MCBinaryExpr::createAdd(Add, Off, Ctx); +    else +      Expr = Add; +  } else { +    if (Off) +      Expr = Off; +    else +      Expr = MCConstantExpr::create(0, Ctx); +  } + +  Expr = RelInfo->createExprForCAPIVariantKind(Expr, SymbolicOp.VariantKind); +  if (!Expr) +    return false; + +  MI.addOperand(MCOperand::createExpr(Expr)); +  return true; +} + +// This function tries to add a comment as to what is being referenced by a load +// instruction with the base register that is the Pc.  These can often be values +// in a literal pool near the Address of the instruction. The Address of the +// instruction and its immediate Value are used as a possible literal pool entry. +// The SymbolLookUp call back will return the name of a symbol referenced by the +// literal pool's entry if the referenced address is that of a symbol. Or it +// will return a pointer to a literal 'C' string if the referenced address of +// the literal pool's entry is an address into a section with C string literals. +// Or if the reference is to an Objective-C data structure it will return a +// specific reference type for it and a string. 
+void MCExternalSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &cStream, +                                                           int64_t Value, +                                                           uint64_t Address) { +  if (SymbolLookUp) { +    uint64_t ReferenceType = LLVMDisassembler_ReferenceType_In_PCrel_Load; +    const char *ReferenceName; +    (void)SymbolLookUp(DisInfo, Value, &ReferenceType, Address, &ReferenceName); +    if(ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_SymAddr) +      cStream << "literal pool symbol address: " << ReferenceName; +    else if(ReferenceType == +            LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr) { +      cStream << "literal pool for: \""; +      cStream.write_escaped(ReferenceName); +      cStream << "\""; +    } +    else if(ReferenceType == +            LLVMDisassembler_ReferenceType_Out_Objc_CFString_Ref) +      cStream << "Objc cfstring ref: @\"" << ReferenceName << "\""; +    else if(ReferenceType == +            LLVMDisassembler_ReferenceType_Out_Objc_Message) +      cStream << "Objc message: " << ReferenceName; +    else if(ReferenceType == +            LLVMDisassembler_ReferenceType_Out_Objc_Message_Ref) +      cStream << "Objc message ref: " << ReferenceName; +    else if(ReferenceType == +            LLVMDisassembler_ReferenceType_Out_Objc_Selector_Ref) +      cStream << "Objc selector ref: " << ReferenceName; +    else if(ReferenceType == +            LLVMDisassembler_ReferenceType_Out_Objc_Class_Ref) +      cStream << "Objc class ref: " << ReferenceName; +  } +} + +namespace llvm { +MCSymbolizer *createMCSymbolizer(const Triple &TT, LLVMOpInfoCallback GetOpInfo, +                                 LLVMSymbolLookupCallback SymbolLookUp, +                                 void *DisInfo, MCContext *Ctx, +                                 std::unique_ptr<MCRelocationInfo> &&RelInfo) { +  assert(Ctx && "No MCContext given for symbolic disassembly"); + +  return new 
MCExternalSymbolizer(*Ctx, std::move(RelInfo), GetOpInfo, +                                  SymbolLookUp, DisInfo); +} +} diff --git a/llvm/lib/MC/MCDisassembler/MCRelocationInfo.cpp b/llvm/lib/MC/MCDisassembler/MCRelocationInfo.cpp new file mode 100644 index 0000000000000..64e216e0051da --- /dev/null +++ b/llvm/lib/MC/MCDisassembler/MCRelocationInfo.cpp @@ -0,0 +1,30 @@ +//===-- MCRelocationInfo.cpp ----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCDisassembler/MCRelocationInfo.h" +#include "llvm-c/Disassembler.h" +#include "llvm/Support/TargetRegistry.h" + +using namespace llvm; + +MCRelocationInfo::MCRelocationInfo(MCContext &Ctx) : Ctx(Ctx) {} + +MCRelocationInfo::~MCRelocationInfo() = default; + +const MCExpr * +MCRelocationInfo::createExprForCAPIVariantKind(const MCExpr *SubExpr, +                                               unsigned VariantKind) { +  if (VariantKind != LLVMDisassembler_VariantKind_None) +    return nullptr; +  return SubExpr; +} + +MCRelocationInfo *llvm::createMCRelocationInfo(const Triple &TT, +                                               MCContext &Ctx) { +  return new MCRelocationInfo(Ctx); +} diff --git a/llvm/lib/MC/MCDisassembler/MCSymbolizer.cpp b/llvm/lib/MC/MCDisassembler/MCSymbolizer.cpp new file mode 100644 index 0000000000000..8214a196afb11 --- /dev/null +++ b/llvm/lib/MC/MCDisassembler/MCSymbolizer.cpp @@ -0,0 +1,13 @@ +//===-- llvm/MC/MCSymbolizer.cpp - MCSymbolizer class ---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "llvm/MC/MCDisassembler/MCSymbolizer.h"

using namespace llvm;

// Out-of-line definition of the MCSymbolizer destructor; it is defaulted here
// in the .cpp file rather than in the header.
MCSymbolizer::~MCSymbolizer() = default;
