diff options
Diffstat (limited to 'llvm/tools/llvm-objdump/llvm-objdump.cpp')
-rw-r--r-- | llvm/tools/llvm-objdump/llvm-objdump.cpp | 1505 |
1 files changed, 1042 insertions, 463 deletions
diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp index 6bd37a1fb86c9..320bbb5d358b9 100644 --- a/llvm/tools/llvm-objdump/llvm-objdump.cpp +++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp @@ -16,7 +16,14 @@ //===----------------------------------------------------------------------===// #include "llvm-objdump.h" +#include "COFFDump.h" +#include "ELFDump.h" +#include "MachODump.h" +#include "WasmDump.h" +#include "XCOFFDump.h" +#include "llvm/ADT/IndexedMap.h" #include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetOperations.h" #include "llvm/ADT/StringExtras.h" @@ -70,28 +77,13 @@ #include <unordered_map> #include <utility> +using namespace llvm; using namespace llvm::object; +using namespace llvm::objdump; -namespace llvm { - -cl::OptionCategory ObjdumpCat("llvm-objdump Options"); - -// MachO specific -extern cl::OptionCategory MachOCat; -extern cl::opt<bool> Bind; -extern cl::opt<bool> DataInCode; -extern cl::opt<bool> DylibsUsed; -extern cl::opt<bool> DylibId; -extern cl::opt<bool> ExportsTrie; -extern cl::opt<bool> FirstPrivateHeader; -extern cl::opt<bool> IndirectSymbols; -extern cl::opt<bool> InfoPlist; -extern cl::opt<bool> LazyBind; -extern cl::opt<bool> LinkOptHints; -extern cl::opt<bool> ObjcMetaData; -extern cl::opt<bool> Rebase; -extern cl::opt<bool> UniversalHeaders; -extern cl::opt<bool> WeakBind; +#define DEBUG_TYPE "objdump" + +static cl::OptionCategory ObjdumpCat("llvm-objdump Options"); static cl::opt<uint64_t> AdjustVMA( "adjust-vma", @@ -112,21 +104,22 @@ static cl::opt<std::string> "see -version for available targets"), cl::cat(ObjdumpCat)); -cl::opt<bool> ArchiveHeaders("archive-headers", - cl::desc("Display archive header information"), - cl::cat(ObjdumpCat)); +cl::opt<bool> + objdump::ArchiveHeaders("archive-headers", + cl::desc("Display archive header information"), + cl::cat(ObjdumpCat)); static cl::alias ArchiveHeadersShort("a", cl::desc("Alias for --archive-headers"), cl::NotHidden, cl::Grouping, cl::aliasopt(ArchiveHeaders)); -cl::opt<bool> Demangle("demangle", cl::desc("Demangle symbols names"), - cl::init(false), cl::cat(ObjdumpCat)); +cl::opt<bool> objdump::Demangle("demangle", cl::desc("Demangle symbols names"), + cl::init(false), cl::cat(ObjdumpCat)); static cl::alias DemangleShort("C", cl::desc("Alias for --demangle"), cl::NotHidden, cl::Grouping, cl::aliasopt(Demangle)); -cl::opt<bool> Disassemble( +cl::opt<bool> objdump::Disassemble( "disassemble", cl::desc("Display assembler mnemonics for the machine instructions"), cl::cat(ObjdumpCat)); @@ -134,7 +127,7 @@ static cl::alias DisassembleShort("d", cl::desc("Alias for --disassemble"), cl::NotHidden, cl::Grouping, cl::aliasopt(Disassemble)); -cl::opt<bool> DisassembleAll( +cl::opt<bool> objdump::DisassembleAll( "disassemble-all", cl::desc("Display assembler mnemonics for the machine instructions"), cl::cat(ObjdumpCat)); @@ -143,12 +136,18 @@ static cl::alias DisassembleAllShort("D", cl::NotHidden, cl::Grouping, cl::aliasopt(DisassembleAll)); +cl::opt<bool> objdump::SymbolDescription( + "symbol-description", + cl::desc("Add symbol description for disassembly. This " + "option is for XCOFF files only"), + cl::init(false), cl::cat(ObjdumpCat)); + static cl::list<std::string> - DisassembleFunctions("disassemble-functions", cl::CommaSeparated, - cl::desc("List of functions to disassemble. " - "Accept demangled names when --demangle is " - "specified, otherwise accept mangled names"), - cl::cat(ObjdumpCat)); + DisassembleSymbols("disassemble-symbols", cl::CommaSeparated, + cl::desc("List of symbols to disassemble. " + "Accept demangled names when --demangle is " + "specified, otherwise accept mangled names"), + cl::cat(ObjdumpCat)); static cl::opt<bool> DisassembleZeroes( "disassemble-zeroes", @@ -170,7 +169,7 @@ static cl::alias cl::CommaSeparated, cl::aliasopt(DisassemblerOptions)); -cl::opt<DIDumpType> DwarfDumpType( +cl::opt<DIDumpType> objdump::DwarfDumpType( "dwarf", cl::init(DIDT_Null), cl::desc("Dump of dwarf debug sections:"), cl::values(clEnumValN(DIDT_DebugFrame, "frames", ".debug_frame")), cl::cat(ObjdumpCat)); @@ -197,9 +196,10 @@ static cl::alias FileHeadersShort("f", cl::desc("Alias for --file-headers"), cl::NotHidden, cl::Grouping, cl::aliasopt(FileHeaders)); -cl::opt<bool> SectionContents("full-contents", - cl::desc("Display the content of each section"), - cl::cat(ObjdumpCat)); +cl::opt<bool> + objdump::SectionContents("full-contents", + cl::desc("Display the content of each section"), + cl::cat(ObjdumpCat)); static cl::alias SectionContentsShort("s", cl::desc("Alias for --full-contents"), cl::NotHidden, cl::Grouping, @@ -225,24 +225,24 @@ static cl::opt<bool> MachOOpt("macho", static cl::alias MachOm("m", cl::desc("Alias for --macho"), cl::NotHidden, cl::Grouping, cl::aliasopt(MachOOpt)); -cl::opt<std::string> - MCPU("mcpu", - cl::desc("Target a specific cpu type (-mcpu=help for details)"), - cl::value_desc("cpu-name"), cl::init(""), cl::cat(ObjdumpCat)); +cl::opt<std::string> objdump::MCPU( + "mcpu", cl::desc("Target a specific cpu type (-mcpu=help for details)"), + cl::value_desc("cpu-name"), cl::init(""), cl::cat(ObjdumpCat)); -cl::list<std::string> MAttrs("mattr", cl::CommaSeparated, - cl::desc("Target specific attributes"), - cl::value_desc("a1,+a2,-a3,..."), - cl::cat(ObjdumpCat)); +cl::list<std::string> objdump::MAttrs("mattr", cl::CommaSeparated, + cl::desc("Target specific attributes"), + cl::value_desc("a1,+a2,-a3,..."), + cl::cat(ObjdumpCat)); -cl::opt<bool> NoShowRawInsn("no-show-raw-insn", - cl::desc("When disassembling " - "instructions, do not print " - "the instruction bytes."), - cl::cat(ObjdumpCat)); -cl::opt<bool> NoLeadingAddr("no-leading-addr", - cl::desc("Print no leading address"), - cl::cat(ObjdumpCat)); +cl::opt<bool> objdump::NoShowRawInsn( + "no-show-raw-insn", + cl::desc( + "When disassembling instructions, do not print the instruction bytes."), + cl::cat(ObjdumpCat)); + +cl::opt<bool> objdump::NoLeadingAddr("no-leading-addr", + cl::desc("Print no leading address"), + cl::cat(ObjdumpCat)); static cl::opt<bool> RawClangAST( "raw-clang-ast", @@ -250,37 +250,40 @@ static cl::opt<bool> RawClangAST( cl::cat(ObjdumpCat)); cl::opt<bool> - Relocations("reloc", cl::desc("Display the relocation entries in the file"), - cl::cat(ObjdumpCat)); + objdump::Relocations("reloc", + cl::desc("Display the relocation entries in the file"), + cl::cat(ObjdumpCat)); static cl::alias RelocationsShort("r", cl::desc("Alias for --reloc"), cl::NotHidden, cl::Grouping, cl::aliasopt(Relocations)); -cl::opt<bool> PrintImmHex("print-imm-hex", - cl::desc("Use hex format for immediate values"), - cl::cat(ObjdumpCat)); +cl::opt<bool> + objdump::PrintImmHex("print-imm-hex", + cl::desc("Use hex format for immediate values"), + cl::cat(ObjdumpCat)); -cl::opt<bool> PrivateHeaders("private-headers", - cl::desc("Display format specific file headers"), - cl::cat(ObjdumpCat)); +cl::opt<bool> + objdump::PrivateHeaders("private-headers", + cl::desc("Display format specific file headers"), + cl::cat(ObjdumpCat)); static cl::alias PrivateHeadersShort("p", cl::desc("Alias for --private-headers"), cl::NotHidden, cl::Grouping, cl::aliasopt(PrivateHeaders)); cl::list<std::string> - FilterSections("section", - cl::desc("Operate on the specified sections only. " - "With -macho dump segment,section"), - cl::cat(ObjdumpCat)); + objdump::FilterSections("section", + cl::desc("Operate on the specified sections only. " + "With -macho dump segment,section"), + cl::cat(ObjdumpCat)); static cl::alias FilterSectionsj("j", cl::desc("Alias for --section"), cl::NotHidden, cl::Grouping, cl::Prefix, cl::aliasopt(FilterSections)); -cl::opt<bool> SectionHeaders("section-headers", - cl::desc("Display summaries of the " - "headers for each section."), - cl::cat(ObjdumpCat)); +cl::opt<bool> objdump::SectionHeaders( + "section-headers", + cl::desc("Display summaries of the headers for each section."), + cl::cat(ObjdumpCat)); static cl::alias SectionHeadersShort("headers", cl::desc("Alias for --section-headers"), cl::NotHidden, @@ -312,19 +315,30 @@ static cl::opt<uint64_t> StopAddress("stop-address", cl::value_desc("address"), cl::init(UINT64_MAX), cl::cat(ObjdumpCat)); -cl::opt<bool> SymbolTable("syms", cl::desc("Display the symbol table"), - cl::cat(ObjdumpCat)); +cl::opt<bool> objdump::SymbolTable("syms", cl::desc("Display the symbol table"), + cl::cat(ObjdumpCat)); static cl::alias SymbolTableShort("t", cl::desc("Alias for --syms"), cl::NotHidden, cl::Grouping, cl::aliasopt(SymbolTable)); -cl::opt<std::string> TripleName("triple", - cl::desc("Target triple to disassemble for, " - "see -version for available targets"), - cl::cat(ObjdumpCat)); +static cl::opt<bool> DynamicSymbolTable( + "dynamic-syms", + cl::desc("Display the contents of the dynamic symbol table"), + cl::cat(ObjdumpCat)); +static cl::alias DynamicSymbolTableShort("T", + cl::desc("Alias for --dynamic-syms"), + cl::NotHidden, cl::Grouping, + cl::aliasopt(DynamicSymbolTable)); -cl::opt<bool> UnwindInfo("unwind-info", cl::desc("Display unwind information"), - cl::cat(ObjdumpCat)); +cl::opt<std::string> objdump::TripleName( + "triple", + cl::desc( + "Target triple to disassemble for, see -version for available targets"), + cl::cat(ObjdumpCat)); + +cl::opt<bool> objdump::UnwindInfo("unwind-info", + cl::desc("Display unwind information"), + cl::cat(ObjdumpCat)); static cl::alias UnwindInfoShort("u", cl::desc("Alias for --unwind-info"), cl::NotHidden, cl::Grouping, cl::aliasopt(UnwindInfo)); @@ -334,15 +348,35 @@ static cl::opt<bool> cl::cat(ObjdumpCat)); static cl::alias WideShort("w", cl::Grouping, cl::aliasopt(Wide)); +enum DebugVarsFormat { + DVDisabled, + DVUnicode, + DVASCII, +}; + +static cl::opt<DebugVarsFormat> DbgVariables( + "debug-vars", cl::init(DVDisabled), + cl::desc("Print the locations (in registers or memory) of " + "source-level variables alongside disassembly"), + cl::ValueOptional, + cl::values(clEnumValN(DVUnicode, "", "unicode"), + clEnumValN(DVUnicode, "unicode", "unicode"), + clEnumValN(DVASCII, "ascii", "unicode")), + cl::cat(ObjdumpCat)); + +static cl::opt<int> + DbgIndent("debug-vars-indent", cl::init(40), + cl::desc("Distance to indent the source-level variable display, " + "relative to the start of the disassembly"), + cl::cat(ObjdumpCat)); + static cl::extrahelp HelpResponse("\nPass @FILE as argument to read options from FILE.\n"); -static StringSet<> DisasmFuncsSet; -static StringSet<> FoundSectionSet; +static StringSet<> DisasmSymbolSet; +StringSet<> objdump::FoundSectionSet; static StringRef ToolName; -typedef std::vector<std::tuple<uint64_t, StringRef, uint8_t>> SectionSymbolsTy; - namespace { struct FilterResult { // True if the section should not be skipped. @@ -378,7 +412,8 @@ static FilterResult checkSectionFilter(object::SectionRef S) { /*IncrementIndex=*/true}; } -SectionFilter ToolSectionFilter(object::ObjectFile const &O, uint64_t *Idx) { +SectionFilter objdump::ToolSectionFilter(object::ObjectFile const &O, + uint64_t *Idx) { // Start at UINT64_MAX so that the first index returned after an increment is // zero (after the unsigned wrap). if (Idx) @@ -393,35 +428,37 @@ SectionFilter ToolSectionFilter(object::ObjectFile const &O, uint64_t *Idx) { O); } -std::string getFileNameForError(const object::Archive::Child &C, - unsigned Index) { +std::string objdump::getFileNameForError(const object::Archive::Child &C, + unsigned Index) { Expected<StringRef> NameOrErr = C.getName(); if (NameOrErr) - return NameOrErr.get(); + return std::string(NameOrErr.get()); // If we have an error getting the name then we print the index of the archive // member. Since we are already in an error state, we just ignore this error. consumeError(NameOrErr.takeError()); return "<file index: " + std::to_string(Index) + ">"; } -void reportWarning(Twine Message, StringRef File) { +void objdump::reportWarning(Twine Message, StringRef File) { // Output order between errs() and outs() matters especially for archive // files where the output is per member object. outs().flush(); WithColor::warning(errs(), ToolName) << "'" << File << "': " << Message << "\n"; - errs().flush(); } -LLVM_ATTRIBUTE_NORETURN void reportError(StringRef File, Twine Message) { +LLVM_ATTRIBUTE_NORETURN void objdump::reportError(StringRef File, + Twine Message) { + outs().flush(); WithColor::error(errs(), ToolName) << "'" << File << "': " << Message << "\n"; exit(1); } -LLVM_ATTRIBUTE_NORETURN void reportError(Error E, StringRef FileName, - StringRef ArchiveName, - StringRef ArchitectureName) { +LLVM_ATTRIBUTE_NORETURN void objdump::reportError(Error E, StringRef FileName, + StringRef ArchiveName, + StringRef ArchitectureName) { assert(E); + outs().flush(); WithColor::error(errs(), ToolName); if (ArchiveName != "") errs() << ArchiveName << "(" << FileName << ")"; @@ -429,11 +466,8 @@ LLVM_ATTRIBUTE_NORETURN void reportError(Error E, StringRef FileName, errs() << "'" << FileName << "'"; if (!ArchitectureName.empty()) errs() << " (for architecture " << ArchitectureName << ")"; - std::string Buf; - raw_string_ostream OS(Buf); - logAllUnhandledErrors(std::move(E), OS); - OS.flush(); - errs() << ": " << Buf; + errs() << ": "; + logAllUnhandledErrors(std::move(E), errs()); exit(1); } @@ -487,7 +521,7 @@ static const Target *getTarget(const ObjectFile *Obj) { return TheTarget; } -bool isRelocAddressLess(RelocationRef A, RelocationRef B) { +bool objdump::isRelocAddressLess(RelocationRef A, RelocationRef B) { return A.getOffset() < B.getOffset(); } @@ -502,6 +536,8 @@ static Error getRelocationValueString(const RelocationRef &Rel, return getWasmRelocationValueString(Wasm, Rel, Result); if (auto *MachO = dyn_cast<MachOObjectFile>(Obj)) return getMachORelocationValueString(MachO, Rel, Result); + if (auto *XCOFF = dyn_cast<XCOFFObjectFile>(Obj)) + return getXCOFFRelocationValueString(XCOFF, Rel, Result); llvm_unreachable("unknown object file format"); } @@ -538,6 +574,358 @@ static bool getHidden(RelocationRef RelRef) { } namespace { + +/// Get the column at which we want to start printing the instruction +/// disassembly, taking into account anything which appears to the left of it. +unsigned getInstStartColumn(const MCSubtargetInfo &STI) { + return NoShowRawInsn ? 16 : STI.getTargetTriple().isX86() ? 40 : 24; +} + +/// Stores a single expression representing the location of a source-level +/// variable, along with the PC range for which that expression is valid. +struct LiveVariable { + DWARFLocationExpression LocExpr; + const char *VarName; + DWARFUnit *Unit; + const DWARFDie FuncDie; + + LiveVariable(const DWARFLocationExpression &LocExpr, const char *VarName, + DWARFUnit *Unit, const DWARFDie FuncDie) + : LocExpr(LocExpr), VarName(VarName), Unit(Unit), FuncDie(FuncDie) {} + + bool liveAtAddress(object::SectionedAddress Addr) { + if (LocExpr.Range == None) + return false; + return LocExpr.Range->SectionIndex == Addr.SectionIndex && + LocExpr.Range->LowPC <= Addr.Address && + LocExpr.Range->HighPC > Addr.Address; + } + + void print(raw_ostream &OS, const MCRegisterInfo &MRI) const { + DataExtractor Data({LocExpr.Expr.data(), LocExpr.Expr.size()}, + Unit->getContext().isLittleEndian(), 0); + DWARFExpression Expression(Data, Unit->getAddressByteSize()); + Expression.printCompact(OS, MRI); + } +}; + +/// Helper class for printing source variable locations alongside disassembly. +class LiveVariablePrinter { + // Information we want to track about one column in which we are printing a + // variable live range. + struct Column { + unsigned VarIdx = NullVarIdx; + bool LiveIn = false; + bool LiveOut = false; + bool MustDrawLabel = false; + + bool isActive() const { return VarIdx != NullVarIdx; } + + static constexpr unsigned NullVarIdx = std::numeric_limits<unsigned>::max(); + }; + + // All live variables we know about in the object/image file. + std::vector<LiveVariable> LiveVariables; + + // The columns we are currently drawing. + IndexedMap<Column> ActiveCols; + + const MCRegisterInfo &MRI; + const MCSubtargetInfo &STI; + + void addVariable(DWARFDie FuncDie, DWARFDie VarDie) { + uint64_t FuncLowPC, FuncHighPC, SectionIndex; + FuncDie.getLowAndHighPC(FuncLowPC, FuncHighPC, SectionIndex); + const char *VarName = VarDie.getName(DINameKind::ShortName); + DWARFUnit *U = VarDie.getDwarfUnit(); + + Expected<DWARFLocationExpressionsVector> Locs = + VarDie.getLocations(dwarf::DW_AT_location); + if (!Locs) { + // If the variable doesn't have any locations, just ignore it. We don't + // report an error or warning here as that could be noisy on optimised + // code. + consumeError(Locs.takeError()); + return; + } + + for (const DWARFLocationExpression &LocExpr : *Locs) { + if (LocExpr.Range) { + LiveVariables.emplace_back(LocExpr, VarName, U, FuncDie); + } else { + // If the LocExpr does not have an associated range, it is valid for + // the whole of the function. + // TODO: technically it is not valid for any range covered by another + // LocExpr, does that happen in reality? + DWARFLocationExpression WholeFuncExpr{ + DWARFAddressRange(FuncLowPC, FuncHighPC, SectionIndex), + LocExpr.Expr}; + LiveVariables.emplace_back(WholeFuncExpr, VarName, U, FuncDie); + } + } + } + + void addFunction(DWARFDie D) { + for (const DWARFDie &Child : D.children()) { + if (Child.getTag() == dwarf::DW_TAG_variable || + Child.getTag() == dwarf::DW_TAG_formal_parameter) + addVariable(D, Child); + else + addFunction(Child); + } + } + + // Get the column number (in characters) at which the first live variable + // line should be printed. + unsigned getIndentLevel() const { + return DbgIndent + getInstStartColumn(STI); + } + + // Indent to the first live-range column to the right of the currently + // printed line, and return the index of that column. + // TODO: formatted_raw_ostream uses "column" to mean a number of characters + // since the last \n, and we use it to mean the number of slots in which we + // put live variable lines. Pick a less overloaded word. + unsigned moveToFirstVarColumn(formatted_raw_ostream &OS) { + // Logical column number: column zero is the first column we print in, each + // logical column is 2 physical columns wide. + unsigned FirstUnprintedLogicalColumn = + std::max((int)(OS.getColumn() - getIndentLevel() + 1) / 2, 0); + // Physical column number: the actual column number in characters, with + // zero being the left-most side of the screen. + unsigned FirstUnprintedPhysicalColumn = + getIndentLevel() + FirstUnprintedLogicalColumn * 2; + + if (FirstUnprintedPhysicalColumn > OS.getColumn()) + OS.PadToColumn(FirstUnprintedPhysicalColumn); + + return FirstUnprintedLogicalColumn; + } + + unsigned findFreeColumn() { + for (unsigned ColIdx = 0; ColIdx < ActiveCols.size(); ++ColIdx) + if (!ActiveCols[ColIdx].isActive()) + return ColIdx; + + size_t OldSize = ActiveCols.size(); + ActiveCols.grow(std::max<size_t>(OldSize * 2, 1)); + return OldSize; + } + +public: + LiveVariablePrinter(const MCRegisterInfo &MRI, const MCSubtargetInfo &STI) + : LiveVariables(), ActiveCols(Column()), MRI(MRI), STI(STI) {} + + void dump() const { + for (const LiveVariable &LV : LiveVariables) { + dbgs() << LV.VarName << " @ " << LV.LocExpr.Range << ": "; + LV.print(dbgs(), MRI); + dbgs() << "\n"; + } + } + + void addCompileUnit(DWARFDie D) { + if (D.getTag() == dwarf::DW_TAG_subprogram) + addFunction(D); + else + for (const DWARFDie &Child : D.children()) + addFunction(Child); + } + + /// Update to match the state of the instruction between ThisAddr and + /// NextAddr. In the common case, any live range active at ThisAddr is + /// live-in to the instruction, and any live range active at NextAddr is + /// live-out of the instruction. If IncludeDefinedVars is false, then live + /// ranges starting at NextAddr will be ignored. + void update(object::SectionedAddress ThisAddr, + object::SectionedAddress NextAddr, bool IncludeDefinedVars) { + // First, check variables which have already been assigned a column, so + // that we don't change their order. + SmallSet<unsigned, 8> CheckedVarIdxs; + for (unsigned ColIdx = 0, End = ActiveCols.size(); ColIdx < End; ++ColIdx) { + if (!ActiveCols[ColIdx].isActive()) + continue; + CheckedVarIdxs.insert(ActiveCols[ColIdx].VarIdx); + LiveVariable &LV = LiveVariables[ActiveCols[ColIdx].VarIdx]; + ActiveCols[ColIdx].LiveIn = LV.liveAtAddress(ThisAddr); + ActiveCols[ColIdx].LiveOut = LV.liveAtAddress(NextAddr); + LLVM_DEBUG(dbgs() << "pass 1, " << ThisAddr.Address << "-" + << NextAddr.Address << ", " << LV.VarName << ", Col " + << ColIdx << ": LiveIn=" << ActiveCols[ColIdx].LiveIn + << ", LiveOut=" << ActiveCols[ColIdx].LiveOut << "\n"); + + if (!ActiveCols[ColIdx].LiveIn && !ActiveCols[ColIdx].LiveOut) + ActiveCols[ColIdx].VarIdx = Column::NullVarIdx; + } + + // Next, look for variables which don't already have a column, but which + // are now live. + if (IncludeDefinedVars) { + for (unsigned VarIdx = 0, End = LiveVariables.size(); VarIdx < End; + ++VarIdx) { + if (CheckedVarIdxs.count(VarIdx)) + continue; + LiveVariable &LV = LiveVariables[VarIdx]; + bool LiveIn = LV.liveAtAddress(ThisAddr); + bool LiveOut = LV.liveAtAddress(NextAddr); + if (!LiveIn && !LiveOut) + continue; + + unsigned ColIdx = findFreeColumn(); + LLVM_DEBUG(dbgs() << "pass 2, " << ThisAddr.Address << "-" + << NextAddr.Address << ", " << LV.VarName << ", Col " + << ColIdx << ": LiveIn=" << LiveIn + << ", LiveOut=" << LiveOut << "\n"); + ActiveCols[ColIdx].VarIdx = VarIdx; + ActiveCols[ColIdx].LiveIn = LiveIn; + ActiveCols[ColIdx].LiveOut = LiveOut; + ActiveCols[ColIdx].MustDrawLabel = true; + } + } + } + + enum class LineChar { + RangeStart, + RangeMid, + RangeEnd, + LabelVert, + LabelCornerNew, + LabelCornerActive, + LabelHoriz, + }; + const char *getLineChar(LineChar C) const { + bool IsASCII = DbgVariables == DVASCII; + switch (C) { + case LineChar::RangeStart: + return IsASCII ? "^" : u8"\u2548"; + case LineChar::RangeMid: + return IsASCII ? "|" : u8"\u2503"; + case LineChar::RangeEnd: + return IsASCII ? "v" : u8"\u253b"; + case LineChar::LabelVert: + return IsASCII ? "|" : u8"\u2502"; + case LineChar::LabelCornerNew: + return IsASCII ? "/" : u8"\u250c"; + case LineChar::LabelCornerActive: + return IsASCII ? "|" : u8"\u2520"; + case LineChar::LabelHoriz: + return IsASCII ? "-" : u8"\u2500"; + } + llvm_unreachable("Unhandled LineChar enum"); + } + + /// Print live ranges to the right of an existing line. This assumes the + /// line is not an instruction, so doesn't start or end any live ranges, so + /// we only need to print active ranges or empty columns. If AfterInst is + /// true, this is being printed after the last instruction fed to update(), + /// otherwise this is being printed before it. + void printAfterOtherLine(formatted_raw_ostream &OS, bool AfterInst) { + if (ActiveCols.size()) { + unsigned FirstUnprintedColumn = moveToFirstVarColumn(OS); + for (size_t ColIdx = FirstUnprintedColumn, End = ActiveCols.size(); + ColIdx < End; ++ColIdx) { + if (ActiveCols[ColIdx].isActive()) { + if ((AfterInst && ActiveCols[ColIdx].LiveOut) || + (!AfterInst && ActiveCols[ColIdx].LiveIn)) + OS << getLineChar(LineChar::RangeMid); + else if (!AfterInst && ActiveCols[ColIdx].LiveOut) + OS << getLineChar(LineChar::LabelVert); + else + OS << " "; + } + OS << " "; + } + } + OS << "\n"; + } + + /// Print any live variable range info needed to the right of a + /// non-instruction line of disassembly. This is where we print the variable + /// names and expressions, with thin line-drawing characters connecting them + /// to the live range which starts at the next instruction. If MustPrint is + /// true, we have to print at least one line (with the continuation of any + /// already-active live ranges) because something has already been printed + /// earlier on this line. + void printBetweenInsts(formatted_raw_ostream &OS, bool MustPrint) { + bool PrintedSomething = false; + for (unsigned ColIdx = 0, End = ActiveCols.size(); ColIdx < End; ++ColIdx) { + if (ActiveCols[ColIdx].isActive() && ActiveCols[ColIdx].MustDrawLabel) { + // First we need to print the live range markers for any active + // columns to the left of this one. + OS.PadToColumn(getIndentLevel()); + for (unsigned ColIdx2 = 0; ColIdx2 < ColIdx; ++ColIdx2) { + if (ActiveCols[ColIdx2].isActive()) { + if (ActiveCols[ColIdx2].MustDrawLabel && + !ActiveCols[ColIdx2].LiveIn) + OS << getLineChar(LineChar::LabelVert) << " "; + else + OS << getLineChar(LineChar::RangeMid) << " "; + } else + OS << " "; + } + + // Then print the variable name and location of the new live range, + // with box drawing characters joining it to the live range line. + OS << getLineChar(ActiveCols[ColIdx].LiveIn + ? LineChar::LabelCornerActive + : LineChar::LabelCornerNew) + << getLineChar(LineChar::LabelHoriz) << " "; + WithColor(OS, raw_ostream::GREEN) + << LiveVariables[ActiveCols[ColIdx].VarIdx].VarName; + OS << " = "; + { + WithColor ExprColor(OS, raw_ostream::CYAN); + LiveVariables[ActiveCols[ColIdx].VarIdx].print(OS, MRI); + } + + // If there are any columns to the right of the expression we just + // printed, then continue their live range lines. + unsigned FirstUnprintedColumn = moveToFirstVarColumn(OS); + for (unsigned ColIdx2 = FirstUnprintedColumn, End = ActiveCols.size(); + ColIdx2 < End; ++ColIdx2) { + if (ActiveCols[ColIdx2].isActive() && ActiveCols[ColIdx2].LiveIn) + OS << getLineChar(LineChar::RangeMid) << " "; + else + OS << " "; + } + + OS << "\n"; + PrintedSomething = true; + } + } + + for (unsigned ColIdx = 0, End = ActiveCols.size(); ColIdx < End; ++ColIdx) + if (ActiveCols[ColIdx].isActive()) + ActiveCols[ColIdx].MustDrawLabel = false; + + // If we must print something (because we printed a line/column number), + // but don't have any new variables to print, then print a line which + // just continues any existing live ranges. + if (MustPrint && !PrintedSomething) + printAfterOtherLine(OS, false); + } + + /// Print the live variable ranges to the right of a disassembled instruction. + void printAfterInst(formatted_raw_ostream &OS) { + if (!ActiveCols.size()) + return; + unsigned FirstUnprintedColumn = moveToFirstVarColumn(OS); + for (unsigned ColIdx = FirstUnprintedColumn, End = ActiveCols.size(); + ColIdx < End; ++ColIdx) { + if (!ActiveCols[ColIdx].isActive()) + OS << " "; + else if (ActiveCols[ColIdx].LiveIn && ActiveCols[ColIdx].LiveOut) + OS << getLineChar(LineChar::RangeMid) << " "; + else if (ActiveCols[ColIdx].LiveOut) + OS << getLineChar(LineChar::RangeStart) << " "; + else if (ActiveCols[ColIdx].LiveIn) + OS << getLineChar(LineChar::RangeEnd) << " "; + else + llvm_unreachable("var must be live in or out!"); + } + } +}; + class SourcePrinter { protected: DILineInfo OldLineInfo; @@ -555,20 +943,29 @@ protected: private: bool cacheSource(const DILineInfo& LineInfoFile); + void printLines(formatted_raw_ostream &OS, const DILineInfo &LineInfo, + StringRef Delimiter, LiveVariablePrinter &LVP); + + void printSources(formatted_raw_ostream &OS, const DILineInfo &LineInfo, + StringRef ObjectFilename, StringRef Delimiter, + LiveVariablePrinter &LVP); + public: SourcePrinter() = default; SourcePrinter(const ObjectFile *Obj, StringRef DefaultArch) : Obj(Obj), WarnedNoDebugInfo(false) { symbolize::LLVMSymbolizer::Options SymbolizerOpts; - SymbolizerOpts.PrintFunctions = DILineInfoSpecifier::FunctionNameKind::None; - SymbolizerOpts.Demangle = false; - SymbolizerOpts.DefaultArch = DefaultArch; + SymbolizerOpts.PrintFunctions = + DILineInfoSpecifier::FunctionNameKind::LinkageName; + SymbolizerOpts.Demangle = Demangle; + SymbolizerOpts.DefaultArch = std::string(DefaultArch); Symbolizer.reset(new symbolize::LLVMSymbolizer(SymbolizerOpts)); } virtual ~SourcePrinter() = default; - virtual void printSourceLine(raw_ostream &OS, + virtual void printSourceLine(formatted_raw_ostream &OS, object::SectionedAddress Address, StringRef ObjectFilename, + LiveVariablePrinter &LVP, StringRef Delimiter = "; "); }; @@ -602,9 +999,10 @@ bool SourcePrinter::cacheSource(const DILineInfo &LineInfo) { return true; } -void SourcePrinter::printSourceLine(raw_ostream &OS, +void SourcePrinter::printSourceLine(formatted_raw_ostream &OS, object::SectionedAddress Address, StringRef ObjectFilename, + LiveVariablePrinter &LVP, StringRef Delimiter) { if (!Symbolizer) return; @@ -626,34 +1024,62 @@ void SourcePrinter::printSourceLine(raw_ostream &OS, reportWarning(Warning, ObjectFilename); WarnedNoDebugInfo = true; } - return; } - if (LineInfo.Line == 0 || ((OldLineInfo.Line == LineInfo.Line) && - (OldLineInfo.FileName == LineInfo.FileName))) + if (PrintLines) + printLines(OS, LineInfo, Delimiter, LVP); + if (PrintSource) + printSources(OS, LineInfo, ObjectFilename, Delimiter, LVP); + OldLineInfo = LineInfo; +} + +void SourcePrinter::printLines(formatted_raw_ostream &OS, + const DILineInfo &LineInfo, StringRef Delimiter, + LiveVariablePrinter &LVP) { + bool PrintFunctionName = LineInfo.FunctionName != DILineInfo::BadString && + LineInfo.FunctionName != OldLineInfo.FunctionName; + if (PrintFunctionName) { + OS << Delimiter << LineInfo.FunctionName; + // If demangling is successful, FunctionName will end with "()". Print it + // only if demangling did not run or was unsuccessful. + if (!StringRef(LineInfo.FunctionName).endswith("()")) + OS << "()"; + OS << ":\n"; + } + if (LineInfo.FileName != DILineInfo::BadString && LineInfo.Line != 0 && + (OldLineInfo.Line != LineInfo.Line || + OldLineInfo.FileName != LineInfo.FileName || PrintFunctionName)) { + OS << Delimiter << LineInfo.FileName << ":" << LineInfo.Line; + LVP.printBetweenInsts(OS, true); + } +} + +void SourcePrinter::printSources(formatted_raw_ostream &OS, + const DILineInfo &LineInfo, + StringRef ObjectFilename, StringRef Delimiter, + LiveVariablePrinter &LVP) { + if (LineInfo.FileName == DILineInfo::BadString || LineInfo.Line == 0 || + (OldLineInfo.Line == LineInfo.Line && + OldLineInfo.FileName == LineInfo.FileName)) return; - if (PrintLines) - OS << Delimiter << LineInfo.FileName << ":" << LineInfo.Line << "\n"; - if (PrintSource) { - if (SourceCache.find(LineInfo.FileName) == SourceCache.end()) - if (!cacheSource(LineInfo)) - return; - auto LineBuffer = LineCache.find(LineInfo.FileName); - if (LineBuffer != LineCache.end()) { - if (LineInfo.Line > LineBuffer->second.size()) { - reportWarning( - formatv( - "debug info line number {0} exceeds the number of lines in {1}", - LineInfo.Line, LineInfo.FileName), - ObjectFilename); - return; - } - // Vector begins at 0, line numbers are non-zero - OS << Delimiter << LineBuffer->second[LineInfo.Line - 1] << '\n'; + if (SourceCache.find(LineInfo.FileName) == SourceCache.end()) + if (!cacheSource(LineInfo)) + return; + auto LineBuffer = LineCache.find(LineInfo.FileName); + if (LineBuffer != LineCache.end()) { + if (LineInfo.Line > LineBuffer->second.size()) { + reportWarning( + formatv( + "debug info line number {0} exceeds the number of lines in {1}", + LineInfo.Line, LineInfo.FileName), + ObjectFilename); + return; } + // Vector begins at 0, line numbers are non-zero + OS << Delimiter << LineBuffer->second[LineInfo.Line - 1]; + LVP.printBetweenInsts(OS, true); } - OldLineInfo = LineInfo; } static bool isAArch64Elf(const ObjectFile *Obj) { @@ -670,28 +1096,30 @@ static bool hasMappingSymbols(const ObjectFile *Obj) { return isArmElf(Obj) || isAArch64Elf(Obj); } -static void printRelocation(StringRef FileName, const RelocationRef &Rel, - uint64_t Address, bool Is64Bits) { +static void printRelocation(formatted_raw_ostream &OS, StringRef FileName, + const RelocationRef &Rel, uint64_t Address, + bool Is64Bits) { StringRef Fmt = Is64Bits ? "\t\t%016" PRIx64 ": " : "\t\t\t%08" PRIx64 ": "; SmallString<16> Name; SmallString<32> Val; Rel.getTypeName(Name); if (Error E = getRelocationValueString(Rel, Val)) reportError(std::move(E), FileName); - outs() << format(Fmt.data(), Address) << Name << "\t" << Val << "\n"; + OS << format(Fmt.data(), Address) << Name << "\t" << Val; } class PrettyPrinter { public: virtual ~PrettyPrinter() = default; - virtual void printInst(MCInstPrinter &IP, const MCInst *MI, - ArrayRef<uint8_t> Bytes, - object::SectionedAddress Address, raw_ostream &OS, - StringRef Annot, MCSubtargetInfo const &STI, - SourcePrinter *SP, StringRef ObjectFilename, - std::vector<RelocationRef> *Rels = nullptr) { + virtual void + printInst(MCInstPrinter &IP, const MCInst *MI, ArrayRef<uint8_t> Bytes, + object::SectionedAddress Address, formatted_raw_ostream &OS, + StringRef Annot, MCSubtargetInfo const &STI, SourcePrinter *SP, + StringRef ObjectFilename, std::vector<RelocationRef> *Rels, + LiveVariablePrinter &LVP) { if (SP && (PrintSource || PrintLines)) - SP->printSourceLine(OS, Address, ObjectFilename); + SP->printSourceLine(OS, Address, ObjectFilename, LVP); + LVP.printBetweenInsts(OS, false); size_t Start = OS.tell(); if (!NoLeadingAddr) @@ -703,13 +1131,19 @@ public: // The output of printInst starts with a tab. Print some spaces so that // the tab has 1 column and advances to the target tab stop. - unsigned TabStop = NoShowRawInsn ? 16 : 40; + unsigned TabStop = getInstStartColumn(STI); unsigned Column = OS.tell() - Start; OS.indent(Column < TabStop - 1 ? TabStop - 1 - Column : 7 - Column % 8); - if (MI) - IP.printInst(MI, Address.Address, "", STI, OS); - else + if (MI) { + // See MCInstPrinter::printInst. On targets where a PC relative immediate + // is relative to the next instruction and the length of a MCInst is + // difficult to measure (x86), this is the address of the next + // instruction. + uint64_t Addr = + Address.Address + (STI.getTargetTriple().isX86() ? Bytes.size() : 0); + IP.printInst(MI, Addr, "", STI, OS); + } else OS << "\t<unknown>"; } }; @@ -718,7 +1152,7 @@ PrettyPrinter PrettyPrinterInst; class HexagonPrettyPrinter : public PrettyPrinter { public: void printLead(ArrayRef<uint8_t> Bytes, uint64_t Address, - raw_ostream &OS) { + formatted_raw_ostream &OS) { uint32_t opcode = (Bytes[3] << 24) | (Bytes[2] << 16) | (Bytes[1] << 8) | Bytes[0]; if (!NoLeadingAddr) @@ -730,12 +1164,12 @@ public: } } void printInst(MCInstPrinter &IP, const MCInst *MI, ArrayRef<uint8_t> Bytes, - object::SectionedAddress Address, raw_ostream &OS, + object::SectionedAddress Address, formatted_raw_ostream &OS, StringRef Annot, MCSubtargetInfo const &STI, SourcePrinter *SP, - StringRef ObjectFilename, - std::vector<RelocationRef> *Rels) override { + StringRef ObjectFilename, std::vector<RelocationRef> *Rels, + LiveVariablePrinter &LVP) override { if (SP && (PrintSource || PrintLines)) - SP->printSourceLine(OS, Address, ObjectFilename, ""); + SP->printSourceLine(OS, Address, ObjectFilename, LVP, ""); if (!MI) { printLead(Bytes, Address.Address, OS); OS << " <unknown>"; @@ -761,7 +1195,7 @@ public: auto PrintReloc = [&]() -> void { while ((RelCur != RelEnd) && (RelCur->getOffset() <= Address.Address)) { if (RelCur->getOffset() == Address.Address) { - printRelocation(ObjectFilename, *RelCur, Address.Address, false); + printRelocation(OS, ObjectFilename, *RelCur, Address.Address, false); return; } ++RelCur; @@ -772,7 +1206,7 @@ public: OS << Separator; Separator = "\n"; if (SP && (PrintSource || PrintLines)) - SP->printSourceLine(OS, Address, ObjectFilename, ""); + SP->printSourceLine(OS, Address, ObjectFilename, LVP, ""); printLead(Bytes, Address.Address, OS); OS << Preamble; Preamble = " "; @@ -800,12 +1234,12 @@ HexagonPrettyPrinter HexagonPrettyPrinterInst; class AMDGCNPrettyPrinter : public PrettyPrinter { public: void printInst(MCInstPrinter &IP, const MCInst *MI, ArrayRef<uint8_t> Bytes, - object::SectionedAddress Address, raw_ostream &OS, + object::SectionedAddress Address, formatted_raw_ostream &OS, StringRef Annot, MCSubtargetInfo const &STI, SourcePrinter *SP, - StringRef ObjectFilename, - std::vector<RelocationRef> *Rels) override { + StringRef ObjectFilename, std::vector<RelocationRef> *Rels, + LiveVariablePrinter &LVP) override { if (SP && (PrintSource || PrintLines)) - SP->printSourceLine(OS, Address, ObjectFilename); + SP->printSourceLine(OS, Address, ObjectFilename, LVP); if (MI) { SmallString<40> InstStr; @@ -852,12 +1286,12 @@ AMDGCNPrettyPrinter AMDGCNPrettyPrinterInst; class BPFPrettyPrinter : public PrettyPrinter { public: void printInst(MCInstPrinter &IP, const MCInst *MI, ArrayRef<uint8_t> Bytes, - object::SectionedAddress Address, raw_ostream &OS, + object::SectionedAddress Address, formatted_raw_ostream &OS, StringRef Annot, MCSubtargetInfo const &STI, SourcePrinter *SP, - StringRef ObjectFilename, - std::vector<RelocationRef> *Rels) override { + StringRef ObjectFilename, std::vector<RelocationRef> *Rels, + LiveVariablePrinter &LVP) override { if (SP && (PrintSource || PrintLines)) - SP->printSourceLine(OS, Address, ObjectFilename); + SP->printSourceLine(OS, Address, ObjectFilename, LVP); if (!NoLeadingAddr) OS << format("%8" PRId64 ":", Address.Address / 8); if (!NoShowRawInsn) { @@ -1023,7 +1457,7 @@ getRelocsMap(object::ObjectFile const &Obj) { // TODO: implement for other file formats. static bool shouldAdjustVA(const SectionRef &Section) { const ObjectFile *Obj = Section.getObject(); - if (isa<object::ELFObjectFileBase>(Obj)) + if (Obj->isELF()) return ELFSectionRef(Section).getFlags() & ELF::SHF_ALLOC; return false; } @@ -1043,37 +1477,31 @@ static char getMappingSymbolKind(ArrayRef<MappingSymbolPair> MappingSymbols, return (It - 1)->second; } -static uint64_t -dumpARMELFData(uint64_t SectionAddr, uint64_t Index, uint64_t End, - const ObjectFile *Obj, ArrayRef<uint8_t> Bytes, - ArrayRef<MappingSymbolPair> MappingSymbols) { +static uint64_t dumpARMELFData(uint64_t SectionAddr, uint64_t Index, + uint64_t End, const ObjectFile *Obj, + ArrayRef<uint8_t> Bytes, + ArrayRef<MappingSymbolPair> MappingSymbols, + raw_ostream &OS) { support::endianness Endian = Obj->isLittleEndian() ? support::little : support::big; - while (Index < End) { - outs() << format("%8" PRIx64 ":", SectionAddr + Index); - outs() << "\t"; - if (Index + 4 <= End) { - dumpBytes(Bytes.slice(Index, 4), outs()); - outs() << "\t.word\t" - << format_hex( - support::endian::read32(Bytes.data() + Index, Endian), 10); - Index += 4; - } else if (Index + 2 <= End) { - dumpBytes(Bytes.slice(Index, 2), outs()); - outs() << "\t\t.short\t" - << format_hex( - support::endian::read16(Bytes.data() + Index, Endian), 6); - Index += 2; - } else { - dumpBytes(Bytes.slice(Index, 1), outs()); - outs() << "\t\t.byte\t" << format_hex(Bytes[0], 4); - ++Index; - } - outs() << "\n"; - if (getMappingSymbolKind(MappingSymbols, Index) != 'd') - break; + OS << format("%8" PRIx64 ":\t", SectionAddr + Index); + if (Index + 4 <= End) { + dumpBytes(Bytes.slice(Index, 4), OS); + OS << "\t.word\t" + << format_hex(support::endian::read32(Bytes.data() + Index, Endian), + 10); + return 4; + } + if (Index + 2 <= End) { + dumpBytes(Bytes.slice(Index, 2), OS); + OS << "\t\t.short\t" + << format_hex(support::endian::read16(Bytes.data() + Index, Endian), + 6); + return 2; } - return Index; + dumpBytes(Bytes.slice(Index, 1), OS); + OS << "\t\t.byte\t" << format_hex(Bytes[0], 4); + return 1; } static void dumpELFData(uint64_t SectionAddr, uint64_t Index, uint64_t End, @@ -1110,6 +1538,36 @@ static void dumpELFData(uint64_t SectionAddr, uint64_t Index, uint64_t End, } } +SymbolInfoTy objdump::createSymbolInfo(const ObjectFile *Obj, + const SymbolRef &Symbol) { + const StringRef FileName = Obj->getFileName(); + const uint64_t Addr = unwrapOrError(Symbol.getAddress(), FileName); + const StringRef Name = unwrapOrError(Symbol.getName(), FileName); + + if (Obj->isXCOFF() && SymbolDescription) { + const auto *XCOFFObj = cast<XCOFFObjectFile>(Obj); + DataRefImpl SymbolDRI = Symbol.getRawDataRefImpl(); + + const uint32_t SymbolIndex = XCOFFObj->getSymbolIndex(SymbolDRI.p); + Optional<XCOFF::StorageMappingClass> Smc = + getXCOFFSymbolCsectSMC(XCOFFObj, Symbol); + return SymbolInfoTy(Addr, Name, Smc, SymbolIndex, + isLabel(XCOFFObj, Symbol)); + } else + return SymbolInfoTy(Addr, Name, + Obj->isELF() ? getElfSymbolType(Obj, Symbol) + : (uint8_t)ELF::STT_NOTYPE); +} + +static SymbolInfoTy createDummySymbolInfo(const ObjectFile *Obj, + const uint64_t Addr, StringRef &Name, + uint8_t Type) { + if (Obj->isXCOFF() && SymbolDescription) + return SymbolInfoTy(Addr, Name, None, None, false); + else + return SymbolInfoTy(Addr, Name, Type); +} + static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, MCContext &Ctx, MCDisassembler *PrimaryDisAsm, MCDisassembler *SecondaryDisAsm, @@ -1136,20 +1594,14 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, const StringRef FileName = Obj->getFileName(); const MachOObjectFile *MachO = dyn_cast<const MachOObjectFile>(Obj); for (const SymbolRef &Symbol : Obj->symbols()) { - uint64_t Address = unwrapOrError(Symbol.getAddress(), FileName); - StringRef Name = unwrapOrError(Symbol.getName(), FileName); - if (Name.empty()) + if (Name.empty() && !(Obj->isXCOFF() && SymbolDescription)) continue; - uint8_t SymbolType = ELF::STT_NOTYPE; - if (Obj->isELF()) { - SymbolType = getElfSymbolType(Obj, Symbol); - if (SymbolType == ELF::STT_SECTION) - continue; - } + if (Obj->isELF() && getElfSymbolType(Obj, Symbol) == ELF::STT_SECTION) + continue; - // Don't ask a Mach-O STAB symbol for its section unless you know that + // Don't ask a Mach-O STAB symbol for its section unless you know that // STAB symbol's section field refers to a valid section index. Otherwise // the symbol may error trying to load a section that does not exist. if (MachO) { @@ -1163,10 +1615,11 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, section_iterator SecI = unwrapOrError(Symbol.getSection(), FileName); if (SecI != Obj->section_end()) - AllSymbols[*SecI].emplace_back(Address, Name, SymbolType); + AllSymbols[*SecI].push_back(createSymbolInfo(Obj, Symbol)); else - AbsoluteSymbols.emplace_back(Address, Name, SymbolType); + AbsoluteSymbols.push_back(createSymbolInfo(Obj, Symbol)); } + if (AllSymbols.empty() && Obj->isELF()) addDynamicElfSymbols(Obj, AllSymbols); @@ -1174,25 +1627,32 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, StringSaver Saver(A); addPltEntries(Obj, AllSymbols, Saver); - // Create a mapping from virtual address to section. + // Create a mapping from virtual address to section. An empty section can + // cause more than one section at the same address. Sort such sections to be + // before same-addressed non-empty sections so that symbol lookups prefer the + // non-empty section. std::vector<std::pair<uint64_t, SectionRef>> SectionAddresses; for (SectionRef Sec : Obj->sections()) SectionAddresses.emplace_back(Sec.getAddress(), Sec); - array_pod_sort(SectionAddresses.begin(), SectionAddresses.end()); + llvm::stable_sort(SectionAddresses, [](const auto &LHS, const auto &RHS) { + if (LHS.first != RHS.first) + return LHS.first < RHS.first; + return LHS.second.getSize() < RHS.second.getSize(); + }); // Linked executables (.exe and .dll files) typically don't include a real // symbol table but they might contain an export table. if (const auto *COFFObj = dyn_cast<COFFObjectFile>(Obj)) { for (const auto &ExportEntry : COFFObj->export_directories()) { StringRef Name; - if (std::error_code EC = ExportEntry.getSymbolName(Name)) - reportError(errorCodeToError(EC), Obj->getFileName()); + if (Error E = ExportEntry.getSymbolName(Name)) + reportError(std::move(E), Obj->getFileName()); if (Name.empty()) continue; uint32_t RVA; - if (std::error_code EC = ExportEntry.getExportRVA(RVA)) - reportError(errorCodeToError(EC), Obj->getFileName()); + if (Error E = ExportEntry.getExportRVA(RVA)) + reportError(std::move(E), Obj->getFileName()); uint64_t VA = COFFObj->getImageBase() + RVA; auto Sec = partition_point( @@ -1208,11 +1668,23 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, } // Sort all the symbols, this allows us to use a simple binary search to find - // a symbol near an address. - StringSet<> FoundDisasmFuncsSet; + // Multiple symbols can have the same address. Use a stable sort to stabilize + // the output. + StringSet<> FoundDisasmSymbolSet; for (std::pair<const SectionRef, SectionSymbolsTy> &SecSyms : AllSymbols) - array_pod_sort(SecSyms.second.begin(), SecSyms.second.end()); - array_pod_sort(AbsoluteSymbols.begin(), AbsoluteSymbols.end()); + stable_sort(SecSyms.second); + stable_sort(AbsoluteSymbols); + + std::unique_ptr<DWARFContext> DICtx; + LiveVariablePrinter LVP(*Ctx.getRegisterInfo(), *STI); + + if (DbgVariables != DVDisabled) { + DICtx = DWARFContext::create(*Obj); + for (const std::unique_ptr<DWARFUnit> &CU : DICtx->compile_units()) + LVP.addCompileUnit(CU->getUnitDIE(false)); + } + + LLVM_DEBUG(LVP.dump()); for (const SectionRef &Section : ToolSectionFilter(*Obj)) { if (FilterSections.empty() && !DisassembleAll && @@ -1229,8 +1701,8 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, std::vector<MappingSymbolPair> MappingSymbols; if (hasMappingSymbols(Obj)) { for (const auto &Symb : Symbols) { - uint64_t Address = std::get<0>(Symb); - StringRef Name = std::get<1>(Symb); + uint64_t Address = Symb.Addr; + StringRef Name = Symb.Name; if (Name.startswith("$d")) MappingSymbols.emplace_back(Address - SectionAddr, 'd'); if (Name.startswith("$x")) @@ -1264,11 +1736,11 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, StringRef SectionName = unwrapOrError(Section.getName(), Obj->getFileName()); // If the section has no symbol at the start, just insert a dummy one. - if (Symbols.empty() || std::get<0>(Symbols[0]) != 0) { - Symbols.insert( - Symbols.begin(), - std::make_tuple(SectionAddr, SectionName, - Section.isText() ? ELF::STT_FUNC : ELF::STT_OBJECT)); + if (Symbols.empty() || Symbols[0].Addr != 0) { + Symbols.insert(Symbols.begin(), + createDummySymbolInfo(Obj, SectionAddr, SectionName, + Section.isText() ? ELF::STT_FUNC + : ELF::STT_OBJECT)); } SmallString<40> Comments; @@ -1289,26 +1761,26 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, std::vector<RelocationRef>::const_iterator RelEnd = Rels.end(); // Disassemble symbol by symbol. for (unsigned SI = 0, SE = Symbols.size(); SI != SE; ++SI) { - std::string SymbolName = std::get<1>(Symbols[SI]).str(); + std::string SymbolName = Symbols[SI].Name.str(); if (Demangle) SymbolName = demangle(SymbolName); - // Skip if --disassemble-functions is not empty and the symbol is not in + // Skip if --disassemble-symbols is not empty and the symbol is not in // the list. - if (!DisasmFuncsSet.empty() && !DisasmFuncsSet.count(SymbolName)) + if (!DisasmSymbolSet.empty() && !DisasmSymbolSet.count(SymbolName)) continue; - uint64_t Start = std::get<0>(Symbols[SI]); + uint64_t Start = Symbols[SI].Addr; if (Start < SectionAddr || StopAddress <= Start) continue; else - FoundDisasmFuncsSet.insert(SymbolName); + FoundDisasmSymbolSet.insert(SymbolName); // The end is the section end, the beginning of the next symbol, or // --stop-address. uint64_t End = std::min<uint64_t>(SectionAddr + SectSize, StopAddress); if (SI + 1 < SE) - End = std::min(End, std::get<0>(Symbols[SI + 1])); + End = std::min(End, Symbols[SI + 1].Addr); if (Start >= End || End <= StartAddress) continue; Start -= SectionAddr; @@ -1323,12 +1795,12 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, } if (Obj->isELF() && Obj->getArch() == Triple::amdgcn) { - if (std::get<2>(Symbols[SI]) == ELF::STT_AMDGPU_HSA_KERNEL) { + if (Symbols[SI].Type == ELF::STT_AMDGPU_HSA_KERNEL) { // skip amd_kernel_code_t at the begining of kernel symbol (256 bytes) Start += 256; } if (SI == SE - 1 || - std::get<2>(Symbols[SI + 1]) == ELF::STT_AMDGPU_HSA_KERNEL) { + Symbols[SI + 1].Type == ELF::STT_AMDGPU_HSA_KERNEL) { // cut trailing zeroes at the end of kernel // cut up to 256 bytes const uint64_t EndAlign = 256; @@ -1343,8 +1815,10 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, if (!NoLeadingAddr) outs() << format(Is64Bits ? "%016" PRIx64 " " : "%08" PRIx64 " ", SectionAddr + Start + VMAAdjustment); - - outs() << SymbolName << ":\n"; + if (Obj->isXCOFF() && SymbolDescription) { + outs() << getXCOFFSymbolDescription(Symbols[SI], SymbolName) << ":\n"; + } else + outs() << '<' << SymbolName << ">:\n"; // Don't print raw contents of a virtual section. A virtual section // doesn't have any contents in the file. @@ -1353,10 +1827,37 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, continue; } - // Some targets (like WebAssembly) have a special prelude at the start - // of each symbol. - DisAsm->onSymbolStart(SymbolName, Size, Bytes.slice(Start, End - Start), - SectionAddr + Start, CommentStream); + auto Status = DisAsm->onSymbolStart(Symbols[SI], Size, + Bytes.slice(Start, End - Start), + SectionAddr + Start, CommentStream); + // To have round trippable disassembly, we fall back to decoding the + // remaining bytes as instructions. + // + // If there is a failure, we disassemble the failed region as bytes before + // falling back. The target is expected to print nothing in this case. + // + // If there is Success or SoftFail i.e no 'real' failure, we go ahead by + // Size bytes before falling back. + // So if the entire symbol is 'eaten' by the target: + // Start += Size // Now Start = End and we will never decode as + // // instructions + // + // Right now, most targets return None i.e ignore to treat a symbol + // separately. But WebAssembly decodes preludes for some symbols. + // + if (Status.hasValue()) { + if (Status.getValue() == MCDisassembler::Fail) { + outs() << "// Error in decoding " << SymbolName + << " : Decoding failed region as bytes.\n"; + for (uint64_t I = 0; I < Size; ++I) { + outs() << "\t.byte\t " << format_hex(Bytes[I], 1, /*Upper=*/true) + << "\n"; + } + } + } else { + Size = 0; + } + Start += Size; Index = Start; @@ -1367,7 +1868,7 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, // only disassembling text (applicable all architectures), we are in a // situation where we must print the data and not disassemble it. if (Obj->isELF() && !DisassembleAll && Section.isText()) { - uint8_t SymTy = std::get<2>(Symbols[SI]); + uint8_t SymTy = Symbols[SI].Type; if (SymTy == ELF::STT_OBJECT || SymTy == ELF::STT_COMMON) { dumpELFData(SectionAddr, Index, End, Bytes); Index = End; @@ -1375,123 +1876,155 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, } bool CheckARMELFData = hasMappingSymbols(Obj) && - std::get<2>(Symbols[SI]) != ELF::STT_OBJECT && + Symbols[SI].Type != ELF::STT_OBJECT && !DisassembleAll; + bool DumpARMELFData = false; + formatted_raw_ostream FOS(outs()); while (Index < End) { // ARM and AArch64 ELF binaries can interleave data and text in the // same section. We rely on the markers introduced to understand what // we need to dump. If the data marker is within a function, it is // denoted as a word/short etc. - if (CheckARMELFData && - getMappingSymbolKind(MappingSymbols, Index) == 'd') { - Index = dumpARMELFData(SectionAddr, Index, End, Obj, Bytes, - MappingSymbols); - continue; - } - - // When -z or --disassemble-zeroes are given we always dissasemble - // them. Otherwise we might want to skip zero bytes we see. - if (!DisassembleZeroes) { - uint64_t MaxOffset = End - Index; - // For -reloc: print zero blocks patched by relocations, so that - // relocations can be shown in the dump. - if (RelCur != RelEnd) - MaxOffset = RelCur->getOffset() - Index; - - if (size_t N = - countSkippableZeroBytes(Bytes.slice(Index, MaxOffset))) { - outs() << "\t\t..." << '\n'; - Index += N; - continue; + if (CheckARMELFData) { + char Kind = getMappingSymbolKind(MappingSymbols, Index); + DumpARMELFData = Kind == 'd'; + if (SecondarySTI) { + if (Kind == 'a') { + STI = PrimaryIsThumb ? SecondarySTI : PrimarySTI; + DisAsm = PrimaryIsThumb ? SecondaryDisAsm : PrimaryDisAsm; + } else if (Kind == 't') { + STI = PrimaryIsThumb ? PrimarySTI : SecondarySTI; + DisAsm = PrimaryIsThumb ? PrimaryDisAsm : SecondaryDisAsm; + } } } - if (SecondarySTI) { - if (getMappingSymbolKind(MappingSymbols, Index) == 'a') { - STI = PrimaryIsThumb ? SecondarySTI : PrimarySTI; - DisAsm = PrimaryIsThumb ? SecondaryDisAsm : PrimaryDisAsm; - } else if (getMappingSymbolKind(MappingSymbols, Index) == 't') { - STI = PrimaryIsThumb ? PrimarySTI : SecondarySTI; - DisAsm = PrimaryIsThumb ? PrimaryDisAsm : SecondaryDisAsm; + if (DumpARMELFData) { + Size = dumpARMELFData(SectionAddr, Index, End, Obj, Bytes, + MappingSymbols, FOS); + } else { + // When -z or --disassemble-zeroes are given we always dissasemble + // them. Otherwise we might want to skip zero bytes we see. + if (!DisassembleZeroes) { + uint64_t MaxOffset = End - Index; + // For --reloc: print zero blocks patched by relocations, so that + // relocations can be shown in the dump. + if (RelCur != RelEnd) + MaxOffset = RelCur->getOffset() - Index; + + if (size_t N = + countSkippableZeroBytes(Bytes.slice(Index, MaxOffset))) { + FOS << "\t\t..." << '\n'; + Index += N; + continue; + } } - } - // Disassemble a real instruction or a data when disassemble all is - // provided - MCInst Inst; - bool Disassembled = DisAsm->getInstruction( - Inst, Size, Bytes.slice(Index), SectionAddr + Index, CommentStream); - if (Size == 0) - Size = 1; - - PIP.printInst(*IP, Disassembled ? &Inst : nullptr, - Bytes.slice(Index, Size), - {SectionAddr + Index + VMAAdjustment, Section.getIndex()}, - outs(), "", *STI, &SP, Obj->getFileName(), &Rels); - outs() << CommentStream.str(); - Comments.clear(); - - // Try to resolve the target of a call, tail call, etc. to a specific - // symbol. - if (MIA && (MIA->isCall(Inst) || MIA->isUnconditionalBranch(Inst) || - MIA->isConditionalBranch(Inst))) { - uint64_t Target; - if (MIA->evaluateBranch(Inst, SectionAddr + Index, Size, Target)) { - // In a relocatable object, the target's section must reside in - // the same section as the call instruction or it is accessed - // through a relocation. - // - // In a non-relocatable object, the target may be in any section. - // - // N.B. We don't walk the relocations in the relocatable case yet. - auto *TargetSectionSymbols = &Symbols; - if (!Obj->isRelocatableObject()) { - auto It = partition_point( - SectionAddresses, - [=](const std::pair<uint64_t, SectionRef> &O) { - return O.first <= Target; - }); - if (It != SectionAddresses.begin()) { - --It; - TargetSectionSymbols = &AllSymbols[It->second]; + // Disassemble a real instruction or a data when disassemble all is + // provided + MCInst Inst; + bool Disassembled = + DisAsm->getInstruction(Inst, Size, Bytes.slice(Index), + SectionAddr + Index, CommentStream); + if (Size == 0) + Size = 1; + + LVP.update({Index, Section.getIndex()}, + {Index + Size, Section.getIndex()}, Index + Size != End); + + PIP.printInst( + *IP, Disassembled ? &Inst : nullptr, Bytes.slice(Index, Size), + {SectionAddr + Index + VMAAdjustment, Section.getIndex()}, FOS, + "", *STI, &SP, Obj->getFileName(), &Rels, LVP); + FOS << CommentStream.str(); + Comments.clear(); + + // If disassembly has failed, avoid analysing invalid/incomplete + // instruction information. Otherwise, try to resolve the target + // address (jump target or memory operand address) and print it on the + // right of the instruction. + if (Disassembled && MIA) { + uint64_t Target; + bool PrintTarget = + MIA->evaluateBranch(Inst, SectionAddr + Index, Size, Target); + if (!PrintTarget) + if (Optional<uint64_t> MaybeTarget = + MIA->evaluateMemoryOperandAddress( + Inst, SectionAddr + Index, Size)) { + Target = *MaybeTarget; + PrintTarget = true; + FOS << " # " << Twine::utohexstr(Target); + } + if (PrintTarget) { + // In a relocatable object, the target's section must reside in + // the same section as the call instruction or it is accessed + // through a relocation. + // + // In a non-relocatable object, the target may be in any section. + // In that case, locate the section(s) containing the target + // address and find the symbol in one of those, if possible. + // + // N.B. We don't walk the relocations in the relocatable case yet. + std::vector<const SectionSymbolsTy *> TargetSectionSymbols; + if (!Obj->isRelocatableObject()) { + auto It = llvm::partition_point( + SectionAddresses, + [=](const std::pair<uint64_t, SectionRef> &O) { + return O.first <= Target; + }); + uint64_t TargetSecAddr = 0; + while (It != SectionAddresses.begin()) { + --It; + if (TargetSecAddr == 0) + TargetSecAddr = It->first; + if (It->first != TargetSecAddr) + break; + TargetSectionSymbols.push_back(&AllSymbols[It->second]); + } } else { - TargetSectionSymbols = &AbsoluteSymbols; + TargetSectionSymbols.push_back(&Symbols); + } + TargetSectionSymbols.push_back(&AbsoluteSymbols); + + // Find the last symbol in the first candidate section whose + // offset is less than or equal to the target. If there are no + // such symbols, try in the next section and so on, before finally + // using the nearest preceding absolute symbol (if any), if there + // are no other valid symbols. + const SymbolInfoTy *TargetSym = nullptr; + for (const SectionSymbolsTy *TargetSymbols : + TargetSectionSymbols) { + auto It = llvm::partition_point( + *TargetSymbols, + [=](const SymbolInfoTy &O) { return O.Addr <= Target; }); + if (It != TargetSymbols->begin()) { + TargetSym = &*(It - 1); + break; + } } - } - // Find the last symbol in the section whose offset is less than - // or equal to the target. If there isn't a section that contains - // the target, find the nearest preceding absolute symbol. - auto TargetSym = partition_point( - *TargetSectionSymbols, - [=](const std::tuple<uint64_t, StringRef, uint8_t> &O) { - return std::get<0>(O) <= Target; - }); - if (TargetSym == TargetSectionSymbols->begin()) { - TargetSectionSymbols = &AbsoluteSymbols; - TargetSym = partition_point( - AbsoluteSymbols, - [=](const std::tuple<uint64_t, StringRef, uint8_t> &O) { - return std::get<0>(O) <= Target; - }); - } - if (TargetSym != TargetSectionSymbols->begin()) { - --TargetSym; - uint64_t TargetAddress = std::get<0>(*TargetSym); - StringRef TargetName = std::get<1>(*TargetSym); - outs() << " <" << TargetName; - uint64_t Disp = Target - TargetAddress; - if (Disp) - outs() << "+0x" << Twine::utohexstr(Disp); - outs() << '>'; + if (TargetSym != nullptr) { + uint64_t TargetAddress = TargetSym->Addr; + std::string TargetName = TargetSym->Name.str(); + if (Demangle) + TargetName = demangle(TargetName); + + FOS << " <" << TargetName; + uint64_t Disp = Target - TargetAddress; + if (Disp) + FOS << "+0x" << Twine::utohexstr(Disp); + FOS << '>'; + } } } } - outs() << "\n"; + + LVP.printAfterInst(FOS); + FOS << "\n"; // Hexagon does this in pretty printer if (Obj->getArch() != Triple::hexagon) { - // Print relocation for instruction. + // Print relocation for instruction and data. while (RelCur != RelEnd) { uint64_t Offset = RelCur->getOffset(); // If this relocation is hidden, skip it. @@ -1500,7 +2033,11 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, continue; } - // Stop when RelCur's offset is past the current instruction. + // Stop when RelCur's offset is past the disassembled + // instruction/data. Note that it's possible the disassembled data + // is not the complete data: we might see the relocation printed in + // the middle of the data, but this matches the binutils objdump + // output. if (Offset >= Index + Size) break; @@ -1513,8 +2050,9 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, Offset += AdjustVMA; } - printRelocation(Obj->getFileName(), *RelCur, SectionAddr + Offset, - Is64Bits); + printRelocation(FOS, Obj->getFileName(), *RelCur, + SectionAddr + Offset, Is64Bits); + LVP.printAfterOtherLine(FOS, true); ++RelCur; } } @@ -1523,11 +2061,10 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj, } } } - StringSet<> MissingDisasmFuncsSet = - set_difference(DisasmFuncsSet, FoundDisasmFuncsSet); - for (StringRef MissingDisasmFunc : MissingDisasmFuncsSet.keys()) - reportWarning("failed to disassemble missing function " + MissingDisasmFunc, - FileName); + StringSet<> MissingDisasmSymbolSet = + set_difference(DisasmSymbolSet, FoundDisasmSymbolSet); + for (StringRef Sym : MissingDisasmSymbolSet.keys()) + reportWarning("failed to disassemble missing symbol " + Sym, FileName); } static void disassembleObject(const ObjectFile *Obj, bool InlineRelocs) { @@ -1597,6 +2134,7 @@ static void disassembleObject(const ObjectFile *Obj, bool InlineRelocs) { reportError(Obj->getFileName(), "no instruction printer for target " + TripleName); IP->setPrintImmHex(PrintImmHex); + IP->setPrintBranchImmAsAddress(true); PrettyPrinter &PIP = selectPrettyPrinter(Triple(TripleName)); SourcePrinter SP(Obj, TheTarget->getName()); @@ -1611,7 +2149,7 @@ static void disassembleObject(const ObjectFile *Obj, bool InlineRelocs) { SP, InlineRelocs); } -void printRelocations(const ObjectFile *Obj) { +void objdump::printRelocations(const ObjectFile *Obj) { StringRef Fmt = Obj->getBytesInAddress() > 4 ? "%016" PRIx64 : "%08" PRIx64; // Regular objdump doesn't print relocations in non-relocatable object @@ -1639,6 +2177,11 @@ void printRelocations(const ObjectFile *Obj) { for (std::pair<SectionRef, std::vector<SectionRef>> &P : SecToRelSec) { StringRef SecName = unwrapOrError(P.first.getName(), Obj->getFileName()); outs() << "RELOCATION RECORDS FOR [" << SecName << "]:\n"; + uint32_t OffsetPadding = (Obj->getBytesInAddress() > 4 ? 16 : 8); + uint32_t TypePadding = 24; + outs() << left_justify("OFFSET", OffsetPadding) << " " + << left_justify("TYPE", TypePadding) << " " + << "VALUE\n"; for (SectionRef Section : P.second) { for (const RelocationRef &Reloc : Section.relocations()) { @@ -1651,15 +2194,16 @@ void printRelocations(const ObjectFile *Obj) { if (Error E = getRelocationValueString(Reloc, ValueStr)) reportError(std::move(E), Obj->getFileName()); - outs() << format(Fmt.data(), Address) << " " << RelocName << " " - << ValueStr << "\n"; + outs() << format(Fmt.data(), Address) << " " + << left_justify(RelocName, TypePadding) << " " << ValueStr + << "\n"; } } outs() << "\n"; } } -void printDynamicRelocations(const ObjectFile *Obj) { +void objdump::printDynamicRelocations(const ObjectFile *Obj) { // For the moment, this option is for ELF only if (!Obj->isELF()) return; @@ -1711,7 +2255,7 @@ static size_t getMaxSectionNameWidth(const ObjectFile *Obj) { return MaxWidth; } -void printSectionHeaders(const ObjectFile *Obj) { +void objdump::printSectionHeaders(const ObjectFile *Obj) { size_t NameWidth = getMaxSectionNameWidth(Obj); size_t AddressWidth = 2 * Obj->getBytesInAddress(); bool HasLMAColumn = shouldDisplayLMA(Obj); @@ -1756,7 +2300,7 @@ void printSectionHeaders(const ObjectFile *Obj) { outs() << "\n"; } -void printSectionContents(const ObjectFile *Obj) { +void objdump::printSectionContents(const ObjectFile *Obj) { for (const SectionRef &Section : ToolSectionFilter(*Obj)) { StringRef Name = unwrapOrError(Section.getName(), Obj->getFileName()); uint64_t BaseAddr = Section.getAddress(); @@ -1800,137 +2344,169 @@ void printSectionContents(const ObjectFile *Obj) { } } -void printSymbolTable(const ObjectFile *O, StringRef ArchiveName, - StringRef ArchitectureName) { - outs() << "SYMBOL TABLE:\n"; - - if (const COFFObjectFile *Coff = dyn_cast<const COFFObjectFile>(O)) { - printCOFFSymbolTable(Coff); +void objdump::printSymbolTable(const ObjectFile *O, StringRef ArchiveName, + StringRef ArchitectureName, bool DumpDynamic) { + if (O->isCOFF() && !DumpDynamic) { + outs() << "SYMBOL TABLE:\n"; + printCOFFSymbolTable(cast<const COFFObjectFile>(O)); return; } const StringRef FileName = O->getFileName(); + + if (!DumpDynamic) { + outs() << "SYMBOL TABLE:\n"; + for (auto I = O->symbol_begin(); I != O->symbol_end(); ++I) + printSymbol(O, *I, FileName, ArchiveName, ArchitectureName, DumpDynamic); + return; + } + + outs() << "DYNAMIC SYMBOL TABLE:\n"; + if (!O->isELF()) { + reportWarning( + "this operation is not currently supported for this file format", + FileName); + return; + } + + const ELFObjectFileBase *ELF = cast<const ELFObjectFileBase>(O); + for (auto I = ELF->getDynamicSymbolIterators().begin(); + I != ELF->getDynamicSymbolIterators().end(); ++I) + printSymbol(O, *I, FileName, ArchiveName, ArchitectureName, DumpDynamic); +} + +void objdump::printSymbol(const ObjectFile *O, const SymbolRef &Symbol, + StringRef FileName, StringRef ArchiveName, + StringRef ArchitectureName, bool DumpDynamic) { const MachOObjectFile *MachO = dyn_cast<const MachOObjectFile>(O); - for (auto I = O->symbol_begin(), E = O->symbol_end(); I != E; ++I) { - const SymbolRef &Symbol = *I; - uint64_t Address = unwrapOrError(Symbol.getAddress(), FileName, ArchiveName, - ArchitectureName); - if ((Address < StartAddress) || (Address > StopAddress)) - continue; - SymbolRef::Type Type = unwrapOrError(Symbol.getType(), FileName, - ArchiveName, ArchitectureName); - uint32_t Flags = Symbol.getFlags(); + uint64_t Address = unwrapOrError(Symbol.getAddress(), FileName, ArchiveName, + ArchitectureName); + if ((Address < StartAddress) || (Address > StopAddress)) + return; + SymbolRef::Type Type = + unwrapOrError(Symbol.getType(), FileName, ArchiveName, ArchitectureName); + uint32_t Flags = + unwrapOrError(Symbol.getFlags(), FileName, ArchiveName, ArchitectureName); + + // Don't ask a Mach-O STAB symbol for its section unless you know that + // STAB symbol's section field refers to a valid section index. Otherwise + // the symbol may error trying to load a section that does not exist. + bool IsSTAB = false; + if (MachO) { + DataRefImpl SymDRI = Symbol.getRawDataRefImpl(); + uint8_t NType = + (MachO->is64Bit() ? MachO->getSymbol64TableEntry(SymDRI).n_type + : MachO->getSymbolTableEntry(SymDRI).n_type); + if (NType & MachO::N_STAB) + IsSTAB = true; + } + section_iterator Section = IsSTAB + ? O->section_end() + : unwrapOrError(Symbol.getSection(), FileName, + ArchiveName, ArchitectureName); + + StringRef Name; + if (Type == SymbolRef::ST_Debug && Section != O->section_end()) { + if (Expected<StringRef> NameOrErr = Section->getName()) + Name = *NameOrErr; + else + consumeError(NameOrErr.takeError()); - // Don't ask a Mach-O STAB symbol for its section unless you know that - // STAB symbol's section field refers to a valid section index. Otherwise - // the symbol may error trying to load a section that does not exist. - bool isSTAB = false; - if (MachO) { - DataRefImpl SymDRI = Symbol.getRawDataRefImpl(); - uint8_t NType = (MachO->is64Bit() ? - MachO->getSymbol64TableEntry(SymDRI).n_type: - MachO->getSymbolTableEntry(SymDRI).n_type); - if (NType & MachO::N_STAB) - isSTAB = true; - } - section_iterator Section = isSTAB ? O->section_end() : - unwrapOrError(Symbol.getSection(), FileName, - ArchiveName, ArchitectureName); + } else { + Name = unwrapOrError(Symbol.getName(), FileName, ArchiveName, + ArchitectureName); + } - StringRef Name; - if (Type == SymbolRef::ST_Debug && Section != O->section_end()) { - if (Expected<StringRef> NameOrErr = Section->getName()) - Name = *NameOrErr; - else - consumeError(NameOrErr.takeError()); + bool Global = Flags & SymbolRef::SF_Global; + bool Weak = Flags & SymbolRef::SF_Weak; + bool Absolute = Flags & SymbolRef::SF_Absolute; + bool Common = Flags & SymbolRef::SF_Common; + bool Hidden = Flags & SymbolRef::SF_Hidden; - } else { - Name = unwrapOrError(Symbol.getName(), FileName, ArchiveName, - ArchitectureName); - } + char GlobLoc = ' '; + if ((Section != O->section_end() || Absolute) && !Weak) + GlobLoc = Global ? 'g' : 'l'; + char IFunc = ' '; + if (O->isELF()) { + if (ELFSymbolRef(Symbol).getELFType() == ELF::STT_GNU_IFUNC) + IFunc = 'i'; + if (ELFSymbolRef(Symbol).getBinding() == ELF::STB_GNU_UNIQUE) + GlobLoc = 'u'; + } - bool Global = Flags & SymbolRef::SF_Global; - bool Weak = Flags & SymbolRef::SF_Weak; - bool Absolute = Flags & SymbolRef::SF_Absolute; - bool Common = Flags & SymbolRef::SF_Common; - bool Hidden = Flags & SymbolRef::SF_Hidden; - - char GlobLoc = ' '; - if (Type != SymbolRef::ST_Unknown) - GlobLoc = Global ? 'g' : 'l'; - char Debug = (Type == SymbolRef::ST_Debug || Type == SymbolRef::ST_File) - ? 'd' : ' '; - char FileFunc = ' '; - if (Type == SymbolRef::ST_File) - FileFunc = 'f'; - else if (Type == SymbolRef::ST_Function) - FileFunc = 'F'; - else if (Type == SymbolRef::ST_Data) - FileFunc = 'O'; - - const char *Fmt = O->getBytesInAddress() > 4 ? "%016" PRIx64 : - "%08" PRIx64; - - outs() << format(Fmt, Address) << " " - << GlobLoc // Local -> 'l', Global -> 'g', Neither -> ' ' - << (Weak ? 'w' : ' ') // Weak? - << ' ' // Constructor. Not supported yet. - << ' ' // Warning. Not supported yet. - << ' ' // Indirect reference to another symbol. - << Debug // Debugging (d) or dynamic (D) symbol. - << FileFunc // Name of function (F), file (f) or object (O). - << ' '; - if (Absolute) { - outs() << "*ABS*"; - } else if (Common) { - outs() << "*COM*"; - } else if (Section == O->section_end()) { - outs() << "*UND*"; - } else { - if (const MachOObjectFile *MachO = - dyn_cast<const MachOObjectFile>(O)) { - DataRefImpl DR = Section->getRawDataRefImpl(); - StringRef SegmentName = MachO->getSectionFinalSegmentName(DR); - outs() << SegmentName << ","; - } - StringRef SectionName = - unwrapOrError(Section->getName(), O->getFileName()); - outs() << SectionName; + char Debug = ' '; + if (DumpDynamic) + Debug = 'D'; + else if (Type == SymbolRef::ST_Debug || Type == SymbolRef::ST_File) + Debug = 'd'; + + char FileFunc = ' '; + if (Type == SymbolRef::ST_File) + FileFunc = 'f'; + else if (Type == SymbolRef::ST_Function) + FileFunc = 'F'; + else if (Type == SymbolRef::ST_Data) + FileFunc = 'O'; + + const char *Fmt = O->getBytesInAddress() > 4 ? "%016" PRIx64 : "%08" PRIx64; + + outs() << format(Fmt, Address) << " " + << GlobLoc // Local -> 'l', Global -> 'g', Neither -> ' ' + << (Weak ? 'w' : ' ') // Weak? + << ' ' // Constructor. Not supported yet. + << ' ' // Warning. Not supported yet. + << IFunc // Indirect reference to another symbol. + << Debug // Debugging (d) or dynamic (D) symbol. + << FileFunc // Name of function (F), file (f) or object (O). + << ' '; + if (Absolute) { + outs() << "*ABS*"; + } else if (Common) { + outs() << "*COM*"; + } else if (Section == O->section_end()) { + outs() << "*UND*"; + } else { + if (MachO) { + DataRefImpl DR = Section->getRawDataRefImpl(); + StringRef SegmentName = MachO->getSectionFinalSegmentName(DR); + outs() << SegmentName << ","; } + StringRef SectionName = unwrapOrError(Section->getName(), FileName); + outs() << SectionName; + } - if (Common || isa<ELFObjectFileBase>(O)) { - uint64_t Val = - Common ? Symbol.getAlignment() : ELFSymbolRef(Symbol).getSize(); - outs() << format("\t%08" PRIx64, Val); - } + if (Common || O->isELF()) { + uint64_t Val = + Common ? Symbol.getAlignment() : ELFSymbolRef(Symbol).getSize(); + outs() << '\t' << format(Fmt, Val); + } - if (isa<ELFObjectFileBase>(O)) { - uint8_t Other = ELFSymbolRef(Symbol).getOther(); - switch (Other) { - case ELF::STV_DEFAULT: - break; - case ELF::STV_INTERNAL: - outs() << " .internal"; - break; - case ELF::STV_HIDDEN: - outs() << " .hidden"; - break; - case ELF::STV_PROTECTED: - outs() << " .protected"; - break; - default: - outs() << format(" 0x%02x", Other); - break; - } - } else if (Hidden) { + if (O->isELF()) { + uint8_t Other = ELFSymbolRef(Symbol).getOther(); + switch (Other) { + case ELF::STV_DEFAULT: + break; + case ELF::STV_INTERNAL: + outs() << " .internal"; + break; + case ELF::STV_HIDDEN: outs() << " .hidden"; + break; + case ELF::STV_PROTECTED: + outs() << " .protected"; + break; + default: + outs() << format(" 0x%02x", Other); + break; } - - if (Demangle) - outs() << ' ' << demangle(Name) << '\n'; - else - outs() << ' ' << Name << '\n'; + } else if (Hidden) { + outs() << " .hidden"; } + + if (Demangle) + outs() << ' ' << demangle(std::string(Name)) << '\n'; + else + outs() << ' ' << Name << '\n'; } static void printUnwindInfo(const ObjectFile *O) { @@ -1949,7 +2525,7 @@ static void printUnwindInfo(const ObjectFile *O) { /// Dump the raw contents of the __clangast section so the output can be piped /// into llvm-bcanalyzer. -void printRawClangAST(const ObjectFile *Obj) { +static void printRawClangAST(const ObjectFile *Obj) { if (outs().is_displayed()) { WithColor::error(errs(), ToolName) << "The -raw-clang-ast option will dump the raw binary contents of " @@ -1960,7 +2536,7 @@ void printRawClangAST(const ObjectFile *Obj) { } StringRef ClangASTSectionName("__clangast"); - if (isa<COFFObjectFile>(Obj)) { + if (Obj->isCOFF()) { ClangASTSectionName = "clangast"; } @@ -1988,9 +2564,9 @@ void printRawClangAST(const ObjectFile *Obj) { static void printFaultMaps(const ObjectFile *Obj) { StringRef FaultMapSectionName; - if (isa<ELFObjectFileBase>(Obj)) { + if (Obj->isELF()) { FaultMapSectionName = ".llvm_faultmaps"; - } else if (isa<MachOObjectFile>(Obj)) { + } else if (Obj->isMachO()) { FaultMapSectionName = "__llvm_faultmaps"; } else { WithColor::error(errs(), ToolName) @@ -2156,7 +2732,7 @@ static void dumpObject(ObjectFile *O, const Archive *A = nullptr, outs() << A->getFileName() << "(" << O->getFileName() << ")"; else outs() << O->getFileName(); - outs() << ":\tfile format " << O->getFileFormatName() << "\n\n"; + outs() << ":\tfile format " << O->getFileFormatName().lower() << "\n\n"; } if (StartAddress.getNumOccurrences() || StopAddress.getNumOccurrences()) @@ -2174,6 +2750,9 @@ static void dumpObject(ObjectFile *O, const Archive *A = nullptr, printSectionHeaders(O); if (SymbolTable) printSymbolTable(O, ArchiveName); + if (DynamicSymbolTable) + printSymbolTable(O, ArchiveName, /*ArchitectureName=*/"", + /*DumpDynamic=*/true); if (DwarfDumpType != DIDT_Null) { std::unique_ptr<DIContext> DICtx = DWARFContext::create(*O); // Dump the complete DWARF structure. @@ -2275,7 +2854,6 @@ static void dumpInput(StringRef file) { else reportError(errorCodeToError(object_error::invalid_file_type), file); } -} // namespace llvm int main(int argc, char **argv) { using namespace llvm; @@ -2291,7 +2869,9 @@ int main(int argc, char **argv) { // Register the target printer for --version. cl::AddExtraVersionPrinter(TargetRegistry::printRegisteredTargetsForVersion); - cl::ParseCommandLineOptions(argc, argv, "llvm object file dumper\n"); + cl::ParseCommandLineOptions(argc, argv, "llvm object file dumper\n", nullptr, + /*EnvVar=*/nullptr, + /*LongOptionsUseDoubleDash=*/true); if (StartAddress >= StopAddress) reportCmdLineError("start address should be less than stop address"); @@ -2307,13 +2887,13 @@ int main(int argc, char **argv) { SectionHeaders = SymbolTable = true; if (DisassembleAll || PrintSource || PrintLines || - (!DisassembleFunctions.empty())) + !DisassembleSymbols.empty()) Disassemble = true; if (!ArchiveHeaders && !Disassemble && DwarfDumpType == DIDT_Null && !DynamicRelocations && !FileHeaders && !PrivateHeaders && !RawClangAST && !Relocations && !SectionHeaders && !SectionContents && !SymbolTable && - !UnwindInfo && !FaultMapSection && + !DynamicSymbolTable && !UnwindInfo && !FaultMapSection && !(MachOOpt && (Bind || DataInCode || DylibId || DylibsUsed || ExportsTrie || FirstPrivateHeader || IndirectSymbols || InfoPlist || LazyBind || @@ -2323,8 +2903,7 @@ int main(int argc, char **argv) { return 2; } - DisasmFuncsSet.insert(DisassembleFunctions.begin(), - DisassembleFunctions.end()); + DisasmSymbolSet.insert(DisassembleSymbols.begin(), DisassembleSymbols.end()); llvm::for_each(InputFilenames, dumpInput); |