From 145449b1e420787bb99721a429341fa6be3adfb6 Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Sun, 3 Jul 2022 16:10:23 +0200 Subject: Vendor import of llvm-project main llvmorg-15-init-15358-g53dc0f107877. --- llvm/lib/DebugInfo/PDB/Native/InputFile.cpp | 587 ++++++++++++++++++++++++++++ 1 file changed, 587 insertions(+) create mode 100644 llvm/lib/DebugInfo/PDB/Native/InputFile.cpp (limited to 'llvm/lib/DebugInfo/PDB/Native/InputFile.cpp') diff --git a/llvm/lib/DebugInfo/PDB/Native/InputFile.cpp b/llvm/lib/DebugInfo/PDB/Native/InputFile.cpp new file mode 100644 index 000000000000..495b25077737 --- /dev/null +++ b/llvm/lib/DebugInfo/PDB/Native/InputFile.cpp @@ -0,0 +1,587 @@ +//===- InputFile.cpp ------------------------------------------ *- C++ --*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Native/InputFile.h" + +#include "llvm/BinaryFormat/Magic.h" +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h" +#include "llvm/DebugInfo/CodeView/StringsAndChecksums.h" +#include "llvm/DebugInfo/MSF/MappedBlockStream.h" +#include "llvm/DebugInfo/PDB/Native/DbiStream.h" +#include "llvm/DebugInfo/PDB/Native/FormatUtil.h" +#include "llvm/DebugInfo/PDB/Native/LinePrinter.h" +#include "llvm/DebugInfo/PDB/Native/NativeSession.h" +#include "llvm/DebugInfo/PDB/Native/PDBFile.h" +#include "llvm/DebugInfo/PDB/Native/PDBStringTable.h" +#include "llvm/DebugInfo/PDB/Native/RawError.h" +#include "llvm/DebugInfo/PDB/Native/TpiStream.h" +#include "llvm/DebugInfo/PDB/PDB.h" +#include "llvm/Object/COFF.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/FormatVariadic.h" + +using namespace llvm; +using namespace llvm::codeview; +using namespace llvm::object; +using namespace llvm::pdb; + +InputFile::InputFile() = default; +InputFile::~InputFile() = default; + +Expected +llvm::pdb::getModuleDebugStream(PDBFile &File, StringRef &ModuleName, + uint32_t Index) { + Expected DbiOrErr = File.getPDBDbiStream(); + if (!DbiOrErr) + return DbiOrErr.takeError(); + DbiStream &Dbi = *DbiOrErr; + const auto &Modules = Dbi.modules(); + if (Index >= Modules.getModuleCount()) + return make_error(raw_error_code::index_out_of_bounds, + "Invalid module index"); + + auto Modi = Modules.getModuleDescriptor(Index); + + ModuleName = Modi.getModuleName(); + + uint16_t ModiStream = Modi.getModuleStreamIndex(); + if (ModiStream == kInvalidStreamIndex) + return make_error(raw_error_code::no_stream, + "Module stream not present"); + + auto ModStreamData = File.createIndexedStream(ModiStream); + + ModuleDebugStreamRef ModS(Modi, std::move(ModStreamData)); + if (auto EC = ModS.reload()) + return make_error(raw_error_code::corrupt_file, + "Invalid module stream"); + + return std::move(ModS); +} + +Expected llvm::pdb::getModuleDebugStream(PDBFile &File, + uint32_t Index) { + Expected DbiOrErr = File.getPDBDbiStream(); + if (!DbiOrErr) + return DbiOrErr.takeError(); + DbiStream &Dbi = *DbiOrErr; + const auto &Modules = Dbi.modules(); + auto Modi = Modules.getModuleDescriptor(Index); + + uint16_t ModiStream = Modi.getModuleStreamIndex(); + if (ModiStream == kInvalidStreamIndex) + return make_error(raw_error_code::no_stream, + "Module stream not present"); + + auto ModStreamData = File.createIndexedStream(ModiStream); + + ModuleDebugStreamRef ModS(Modi, std::move(ModStreamData)); + if (Error Err = ModS.reload()) + return make_error(raw_error_code::corrupt_file, + "Invalid module stream"); + + return std::move(ModS); +} + +static inline bool isCodeViewDebugSubsection(object::SectionRef Section, + StringRef Name, + BinaryStreamReader &Reader) { + if (Expected NameOrErr = Section.getName()) { + if (*NameOrErr != Name) + return false; + } else { + consumeError(NameOrErr.takeError()); + return false; + } + + Expected ContentsOrErr = Section.getContents(); + if (!ContentsOrErr) { + consumeError(ContentsOrErr.takeError()); + return false; + } + + Reader = BinaryStreamReader(*ContentsOrErr, support::little); + uint32_t Magic; + if (Reader.bytesRemaining() < sizeof(uint32_t)) + return false; + cantFail(Reader.readInteger(Magic)); + if (Magic != COFF::DEBUG_SECTION_MAGIC) + return false; + return true; +} + +static inline bool isDebugSSection(object::SectionRef Section, + DebugSubsectionArray &Subsections) { + BinaryStreamReader Reader; + if (!isCodeViewDebugSubsection(Section, ".debug$S", Reader)) + return false; + + cantFail(Reader.readArray(Subsections, Reader.bytesRemaining())); + return true; +} + +static bool isDebugTSection(SectionRef Section, CVTypeArray &Types) { + BinaryStreamReader Reader; + if (!isCodeViewDebugSubsection(Section, ".debug$T", Reader) && + !isCodeViewDebugSubsection(Section, ".debug$P", Reader)) + return false; + cantFail(Reader.readArray(Types, Reader.bytesRemaining())); + return true; +} + +static std::string formatChecksumKind(FileChecksumKind Kind) { + switch (Kind) { + RETURN_CASE(FileChecksumKind, None, "None"); + RETURN_CASE(FileChecksumKind, MD5, "MD5"); + RETURN_CASE(FileChecksumKind, SHA1, "SHA-1"); + RETURN_CASE(FileChecksumKind, SHA256, "SHA-256"); + } + return formatUnknownEnum(Kind); +} + +template +static void formatInternal(LinePrinter &Printer, bool Append, Args &&...args) { + if (Append) + Printer.format(std::forward(args)...); + else + Printer.formatLine(std::forward(args)...); +} + +SymbolGroup::SymbolGroup(InputFile *File, uint32_t GroupIndex) : File(File) { + if (!File) + return; + + if (File->isPdb()) + initializeForPdb(GroupIndex); + else { + Name = ".debug$S"; + uint32_t I = 0; + for (const auto &S : File->obj().sections()) { + DebugSubsectionArray SS; + if (!isDebugSSection(S, SS)) + continue; + + if (!SC.hasChecksums() || !SC.hasStrings()) + SC.initialize(SS); + + if (I == GroupIndex) + Subsections = SS; + + if (SC.hasChecksums() && SC.hasStrings()) + break; + } + rebuildChecksumMap(); + } +} + +StringRef SymbolGroup::name() const { return Name; } + +void SymbolGroup::updateDebugS(const codeview::DebugSubsectionArray &SS) { + Subsections = SS; +} + +void SymbolGroup::updatePdbModi(uint32_t Modi) { initializeForPdb(Modi); } + +void SymbolGroup::initializeForPdb(uint32_t Modi) { + assert(File && File->isPdb()); + + // PDB always uses the same string table, but each module has its own + // checksums. So we only set the strings if they're not already set. + if (!SC.hasStrings()) { + auto StringTable = File->pdb().getStringTable(); + if (StringTable) + SC.setStrings(StringTable->getStringTable()); + else + consumeError(StringTable.takeError()); + } + + SC.resetChecksums(); + auto MDS = getModuleDebugStream(File->pdb(), Name, Modi); + if (!MDS) { + consumeError(MDS.takeError()); + return; + } + + DebugStream = std::make_shared(std::move(*MDS)); + Subsections = DebugStream->getSubsectionsArray(); + SC.initialize(Subsections); + rebuildChecksumMap(); +} + +void SymbolGroup::rebuildChecksumMap() { + if (!SC.hasChecksums()) + return; + + for (const auto &Entry : SC.checksums()) { + auto S = SC.strings().getString(Entry.FileNameOffset); + if (!S) + continue; + ChecksumsByFile[*S] = Entry; + } +} + +const ModuleDebugStreamRef &SymbolGroup::getPdbModuleStream() const { + assert(File && File->isPdb() && DebugStream); + return *DebugStream; +} + +Expected SymbolGroup::getNameFromStringTable(uint32_t Offset) const { + return SC.strings().getString(Offset); +} + +Expected SymbolGroup::getNameFromChecksums(uint32_t Offset) const { + StringRef Name; + if (!SC.hasChecksums()) { + return std::move(Name); + } + + auto Iter = SC.checksums().getArray().at(Offset); + if (Iter == SC.checksums().getArray().end()) { + return std::move(Name); + } + + uint32_t FO = Iter->FileNameOffset; + auto ExpectedFile = getNameFromStringTable(FO); + if (!ExpectedFile) { + return std::move(Name); + } + + return *ExpectedFile; +} + +void SymbolGroup::formatFromFileName(LinePrinter &Printer, StringRef File, + bool Append) const { + auto FC = ChecksumsByFile.find(File); + if (FC == ChecksumsByFile.end()) { + formatInternal(Printer, Append, "- (no checksum) {0}", File); + return; + } + + formatInternal(Printer, Append, "- ({0}: {1}) {2}", + formatChecksumKind(FC->getValue().Kind), + toHex(FC->getValue().Checksum), File); +} + +void SymbolGroup::formatFromChecksumsOffset(LinePrinter &Printer, + uint32_t Offset, + bool Append) const { + if (!SC.hasChecksums()) { + formatInternal(Printer, Append, "(unknown file name offset {0})", Offset); + return; + } + + auto Iter = SC.checksums().getArray().at(Offset); + if (Iter == SC.checksums().getArray().end()) { + formatInternal(Printer, Append, "(unknown file name offset {0})", Offset); + return; + } + + uint32_t FO = Iter->FileNameOffset; + auto ExpectedFile = getNameFromStringTable(FO); + if (!ExpectedFile) { + formatInternal(Printer, Append, "(unknown file name offset {0})", Offset); + consumeError(ExpectedFile.takeError()); + return; + } + if (Iter->Kind == FileChecksumKind::None) { + formatInternal(Printer, Append, "{0} (no checksum)", *ExpectedFile); + } else { + formatInternal(Printer, Append, "{0} ({1}: {2})", *ExpectedFile, + formatChecksumKind(Iter->Kind), toHex(Iter->Checksum)); + } +} + +Expected InputFile::open(StringRef Path, bool AllowUnknownFile) { + InputFile IF; + if (!llvm::sys::fs::exists(Path)) + return make_error(formatv("File {0} not found", Path), + inconvertibleErrorCode()); + + file_magic Magic; + if (auto EC = identify_magic(Path, Magic)) + return make_error( + formatv("Unable to identify file type for file {0}", Path), EC); + + if (Magic == file_magic::coff_object) { + Expected> BinaryOrErr = createBinary(Path); + if (!BinaryOrErr) + return BinaryOrErr.takeError(); + + IF.CoffObject = std::move(*BinaryOrErr); + IF.PdbOrObj = llvm::cast(IF.CoffObject.getBinary()); + return std::move(IF); + } + + if (Magic == file_magic::pdb) { + std::unique_ptr Session; + if (auto Err = loadDataForPDB(PDB_ReaderType::Native, Path, Session)) + return std::move(Err); + + IF.PdbSession.reset(static_cast(Session.release())); + IF.PdbOrObj = &IF.PdbSession->getPDBFile(); + + return std::move(IF); + } + + if (!AllowUnknownFile) + return make_error( + formatv("File {0} is not a supported file type", Path), + inconvertibleErrorCode()); + + auto Result = MemoryBuffer::getFile(Path, /*IsText=*/false, + /*RequiresNullTerminator=*/false); + if (!Result) + return make_error( + formatv("File {0} could not be opened", Path), Result.getError()); + + IF.UnknownFile = std::move(*Result); + IF.PdbOrObj = IF.UnknownFile.get(); + return std::move(IF); +} + +PDBFile &InputFile::pdb() { + assert(isPdb()); + return *PdbOrObj.get(); +} + +const PDBFile &InputFile::pdb() const { + assert(isPdb()); + return *PdbOrObj.get(); +} + +object::COFFObjectFile &InputFile::obj() { + assert(isObj()); + return *PdbOrObj.get(); +} + +const object::COFFObjectFile &InputFile::obj() const { + assert(isObj()); + return *PdbOrObj.get(); +} + +MemoryBuffer &InputFile::unknown() { + assert(isUnknown()); + return *PdbOrObj.get(); +} + +const MemoryBuffer &InputFile::unknown() const { + assert(isUnknown()); + return *PdbOrObj.get(); +} + +StringRef InputFile::getFilePath() const { + if (isPdb()) + return pdb().getFilePath(); + if (isObj()) + return obj().getFileName(); + assert(isUnknown()); + return unknown().getBufferIdentifier(); +} + +bool InputFile::hasTypes() const { + if (isPdb()) + return pdb().hasPDBTpiStream(); + + for (const auto &Section : obj().sections()) { + CVTypeArray Types; + if (isDebugTSection(Section, Types)) + return true; + } + return false; +} + +bool InputFile::hasIds() const { + if (isObj()) + return false; + return pdb().hasPDBIpiStream(); +} + +bool InputFile::isPdb() const { return PdbOrObj.is(); } + +bool InputFile::isObj() const { + return PdbOrObj.is(); +} + +bool InputFile::isUnknown() const { return PdbOrObj.is(); } + +codeview::LazyRandomTypeCollection & +InputFile::getOrCreateTypeCollection(TypeCollectionKind Kind) { + if (Types && Kind == kTypes) + return *Types; + if (Ids && Kind == kIds) + return *Ids; + + if (Kind == kIds) { + assert(isPdb() && pdb().hasPDBIpiStream()); + } + + // If the collection was already initialized, we should have just returned it + // in step 1. + if (isPdb()) { + TypeCollectionPtr &Collection = (Kind == kIds) ? Ids : Types; + auto &Stream = cantFail((Kind == kIds) ? pdb().getPDBIpiStream() + : pdb().getPDBTpiStream()); + + auto &Array = Stream.typeArray(); + uint32_t Count = Stream.getNumTypeRecords(); + auto Offsets = Stream.getTypeIndexOffsets(); + Collection = + std::make_unique(Array, Count, Offsets); + return *Collection; + } + + assert(isObj()); + assert(Kind == kTypes); + assert(!Types); + + for (const auto &Section : obj().sections()) { + CVTypeArray Records; + if (!isDebugTSection(Section, Records)) + continue; + + Types = std::make_unique(Records, 100); + return *Types; + } + + Types = std::make_unique(100); + return *Types; +} + +codeview::LazyRandomTypeCollection &InputFile::types() { + return getOrCreateTypeCollection(kTypes); +} + +codeview::LazyRandomTypeCollection &InputFile::ids() { + // Object files have only one type stream that contains both types and ids. + // Similarly, some PDBs don't contain an IPI stream, and for those both types + // and IDs are in the same stream. + if (isObj() || !pdb().hasPDBIpiStream()) + return types(); + + return getOrCreateTypeCollection(kIds); +} + +iterator_range InputFile::symbol_groups() { + return make_range(symbol_groups_begin(), + symbol_groups_end()); +} + +SymbolGroupIterator InputFile::symbol_groups_begin() { + return SymbolGroupIterator(*this); +} + +SymbolGroupIterator InputFile::symbol_groups_end() { + return SymbolGroupIterator(); +} + +SymbolGroupIterator::SymbolGroupIterator() : Value(nullptr) {} + +SymbolGroupIterator::SymbolGroupIterator(InputFile &File) : Value(&File) { + if (File.isObj()) { + SectionIter = File.obj().section_begin(); + scanToNextDebugS(); + } +} + +bool SymbolGroupIterator::operator==(const SymbolGroupIterator &R) const { + bool E = isEnd(); + bool RE = R.isEnd(); + if (E || RE) + return E == RE; + + if (Value.File != R.Value.File) + return false; + return Index == R.Index; +} + +const SymbolGroup &SymbolGroupIterator::operator*() const { + assert(!isEnd()); + return Value; +} +SymbolGroup &SymbolGroupIterator::operator*() { + assert(!isEnd()); + return Value; +} + +SymbolGroupIterator &SymbolGroupIterator::operator++() { + assert(Value.File && !isEnd()); + ++Index; + if (isEnd()) + return *this; + + if (Value.File->isPdb()) { + Value.updatePdbModi(Index); + return *this; + } + + scanToNextDebugS(); + return *this; +} + +void SymbolGroupIterator::scanToNextDebugS() { + assert(SectionIter); + auto End = Value.File->obj().section_end(); + auto &Iter = *SectionIter; + assert(!isEnd()); + + while (++Iter != End) { + DebugSubsectionArray SS; + SectionRef SR = *Iter; + if (!isDebugSSection(SR, SS)) + continue; + + Value.updateDebugS(SS); + return; + } +} + +bool SymbolGroupIterator::isEnd() const { + if (!Value.File) + return true; + if (Value.File->isPdb()) { + DbiStream &Dbi = cantFail(Value.File->pdb().getPDBDbiStream()); + uint32_t Count = Dbi.modules().getModuleCount(); + assert(Index <= Count); + return Index == Count; + } + + assert(SectionIter); + return *SectionIter == Value.File->obj().section_end(); +} + +static bool isMyCode(const SymbolGroup &Group) { + if (Group.getFile().isObj()) + return true; + + StringRef Name = Group.name(); + if (Name.startswith("Import:")) + return false; + if (Name.endswith_insensitive(".dll")) + return false; + if (Name.equals_insensitive("* linker *")) + return false; + if (Name.startswith_insensitive("f:\\binaries\\Intermediate\\vctools")) + return false; + if (Name.startswith_insensitive("f:\\dd\\vctools\\crt")) + return false; + return true; +} + +bool llvm::pdb::shouldDumpSymbolGroup(uint32_t Idx, const SymbolGroup &Group, + const FilterOptions &Filters) { + if (Filters.JustMyCode && !isMyCode(Group)) + return false; + + // If the arg was not specified on the command line, always dump all modules. + if (!Filters.DumpModi) + return true; + + // Otherwise, only dump if this is the same module specified. + return (Filters.DumpModi == Idx); +} -- cgit v1.2.3