diff options
Diffstat (limited to 'llvm/lib/DebugInfo/PDB/Native')
22 files changed, 1125 insertions, 226 deletions
diff --git a/llvm/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp index 419734771ccd5..73801ea1dd1b2 100644 --- a/llvm/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp @@ -39,7 +39,7 @@ static uint32_t calculateDiSymbolStreamSize(uint32_t SymbolByteSize, DbiModuleDescriptorBuilder::DbiModuleDescriptorBuilder(StringRef ModuleName, uint32_t ModIndex, msf::MSFBuilder &Msf) - : MSF(Msf), ModuleName(ModuleName) { + : MSF(Msf), ModuleName(std::string(ModuleName)) { ::memset(&Layout, 0, sizeof(Layout)); Layout.Mod = ModIndex; } @@ -51,7 +51,7 @@ uint16_t DbiModuleDescriptorBuilder::getStreamIndex() const { } void DbiModuleDescriptorBuilder::setObjFileName(StringRef Name) { - ObjFileName = Name; + ObjFileName = std::string(Name); } void DbiModuleDescriptorBuilder::setPdbFilePathNI(uint32_t NI) { @@ -83,14 +83,13 @@ void DbiModuleDescriptorBuilder::addSymbolsInBulk( } void DbiModuleDescriptorBuilder::addSourceFile(StringRef Path) { - SourceFiles.push_back(Path); + SourceFiles.push_back(std::string(Path)); } uint32_t DbiModuleDescriptorBuilder::calculateC13DebugInfoSize() const { uint32_t Result = 0; for (const auto &Builder : C13Builders) { - assert(Builder && "Empty C13 Fragment Builder!"); - Result += Builder->calculateSerializedLength(); + Result += Builder.calculateSerializedLength(); } return Result; } @@ -163,8 +162,7 @@ Error DbiModuleDescriptorBuilder::commit(BinaryStreamWriter &ModiWriter, "Invalid debug section alignment!"); // TODO: Write C11 Line data for (const auto &Builder : C13Builders) { - assert(Builder && "Empty C13 Fragment Builder!"); - if (auto EC = Builder->commit(SymbolWriter)) + if (auto EC = Builder.commit(SymbolWriter, CodeViewContainer::Pdb)) return EC; } @@ -180,12 +178,10 @@ Error DbiModuleDescriptorBuilder::commit(BinaryStreamWriter &ModiWriter, void DbiModuleDescriptorBuilder::addDebugSubsection( std::shared_ptr<DebugSubsection> Subsection) { assert(Subsection); - C13Builders.push_back(std::make_unique<DebugSubsectionRecordBuilder>( - std::move(Subsection), CodeViewContainer::Pdb)); + C13Builders.push_back(DebugSubsectionRecordBuilder(std::move(Subsection))); } void DbiModuleDescriptorBuilder::addDebugSubsection( const DebugSubsectionRecord &SubsectionContents) { - C13Builders.push_back(std::make_unique<DebugSubsectionRecordBuilder>( - SubsectionContents, CodeViewContainer::Pdb)); + C13Builders.push_back(DebugSubsectionRecordBuilder(SubsectionContents)); } diff --git a/llvm/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp index 0e00c2f7ff98c..627aef7506fda 100644 --- a/llvm/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp @@ -58,10 +58,6 @@ void DbiStreamBuilder::setMachineType(COFF::MachineTypes M) { MachineType = static_cast<pdb::PDB_Machine>(static_cast<unsigned>(M)); } -void DbiStreamBuilder::setSectionMap(ArrayRef<SecMapEntry> SecMap) { - SectionMap = SecMap; -} - void DbiStreamBuilder::setGlobalsStreamIndex(uint32_t Index) { GlobalsStreamIndex = Index; } @@ -348,19 +344,18 @@ static uint16_t toSecMapFlags(uint32_t Flags) { return Ret; } -// A utility function to create a Section Map for a given list of COFF sections. +// Populate the Section Map from COFF section headers. // // A Section Map seem to be a copy of a COFF section list in other format. // I don't know why a PDB file contains both a COFF section header and // a Section Map, but it seems it must be present in a PDB. -std::vector<SecMapEntry> DbiStreamBuilder::createSectionMap( +void DbiStreamBuilder::createSectionMap( ArrayRef<llvm::object::coff_section> SecHdrs) { - std::vector<SecMapEntry> Ret; int Idx = 0; auto Add = [&]() -> SecMapEntry & { - Ret.emplace_back(); - auto &Entry = Ret.back(); + SectionMap.emplace_back(); + auto &Entry = SectionMap.back(); memset(&Entry, 0, sizeof(Entry)); Entry.Frame = Idx + 1; @@ -384,8 +379,6 @@ std::vector<SecMapEntry> DbiStreamBuilder::createSectionMap( Entry.Flags = static_cast<uint16_t>(OMFSegDescFlags::AddressIs32Bit) | static_cast<uint16_t>(OMFSegDescFlags::IsAbsoluteAddress); Entry.SecByteLength = UINT32_MAX; - - return Ret; } Error DbiStreamBuilder::commit(const msf::MSFLayout &Layout, @@ -417,7 +410,7 @@ Error DbiStreamBuilder::commit(const msf::MSFLayout &Layout, SecMapHeader SMHeader = {Size, Size}; if (auto EC = Writer.writeObject(SMHeader)) return EC; - if (auto EC = Writer.writeArray(SectionMap)) + if (auto EC = Writer.writeArray(makeArrayRef(SectionMap))) return EC; } diff --git a/llvm/lib/DebugInfo/PDB/Native/EnumTables.cpp b/llvm/lib/DebugInfo/PDB/Native/EnumTables.cpp index f5125393695bc..37192ba36a04e 100644 --- a/llvm/lib/DebugInfo/PDB/Native/EnumTables.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/EnumTables.cpp @@ -34,4 +34,4 @@ ArrayRef<EnumEntry<uint16_t>> getOMFSegMapDescFlagNames() { return makeArrayRef(OMFSegMapDescFlagNames); } } -}
\ No newline at end of file +} diff --git a/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp index 432f1e9b24d3a..4e58489f14014 100644 --- a/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp @@ -5,10 +5,14 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// +// +// The data structures defined in this file are based on the reference +// implementation which is available at +// https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/dbi/gsi.cpp +// +//===----------------------------------------------------------------------===// #include "llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h" - -#include "llvm/ADT/DenseSet.h" #include "llvm/DebugInfo/CodeView/RecordName.h" #include "llvm/DebugInfo/CodeView/SymbolDeserializer.h" #include "llvm/DebugInfo/CodeView/SymbolRecord.h" @@ -20,6 +24,7 @@ #include "llvm/DebugInfo/PDB/Native/Hash.h" #include "llvm/Support/BinaryItemStream.h" #include "llvm/Support/BinaryStreamWriter.h" +#include "llvm/Support/Parallel.h" #include "llvm/Support/xxhash.h" #include <algorithm> #include <vector> @@ -29,53 +34,91 @@ using namespace llvm::msf; using namespace llvm::pdb; using namespace llvm::codeview; +// Helper class for building the public and global PDB hash table buckets. struct llvm::pdb::GSIHashStreamBuilder { - struct SymbolDenseMapInfo { - static inline CVSymbol getEmptyKey() { - static CVSymbol Empty; - return Empty; - } - static inline CVSymbol getTombstoneKey() { - static CVSymbol Tombstone( - DenseMapInfo<ArrayRef<uint8_t>>::getTombstoneKey()); - return Tombstone; - } - static unsigned getHashValue(const CVSymbol &Val) { - return xxHash64(Val.RecordData); - } - static bool isEqual(const CVSymbol &LHS, const CVSymbol &RHS) { - return LHS.RecordData == RHS.RecordData; - } - }; + // Sum of the size of all public or global records. + uint32_t RecordByteSize = 0; - std::vector<CVSymbol> Records; - uint32_t StreamIndex; - llvm::DenseSet<CVSymbol, SymbolDenseMapInfo> SymbolHashes; std::vector<PSHashRecord> HashRecords; + + // The hash bitmap has `ceil((IPHR_HASH + 1) / 32)` words in it. The + // reference implementation builds a hash table with IPHR_HASH buckets in it. + // The last bucket is used to link together free hash table cells in a linked + // list, but it is always empty in the compressed, on-disk format. However, + // the bitmap must have a bit for it. std::array<support::ulittle32_t, (IPHR_HASH + 32) / 32> HashBitmap; + std::vector<support::ulittle32_t> HashBuckets; uint32_t calculateSerializedLength() const; - uint32_t calculateRecordByteSize() const; Error commit(BinaryStreamWriter &Writer); - void finalizeBuckets(uint32_t RecordZeroOffset); - template <typename T> void addSymbol(const T &Symbol, MSFBuilder &Msf) { - T Copy(Symbol); - addSymbol(SymbolSerializer::writeOneSymbol(Copy, Msf.getAllocator(), - CodeViewContainer::Pdb)); - } - void addSymbol(const CVSymbol &Symbol) { - if (Symbol.kind() == S_UDT || Symbol.kind() == S_CONSTANT) { - auto Iter = SymbolHashes.insert(Symbol); - if (!Iter.second) - return; - } + void finalizePublicBuckets(); + void finalizeGlobalBuckets(uint32_t RecordZeroOffset); + + // Assign public and global symbol records into hash table buckets. + // Modifies the list of records to store the bucket index, but does not + // change the order. + void finalizeBuckets(uint32_t RecordZeroOffset, + MutableArrayRef<BulkPublic> Globals); +}; - Records.push_back(Symbol); +// DenseMapInfo implementation for deduplicating symbol records. +struct llvm::pdb::SymbolDenseMapInfo { + static inline CVSymbol getEmptyKey() { + static CVSymbol Empty; + return Empty; + } + static inline CVSymbol getTombstoneKey() { + static CVSymbol Tombstone( + DenseMapInfo<ArrayRef<uint8_t>>::getTombstoneKey()); + return Tombstone; + } + static unsigned getHashValue(const CVSymbol &Val) { + return xxHash64(Val.RecordData); + } + static bool isEqual(const CVSymbol &LHS, const CVSymbol &RHS) { + return LHS.RecordData == RHS.RecordData; } }; +namespace { +LLVM_PACKED_START +struct PublicSym32Layout { + RecordPrefix Prefix; + PublicSym32Header Pub; + // char Name[]; +}; +LLVM_PACKED_END +} // namespace + +// Calculate how much memory this public needs when serialized. +static uint32_t sizeOfPublic(const BulkPublic &Pub) { + uint32_t NameLen = Pub.NameLen; + NameLen = std::min(NameLen, + uint32_t(MaxRecordLength - sizeof(PublicSym32Layout) - 1)); + return alignTo(sizeof(PublicSym32Layout) + NameLen + 1, 4); +} + +static CVSymbol serializePublic(uint8_t *Mem, const BulkPublic &Pub) { + // Assume the caller has allocated sizeOfPublic bytes. + uint32_t NameLen = std::min( + Pub.NameLen, uint32_t(MaxRecordLength - sizeof(PublicSym32Layout) - 1)); + size_t Size = alignTo(sizeof(PublicSym32Layout) + NameLen + 1, 4); + assert(Size == sizeOfPublic(Pub)); + auto *FixedMem = reinterpret_cast<PublicSym32Layout *>(Mem); + FixedMem->Prefix.RecordKind = static_cast<uint16_t>(codeview::S_PUB32); + FixedMem->Prefix.RecordLen = static_cast<uint16_t>(Size - 2); + FixedMem->Pub.Flags = Pub.Flags; + FixedMem->Pub.Offset = Pub.Offset; + FixedMem->Pub.Segment = Pub.Segment; + char *NameMem = reinterpret_cast<char *>(FixedMem + 1); + memcpy(NameMem, Pub.Name, NameLen); + // Zero the null terminator and remaining bytes. + memset(&NameMem[NameLen], 0, Size - sizeof(PublicSym32Layout) - NameLen); + return CVSymbol(makeArrayRef(reinterpret_cast<uint8_t *>(Mem), Size)); +} + uint32_t GSIHashStreamBuilder::calculateSerializedLength() const { uint32_t Size = sizeof(GSIHashHeader); Size += HashRecords.size() * sizeof(PSHashRecord); @@ -84,13 +127,6 @@ uint32_t GSIHashStreamBuilder::calculateSerializedLength() const { return Size; } -uint32_t GSIHashStreamBuilder::calculateRecordByteSize() const { - uint32_t Size = 0; - for (const auto &Sym : Records) - Size += Sym.length(); - return Size; -} - Error GSIHashStreamBuilder::commit(BinaryStreamWriter &Writer) { GSIHashHeader Header; Header.VerSignature = GSIHashHeader::HdrSignature; @@ -115,70 +151,134 @@ static bool isAsciiString(StringRef S) { } // See `caseInsensitiveComparePchPchCchCch` in gsi.cpp -static bool gsiRecordLess(StringRef S1, StringRef S2) { +static int gsiRecordCmp(StringRef S1, StringRef S2) { size_t LS = S1.size(); size_t RS = S2.size(); // Shorter strings always compare less than longer strings. if (LS != RS) - return LS < RS; + return LS - RS; // If either string contains non ascii characters, memcmp them. if (LLVM_UNLIKELY(!isAsciiString(S1) || !isAsciiString(S2))) - return memcmp(S1.data(), S2.data(), LS) < 0; + return memcmp(S1.data(), S2.data(), LS); // Both strings are ascii, perform a case-insenstive comparison. - return S1.compare_lower(S2.data()) < 0; + return S1.compare_lower(S2.data()); +} + +void GSIStreamBuilder::finalizePublicBuckets() { + PSH->finalizeBuckets(0, Publics); } -void GSIHashStreamBuilder::finalizeBuckets(uint32_t RecordZeroOffset) { - std::array<std::vector<std::pair<StringRef, PSHashRecord>>, IPHR_HASH + 1> - TmpBuckets; +void GSIStreamBuilder::finalizeGlobalBuckets(uint32_t RecordZeroOffset) { + // Build up a list of globals to be bucketed. Use the BulkPublic data + // structure for this purpose, even though these are global records, not + // public records. Most of the same fields are required: + // - Name + // - NameLen + // - SymOffset + // - BucketIdx + // The dead fields are Offset, Segment, and Flags. + std::vector<BulkPublic> Records; + Records.resize(Globals.size()); uint32_t SymOffset = RecordZeroOffset; - for (const CVSymbol &Sym : Records) { - PSHashRecord HR; - // Add one when writing symbol offsets to disk. See GSI1::fixSymRecs. - HR.Off = SymOffset + 1; - HR.CRef = 1; // Always use a refcount of 1. - - // Hash the name to figure out which bucket this goes into. - StringRef Name = getSymbolName(Sym); - size_t BucketIdx = hashStringV1(Name) % IPHR_HASH; - TmpBuckets[BucketIdx].push_back(std::make_pair(Name, HR)); - SymOffset += Sym.length(); + for (size_t I = 0, E = Globals.size(); I < E; ++I) { + StringRef Name = getSymbolName(Globals[I]); + Records[I].Name = Name.data(); + Records[I].NameLen = Name.size(); + Records[I].SymOffset = SymOffset; + SymOffset += Globals[I].length(); + } + + GSH->finalizeBuckets(RecordZeroOffset, Records); +} + +void GSIHashStreamBuilder::finalizeBuckets( + uint32_t RecordZeroOffset, MutableArrayRef<BulkPublic> Records) { + // Hash every name in parallel. + parallelForEachN(0, Records.size(), [&](size_t I) { + Records[I].setBucketIdx(hashStringV1(Records[I].Name) % IPHR_HASH); + }); + + // Count up the size of each bucket. Then, use an exclusive prefix sum to + // calculate the bucket start offsets. This is C++17 std::exclusive_scan, but + // we can't use it yet. + uint32_t BucketStarts[IPHR_HASH] = {0}; + for (const BulkPublic &P : Records) + ++BucketStarts[P.BucketIdx]; + uint32_t Sum = 0; + for (uint32_t &B : BucketStarts) { + uint32_t Size = B; + B = Sum; + Sum += Size; + } + + // Place globals into the hash table in bucket order. When placing a global, + // update the bucket start. Every hash table slot should be filled. Always use + // a refcount of one for now. + HashRecords.resize(Records.size()); + uint32_t BucketCursors[IPHR_HASH]; + memcpy(BucketCursors, BucketStarts, sizeof(BucketCursors)); + for (int I = 0, E = Records.size(); I < E; ++I) { + uint32_t HashIdx = BucketCursors[Records[I].BucketIdx]++; + HashRecords[HashIdx].Off = I; + HashRecords[HashIdx].CRef = 1; } - // Compute the three tables: the hash records in bucket and chain order, the - // bucket presence bitmap, and the bucket chain start offsets. - HashRecords.reserve(Records.size()); - for (ulittle32_t &Word : HashBitmap) - Word = 0; - for (size_t BucketIdx = 0; BucketIdx < IPHR_HASH + 1; ++BucketIdx) { - auto &Bucket = TmpBuckets[BucketIdx]; - if (Bucket.empty()) - continue; - HashBitmap[BucketIdx / 32] |= 1U << (BucketIdx % 32); - - // Calculate what the offset of the first hash record in the chain would - // be if it were inflated to contain 32-bit pointers. On a 32-bit system, - // each record would be 12 bytes. See HROffsetCalc in gsi.h. - const int SizeOfHROffsetCalc = 12; - ulittle32_t ChainStartOff = - ulittle32_t(HashRecords.size() * SizeOfHROffsetCalc); - HashBuckets.push_back(ChainStartOff); - - // Sort each bucket by memcmp of the symbol's name. It's important that - // we use the same sorting algorithm as is used by the reference - // implementation to ensure that the search for a record within a bucket - // can properly early-out when it detects the record won't be found. The - // algorithm used here corredsponds to the function - // caseInsensitiveComparePchPchCchCch in the reference implementation. - llvm::sort(Bucket, [](const std::pair<StringRef, PSHashRecord> &Left, - const std::pair<StringRef, PSHashRecord> &Right) { - return gsiRecordLess(Left.first, Right.first); - }); - - for (const auto &Entry : Bucket) - HashRecords.push_back(Entry.second); + // Within the buckets, sort each bucket by memcmp of the symbol's name. It's + // important that we use the same sorting algorithm as is used by the + // reference implementation to ensure that the search for a record within a + // bucket can properly early-out when it detects the record won't be found. + // The algorithm used here corresponds to the function + // caseInsensitiveComparePchPchCchCch in the reference implementation. + parallelForEachN(0, IPHR_HASH, [&](size_t I) { + auto B = HashRecords.begin() + BucketStarts[I]; + auto E = HashRecords.begin() + BucketCursors[I]; + if (B == E) + return; + auto BucketCmp = [Records](const PSHashRecord &LHash, + const PSHashRecord &RHash) { + const BulkPublic &L = Records[uint32_t(LHash.Off)]; + const BulkPublic &R = Records[uint32_t(RHash.Off)]; + assert(L.BucketIdx == R.BucketIdx); + int Cmp = gsiRecordCmp(L.getName(), R.getName()); + if (Cmp != 0) + return Cmp < 0; + // This comparison is necessary to make the sorting stable in the presence + // of two static globals with the same name. The easiest way to observe + // this is with S_LDATA32 records. + return L.SymOffset < R.SymOffset; + }; + llvm::sort(B, E, BucketCmp); + + // After we are done sorting, replace the global indices with the stream + // offsets of each global. Add one when writing symbol offsets to disk. + // See GSI1::fixSymRecs. + for (PSHashRecord &HRec : make_range(B, E)) + HRec.Off = Records[uint32_t(HRec.Off)].SymOffset + 1; + }); + + // For each non-empty bucket, push the bucket start offset into HashBuckets + // and set a bit in the hash bitmap. + for (uint32_t I = 0; I < HashBitmap.size(); ++I) { + uint32_t Word = 0; + for (uint32_t J = 0; J < 32; ++J) { + // Skip empty buckets. + uint32_t BucketIdx = I * 32 + J; + if (BucketIdx >= IPHR_HASH || + BucketStarts[BucketIdx] == BucketCursors[BucketIdx]) + continue; + Word |= (1U << J); + + // Calculate what the offset of the first hash record in the chain would + // be if it were inflated to contain 32-bit pointers. On a 32-bit system, + // each record would be 12 bytes. See HROffsetCalc in gsi.h. + const int SizeOfHROffsetCalc = 12; + ulittle32_t ChainStartOff = + ulittle32_t(BucketStarts[BucketIdx] * SizeOfHROffsetCalc); + HashBuckets.push_back(ChainStartOff); + } + HashBitmap[I] = Word; } } @@ -192,7 +292,7 @@ uint32_t GSIStreamBuilder::calculatePublicsHashStreamSize() const { uint32_t Size = 0; Size += sizeof(PublicsStreamHeader); Size += PSH->calculateSerializedLength(); - Size += PSH->Records.size() * sizeof(uint32_t); // AddrMap + Size += Publics.size() * sizeof(uint32_t); // AddrMap // FIXME: Add thunk map and section offsets for incremental linking. return Size; @@ -204,103 +304,90 @@ uint32_t GSIStreamBuilder::calculateGlobalsHashStreamSize() const { Error GSIStreamBuilder::finalizeMsfLayout() { // First we write public symbol records, then we write global symbol records. - uint32_t PSHZero = 0; - uint32_t GSHZero = PSH->calculateRecordByteSize(); - - PSH->finalizeBuckets(PSHZero); - GSH->finalizeBuckets(GSHZero); + finalizePublicBuckets(); + finalizeGlobalBuckets(PSH->RecordByteSize); Expected<uint32_t> Idx = Msf.addStream(calculateGlobalsHashStreamSize()); if (!Idx) return Idx.takeError(); - GSH->StreamIndex = *Idx; + GlobalsStreamIndex = *Idx; + Idx = Msf.addStream(calculatePublicsHashStreamSize()); if (!Idx) return Idx.takeError(); - PSH->StreamIndex = *Idx; + PublicsStreamIndex = *Idx; - uint32_t RecordBytes = - GSH->calculateRecordByteSize() + PSH->calculateRecordByteSize(); + uint32_t RecordBytes = PSH->RecordByteSize + GSH->RecordByteSize; Idx = Msf.addStream(RecordBytes); if (!Idx) return Idx.takeError(); - RecordStreamIdx = *Idx; + RecordStreamIndex = *Idx; return Error::success(); } -static bool comparePubSymByAddrAndName( - const std::pair<const CVSymbol *, const PublicSym32 *> &LS, - const std::pair<const CVSymbol *, const PublicSym32 *> &RS) { - if (LS.second->Segment != RS.second->Segment) - return LS.second->Segment < RS.second->Segment; - if (LS.second->Offset != RS.second->Offset) - return LS.second->Offset < RS.second->Offset; +void GSIStreamBuilder::addPublicSymbols(std::vector<BulkPublic> &&PublicsIn) { + assert(Publics.empty() && PSH->RecordByteSize == 0 && + "publics can only be added once"); + Publics = std::move(PublicsIn); - return LS.second->Name < RS.second->Name; -} - -/// Compute the address map. The address map is an array of symbol offsets -/// sorted so that it can be binary searched by address. -static std::vector<ulittle32_t> computeAddrMap(ArrayRef<CVSymbol> Records) { - // Make a vector of pointers to the symbols so we can sort it by address. - // Also gather the symbol offsets while we're at it. - - std::vector<PublicSym32> DeserializedPublics; - std::vector<std::pair<const CVSymbol *, const PublicSym32 *>> PublicsByAddr; - std::vector<uint32_t> SymOffsets; - DeserializedPublics.reserve(Records.size()); - PublicsByAddr.reserve(Records.size()); - SymOffsets.reserve(Records.size()); + // Sort the symbols by name. PDBs contain lots of symbols, so use parallelism. + parallelSort(Publics, [](const BulkPublic &L, const BulkPublic &R) { + return L.getName() < R.getName(); + }); + // Assign offsets and calculate the length of the public symbol records. uint32_t SymOffset = 0; - for (const CVSymbol &Sym : Records) { - assert(Sym.kind() == SymbolKind::S_PUB32); - DeserializedPublics.push_back( - cantFail(SymbolDeserializer::deserializeAs<PublicSym32>(Sym))); - PublicsByAddr.emplace_back(&Sym, &DeserializedPublics.back()); - SymOffsets.push_back(SymOffset); - SymOffset += Sym.length(); - } - llvm::stable_sort(PublicsByAddr, comparePubSymByAddrAndName); - - // Fill in the symbol offsets in the appropriate order. - std::vector<ulittle32_t> AddrMap; - AddrMap.reserve(Records.size()); - for (auto &Sym : PublicsByAddr) { - ptrdiff_t Idx = std::distance(Records.data(), Sym.first); - assert(Idx >= 0 && size_t(Idx) < Records.size()); - AddrMap.push_back(ulittle32_t(SymOffsets[Idx])); + for (BulkPublic &Pub : Publics) { + Pub.SymOffset = SymOffset; + SymOffset += sizeOfPublic(Pub); } - return AddrMap; -} -uint32_t GSIStreamBuilder::getPublicsStreamIndex() const { - return PSH->StreamIndex; + // Remember the length of the public stream records. + PSH->RecordByteSize = SymOffset; } -uint32_t GSIStreamBuilder::getGlobalsStreamIndex() const { - return GSH->StreamIndex; +void GSIStreamBuilder::addGlobalSymbol(const ProcRefSym &Sym) { + serializeAndAddGlobal(Sym); } -void GSIStreamBuilder::addPublicSymbol(const PublicSym32 &Pub) { - PSH->addSymbol(Pub, Msf); +void GSIStreamBuilder::addGlobalSymbol(const DataSym &Sym) { + serializeAndAddGlobal(Sym); } -void GSIStreamBuilder::addGlobalSymbol(const ProcRefSym &Sym) { - GSH->addSymbol(Sym, Msf); +void GSIStreamBuilder::addGlobalSymbol(const ConstantSym &Sym) { + serializeAndAddGlobal(Sym); } -void GSIStreamBuilder::addGlobalSymbol(const DataSym &Sym) { - GSH->addSymbol(Sym, Msf); +template <typename T> +void GSIStreamBuilder::serializeAndAddGlobal(const T &Symbol) { + T Copy(Symbol); + addGlobalSymbol(SymbolSerializer::writeOneSymbol(Copy, Msf.getAllocator(), + CodeViewContainer::Pdb)); } -void GSIStreamBuilder::addGlobalSymbol(const ConstantSym &Sym) { - GSH->addSymbol(Sym, Msf); +void GSIStreamBuilder::addGlobalSymbol(const codeview::CVSymbol &Symbol) { + // Ignore duplicate typedefs and constants. + if (Symbol.kind() == S_UDT || Symbol.kind() == S_CONSTANT) { + auto Iter = GlobalsSeen.insert(Symbol); + if (!Iter.second) + return; + } + GSH->RecordByteSize += Symbol.length(); + Globals.push_back(Symbol); } -void GSIStreamBuilder::addGlobalSymbol(const codeview::CVSymbol &Sym) { - GSH->addSymbol(Sym); +// Serialize each public and write it. +static Error writePublics(BinaryStreamWriter &Writer, + ArrayRef<BulkPublic> Publics) { + std::vector<uint8_t> Storage; + for (const BulkPublic &Pub : Publics) { + Storage.resize(sizeOfPublic(Pub)); + serializePublic(Storage.data(), Pub); + if (Error E = Writer.writeBytes(Storage)) + return E; + } + return Error::success(); } static Error writeRecords(BinaryStreamWriter &Writer, @@ -318,14 +405,42 @@ Error GSIStreamBuilder::commitSymbolRecordStream( // Write public symbol records first, followed by global symbol records. This // must match the order that we assume in finalizeMsfLayout when computing // PSHZero and GSHZero. - if (auto EC = writeRecords(Writer, PSH->Records)) + if (auto EC = writePublics(Writer, Publics)) return EC; - if (auto EC = writeRecords(Writer, GSH->Records)) + if (auto EC = writeRecords(Writer, Globals)) return EC; return Error::success(); } +static std::vector<support::ulittle32_t> +computeAddrMap(ArrayRef<BulkPublic> Publics) { + // Build a parallel vector of indices into the Publics vector, and sort it by + // address. + std::vector<ulittle32_t> PubAddrMap; + PubAddrMap.reserve(Publics.size()); + for (int I = 0, E = Publics.size(); I < E; ++I) + PubAddrMap.push_back(ulittle32_t(I)); + + auto AddrCmp = [Publics](const ulittle32_t &LIdx, const ulittle32_t &RIdx) { + const BulkPublic &L = Publics[LIdx]; + const BulkPublic &R = Publics[RIdx]; + if (L.Segment != R.Segment) + return L.Segment < R.Segment; + if (L.Offset != R.Offset) + return L.Offset < R.Offset; + // parallelSort is unstable, so we have to do name comparison to ensure + // that two names for the same location come out in a deterministic order. + return L.getName() < R.getName(); + }; + parallelSort(PubAddrMap, AddrCmp); + + // Rewrite the public symbol indices into symbol offsets. + for (ulittle32_t &Entry : PubAddrMap) + Entry = Publics[Entry].SymOffset; + return PubAddrMap; +} + Error GSIStreamBuilder::commitPublicsHashStream( WritableBinaryStreamRef Stream) { BinaryStreamWriter Writer(Stream); @@ -333,7 +448,7 @@ Error GSIStreamBuilder::commitPublicsHashStream( // FIXME: Fill these in. They are for incremental linking. Header.SymHash = PSH->calculateSerializedLength(); - Header.AddrMap = PSH->Records.size() * 4; + Header.AddrMap = Publics.size() * 4; Header.NumThunks = 0; Header.SizeOfThunk = 0; Header.ISectThunkTable = 0; @@ -346,8 +461,9 @@ Error GSIStreamBuilder::commitPublicsHashStream( if (auto EC = PSH->commit(Writer)) return EC; - std::vector<ulittle32_t> AddrMap = computeAddrMap(PSH->Records); - if (auto EC = Writer.writeArray(makeArrayRef(AddrMap))) + std::vector<support::ulittle32_t> PubAddrMap = computeAddrMap(Publics); + assert(PubAddrMap.size() == Publics.size()); + if (auto EC = Writer.writeArray(makeArrayRef(PubAddrMap))) return EC; return Error::success(); @@ -366,7 +482,7 @@ Error GSIStreamBuilder::commit(const msf::MSFLayout &Layout, auto PS = WritableMappedBlockStream::createIndexedStream( Layout, Buffer, getPublicsStreamIndex(), Msf.getAllocator()); auto PRS = WritableMappedBlockStream::createIndexedStream( - Layout, Buffer, getRecordStreamIdx(), Msf.getAllocator()); + Layout, Buffer, getRecordStreamIndex(), Msf.getAllocator()); if (auto EC = commitSymbolRecordStream(*PRS)) return EC; diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp index 39ae84acba202..7717f062eac11 100644 --- a/llvm/lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp @@ -49,11 +49,11 @@ SymIndexId NativeCompilandSymbol::getLexicalParentId() const { return 0; } // this potential confusion. std::string NativeCompilandSymbol::getLibraryName() const { - return Module.getObjFileName(); + return std::string(Module.getObjFileName()); } std::string NativeCompilandSymbol::getName() const { - return Module.getModuleName(); + return std::string(Module.getModuleName()); } } // namespace pdb diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeEnumInjectedSources.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeEnumInjectedSources.cpp index 2f6a5bc3d5744..7a258acbd7c03 100644 --- a/llvm/lib/DebugInfo/PDB/Native/NativeEnumInjectedSources.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/NativeEnumInjectedSources.cpp @@ -48,19 +48,19 @@ public: std::string getFileName() const override { StringRef Ret = cantFail(Strings.getStringForID(Entry.FileNI), "InjectedSourceStream should have rejected this"); - return Ret; + return std::string(Ret); } std::string getObjectFileName() const override { StringRef Ret = cantFail(Strings.getStringForID(Entry.ObjNI), "InjectedSourceStream should have rejected this"); - return Ret; + return std::string(Ret); } std::string getVirtualFileName() const override { StringRef Ret = cantFail(Strings.getStringForID(Entry.VFileNI), "InjectedSourceStream should have rejected this"); - return Ret; + return std::string(Ret); } uint32_t getCompression() const override { return Entry.Compression; } diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeEnumLineNumbers.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeEnumLineNumbers.cpp new file mode 100644 index 0000000000000..1e4b076463351 --- /dev/null +++ b/llvm/lib/DebugInfo/PDB/Native/NativeEnumLineNumbers.cpp @@ -0,0 +1,42 @@ +//==- NativeEnumLineNumbers.cpp - Native Type Enumerator impl ----*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Native/NativeEnumLineNumbers.h" + +#include "llvm/ADT/STLExtras.h" +#include "llvm/DebugInfo/CodeView/DebugLinesSubsection.h" +#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h" +#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h" +#include "llvm/DebugInfo/PDB/Native/NativeLineNumber.h" +#include "llvm/DebugInfo/PDB/Native/NativeSession.h" +#include "llvm/DebugInfo/PDB/Native/NativeSourceFile.h" + +using namespace llvm; +using namespace llvm::codeview; +using namespace llvm::pdb; + +NativeEnumLineNumbers::NativeEnumLineNumbers( + std::vector<NativeLineNumber> LineNums) + : Lines(std::move(LineNums)), Index(0) {} + +uint32_t NativeEnumLineNumbers::getChildCount() const { + return static_cast<uint32_t>(Lines.size()); +} + +std::unique_ptr<IPDBLineNumber> +NativeEnumLineNumbers::getChildAtIndex(uint32_t N) const { + if (N >= getChildCount()) + return nullptr; + return std::make_unique<NativeLineNumber>(Lines[N]); +} + +std::unique_ptr<IPDBLineNumber> NativeEnumLineNumbers::getNext() { + return getChildAtIndex(Index++); +} + +void NativeEnumLineNumbers::reset() { Index = 0; } diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp index 3f393409129b1..895f8943157a3 100644 --- a/llvm/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp @@ -73,7 +73,7 @@ uint32_t NativeExeSymbol::getAge() const { } std::string NativeExeSymbol::getSymbolsFileName() const { - return Session.getPDBFile().getFilePath(); + return std::string(Session.getPDBFile().getFilePath()); } codeview::GUID NativeExeSymbol::getGuid() const { diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeFunctionSymbol.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeFunctionSymbol.cpp new file mode 100644 index 0000000000000..2537daa7493c6 --- /dev/null +++ b/llvm/lib/DebugInfo/PDB/Native/NativeFunctionSymbol.cpp @@ -0,0 +1,57 @@ +//===- NativeFunctionSymbol.cpp - info about function symbols----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Native/NativeFunctionSymbol.h" + +#include "llvm/DebugInfo/CodeView/SymbolRecord.h" +#include "llvm/DebugInfo/PDB/Native/NativeTypeBuiltin.h" +#include "llvm/DebugInfo/PDB/Native/NativeTypeEnum.h" + +using namespace llvm; +using namespace llvm::codeview; +using namespace llvm::pdb; + +NativeFunctionSymbol::NativeFunctionSymbol(NativeSession &Session, + SymIndexId Id, + const codeview::ProcSym &Sym) + : NativeRawSymbol(Session, PDB_SymType::Data, Id), Sym(Sym) {} + +NativeFunctionSymbol::~NativeFunctionSymbol() {} + +void NativeFunctionSymbol::dump(raw_ostream &OS, int Indent, + PdbSymbolIdField ShowIdFields, + PdbSymbolIdField RecurseIdFields) const { + NativeRawSymbol::dump(OS, Indent, ShowIdFields, RecurseIdFields); + dumpSymbolField(OS, "name", getName(), Indent); + dumpSymbolField(OS, "length", getLength(), Indent); + dumpSymbolField(OS, "offset", getAddressOffset(), Indent); + dumpSymbolField(OS, "section", getAddressSection(), Indent); +} + +uint32_t NativeFunctionSymbol::getAddressOffset() const { + return Sym.CodeOffset; +} + +uint32_t NativeFunctionSymbol::getAddressSection() const { return Sym.Segment; } +std::string NativeFunctionSymbol::getName() const { + return std::string(Sym.Name); +} + +PDB_SymType NativeFunctionSymbol::getSymTag() const { + return PDB_SymType::Function; +} + +uint64_t NativeFunctionSymbol::getLength() const { return Sym.CodeSize; } + +uint32_t NativeFunctionSymbol::getRelativeVirtualAddress() const { + return Session.getRVAFromSectOffset(Sym.Segment, Sym.CodeOffset); +} + +uint64_t NativeFunctionSymbol::getVirtualAddress() const { + return Session.getVAFromSectOffset(Sym.Segment, Sym.CodeOffset); +} diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeLineNumber.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeLineNumber.cpp new file mode 100644 index 0000000000000..2535e09baf625 --- /dev/null +++ b/llvm/lib/DebugInfo/PDB/Native/NativeLineNumber.cpp @@ -0,0 +1,50 @@ +//===- NativeLineNumber.cpp - Native line number implementation -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Native/NativeLineNumber.h" + +using namespace llvm; +using namespace llvm::pdb; + +NativeLineNumber::NativeLineNumber(const NativeSession &Session, + const codeview::LineInfo Line, + uint32_t ColumnNumber, uint32_t Section, + uint32_t Offset, uint32_t Length, + uint32_t SrcFileId) + : Session(Session), Line(Line), ColumnNumber(ColumnNumber), + Section(Section), Offset(Offset), Length(Length), SrcFileId(SrcFileId) {} + +uint32_t NativeLineNumber::getLineNumber() const { return Line.getStartLine(); } + +uint32_t NativeLineNumber::getLineNumberEnd() const { + return Line.getEndLine(); +} + +uint32_t NativeLineNumber::getColumnNumber() const { return ColumnNumber; } + +uint32_t NativeLineNumber::getColumnNumberEnd() const { return 0; } + +uint32_t NativeLineNumber::getAddressSection() const { return Section; } + +uint32_t NativeLineNumber::getAddressOffset() const { return Offset; } + +uint32_t NativeLineNumber::getRelativeVirtualAddress() const { + return Session.getRVAFromSectOffset(Section, Offset); +} + +uint64_t NativeLineNumber::getVirtualAddress() const { + return Session.getVAFromSectOffset(Section, Offset); +} + +uint32_t NativeLineNumber::getLength() const { return Length; } + +uint32_t NativeLineNumber::getSourceFileId() const { return SrcFileId; } + +uint32_t NativeLineNumber::getCompilandId() const { return 0; } + +bool NativeLineNumber::isStatement() const { return Line.isStatement(); } diff --git a/llvm/lib/DebugInfo/PDB/Native/NativePublicSymbol.cpp b/llvm/lib/DebugInfo/PDB/Native/NativePublicSymbol.cpp new file mode 100644 index 0000000000000..7086af7e67a2a --- /dev/null +++ b/llvm/lib/DebugInfo/PDB/Native/NativePublicSymbol.cpp @@ -0,0 +1,52 @@ +//===- NativePublicSymbol.cpp - info about public symbols -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Native/NativePublicSymbol.h" + +#include "llvm/DebugInfo/CodeView/SymbolRecord.h" +#include "llvm/DebugInfo/PDB/Native/NativeTypeBuiltin.h" +#include "llvm/DebugInfo/PDB/Native/NativeTypeEnum.h" + +using namespace llvm; +using namespace llvm::codeview; +using namespace llvm::pdb; + +NativePublicSymbol::NativePublicSymbol(NativeSession &Session, SymIndexId Id, + const codeview::PublicSym32 &Sym) + : NativeRawSymbol(Session, PDB_SymType::Data, Id), Sym(Sym) {} + +NativePublicSymbol::~NativePublicSymbol() {} + +void NativePublicSymbol::dump(raw_ostream &OS, int Indent, + PdbSymbolIdField ShowIdFields, + PdbSymbolIdField RecurseIdFields) const { + NativeRawSymbol::dump(OS, Indent, ShowIdFields, RecurseIdFields); + dumpSymbolField(OS, "name", getName(), Indent); + dumpSymbolField(OS, "offset", getAddressOffset(), Indent); + dumpSymbolField(OS, "section", getAddressSection(), Indent); +} + +uint32_t NativePublicSymbol::getAddressOffset() const { return Sym.Offset; } + +uint32_t NativePublicSymbol::getAddressSection() const { return Sym.Segment; } + +std::string NativePublicSymbol::getName() const { + return std::string(Sym.Name); +} + +PDB_SymType NativePublicSymbol::getSymTag() const { + return PDB_SymType::PublicSymbol; +} + +uint32_t NativePublicSymbol::getRelativeVirtualAddress() const { + return Session.getRVAFromSectOffset(Sym.Segment, Sym.Offset); +} + +uint64_t NativePublicSymbol::getVirtualAddress() const { + return Session.getVAFromSectOffset(Sym.Segment, Sym.Offset); +} diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeSession.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeSession.cpp index b45a5881dcb5c..ac8449df44ffb 100644 --- a/llvm/lib/DebugInfo/PDB/Native/NativeSession.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/NativeSession.cpp @@ -12,6 +12,7 @@ #include "llvm/DebugInfo/CodeView/TypeIndex.h" #include "llvm/DebugInfo/PDB/IPDBEnumChildren.h" #include "llvm/DebugInfo/PDB/IPDBSourceFile.h" +#include "llvm/DebugInfo/PDB/Native/DbiStream.h" #include "llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h" #include "llvm/DebugInfo/PDB/Native/NativeEnumInjectedSources.h" #include "llvm/DebugInfo/PDB/Native/NativeEnumTypes.h" @@ -25,11 +26,14 @@ #include "llvm/DebugInfo/PDB/PDBSymbolCompiland.h" #include "llvm/DebugInfo/PDB/PDBSymbolExe.h" #include "llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h" +#include "llvm/Object/COFF.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/BinaryByteStream.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorOr.h" +#include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" #include <algorithm> #include <cassert> @@ -75,14 +79,125 @@ Error NativeSession::createFromPdb(std::unique_ptr<MemoryBuffer> Buffer, return Error::success(); } -Error NativeSession::createFromExe(StringRef Path, +static Expected<std::unique_ptr<PDBFile>> +loadPdbFile(StringRef PdbPath, std::unique_ptr<BumpPtrAllocator> &Allocator) { + ErrorOr<std::unique_ptr<MemoryBuffer>> ErrorOrBuffer = + MemoryBuffer::getFile(PdbPath, /*FileSize=*/-1, + /*RequiresNullTerminator=*/false); + if (!ErrorOrBuffer) + return make_error<RawError>(ErrorOrBuffer.getError()); + std::unique_ptr<llvm::MemoryBuffer> Buffer = std::move(*ErrorOrBuffer); + + PdbPath = Buffer->getBufferIdentifier(); + file_magic Magic; + auto EC = identify_magic(PdbPath, Magic); + if (EC || Magic != file_magic::pdb) + return make_error<RawError>(EC); + + auto Stream = std::make_unique<MemoryBufferByteStream>(std::move(Buffer), + llvm::support::little); + + auto File = std::make_unique<PDBFile>(PdbPath, std::move(Stream), *Allocator); + if (auto EC = File->parseFileHeaders()) + return std::move(EC); + + if (auto EC = File->parseStreamData()) + return std::move(EC); + + return std::move(File); +} + +Error NativeSession::createFromPdbPath(StringRef PdbPath, + std::unique_ptr<IPDBSession> &Session) { + auto Allocator = std::make_unique<BumpPtrAllocator>(); + auto PdbFile = loadPdbFile(PdbPath, Allocator); + if (!PdbFile) + return PdbFile.takeError(); + + Session = std::make_unique<NativeSession>(std::move(PdbFile.get()), + std::move(Allocator)); + return Error::success(); +} + +static Expected<std::string> getPdbPathFromExe(StringRef ExePath) { + Expected<object::OwningBinary<object::Binary>> BinaryFile = + object::createBinary(ExePath); + if (!BinaryFile) + return BinaryFile.takeError(); + + const object::COFFObjectFile *ObjFile = + dyn_cast<object::COFFObjectFile>(BinaryFile->getBinary()); + if (!ObjFile) + return make_error<RawError>(raw_error_code::invalid_format); + + StringRef PdbPath; + const llvm::codeview::DebugInfo *PdbInfo = nullptr; + if (Error E = ObjFile->getDebugPDBInfo(PdbInfo, PdbPath)) + return std::move(E); + + return std::string(PdbPath); +} + +Error NativeSession::createFromExe(StringRef ExePath, std::unique_ptr<IPDBSession> &Session) { - return make_error<RawError>(raw_error_code::feature_unsupported); + Expected<std::string> PdbPath = getPdbPathFromExe(ExePath); + if (!PdbPath) + return PdbPath.takeError(); + + file_magic Magic; + auto EC = identify_magic(PdbPath.get(), Magic); + if (EC || Magic != file_magic::pdb) + return make_error<RawError>(EC); + + auto Allocator = std::make_unique<BumpPtrAllocator>(); + auto File = loadPdbFile(PdbPath.get(), Allocator); + if (!File) + return File.takeError(); + + Session = std::make_unique<NativeSession>(std::move(File.get()), + std::move(Allocator)); + + return Error::success(); } -uint64_t NativeSession::getLoadAddress() const { return 0; } +Expected<std::string> +NativeSession::searchForPdb(const PdbSearchOptions &Opts) { + Expected<std::string> PathOrErr = getPdbPathFromExe(Opts.ExePath); + if (!PathOrErr) + return PathOrErr.takeError(); + StringRef PathFromExe = PathOrErr.get(); + sys::path::Style Style = PathFromExe.startswith("/") + ? sys::path::Style::posix + : sys::path::Style::windows; + StringRef PdbName = sys::path::filename(PathFromExe, Style); + + // Check if pdb exists in the executable directory. + SmallString<128> PdbPath = StringRef(Opts.ExePath); + sys::path::remove_filename(PdbPath); + sys::path::append(PdbPath, PdbName); -bool NativeSession::setLoadAddress(uint64_t Address) { return false; } + auto Allocator = std::make_unique<BumpPtrAllocator>(); + + if (auto File = loadPdbFile(PdbPath, Allocator)) + return std::string(PdbPath); + else + consumeError(File.takeError()); + + // Check path that was in the executable. + if (auto File = loadPdbFile(PathFromExe, Allocator)) + return std::string(PathFromExe); + else + return File.takeError(); + + return make_error<RawError>("PDB not found"); +} + +uint64_t NativeSession::getLoadAddress() const { return LoadAddress; } + +bool NativeSession::setLoadAddress(uint64_t Address) { + LoadAddress = Address; + return true; +} std::unique_ptr<PDBSymbolExe> NativeSession::getGlobalScope() { return PDBSymbol::createAs<PDBSymbolExe>(*this, getNativeGlobalScope()); @@ -95,28 +210,52 @@ NativeSession::getSymbolById(SymIndexId SymbolId) const { bool NativeSession::addressForVA(uint64_t VA, uint32_t &Section, uint32_t &Offset) const { - return false; + uint32_t RVA = VA - getLoadAddress(); + return addressForRVA(RVA, Section, Offset); } -bool NativeSession::addressForRVA(uint32_t VA, uint32_t &Section, +bool NativeSession::addressForRVA(uint32_t RVA, uint32_t &Section, uint32_t &Offset) const { - return false; + Section = 0; + Offset = 0; + + auto Dbi = Pdb->getPDBDbiStream(); + if (!Dbi) + return false; + + if ((int32_t)RVA < 0) + return true; + + Offset = RVA; + for (; Section < Dbi->getSectionHeaders().size(); ++Section) { + auto &Sec = Dbi->getSectionHeaders()[Section]; + if (RVA < Sec.VirtualAddress) + return true; + Offset = RVA - Sec.VirtualAddress; + } + return true; } std::unique_ptr<PDBSymbol> -NativeSession::findSymbolByAddress(uint64_t Address, PDB_SymType Type) const { - return nullptr; +NativeSession::findSymbolByAddress(uint64_t Address, PDB_SymType Type) { + uint32_t Section; + uint32_t Offset; + addressForVA(Address, Section, Offset); + return findSymbolBySectOffset(Section, Offset, Type); } -std::unique_ptr<PDBSymbol> -NativeSession::findSymbolByRVA(uint32_t RVA, PDB_SymType Type) const { - return nullptr; +std::unique_ptr<PDBSymbol> NativeSession::findSymbolByRVA(uint32_t RVA, + PDB_SymType Type) { + uint32_t Section; + uint32_t Offset; + addressForRVA(RVA, Section, Offset); + return findSymbolBySectOffset(Section, Offset, Type); } std::unique_ptr<PDBSymbol> NativeSession::findSymbolBySectOffset(uint32_t Sect, uint32_t Offset, - PDB_SymType Type) const { - return nullptr; + PDB_SymType Type) { + return Cache.findSymbolBySectOffset(Sect, Offset, Type); } std::unique_ptr<IPDBEnumLineNumbers> @@ -128,18 +267,19 @@ NativeSession::findLineNumbers(const PDBSymbolCompiland &Compiland, std::unique_ptr<IPDBEnumLineNumbers> NativeSession::findLineNumbersByAddress(uint64_t Address, uint32_t Length) const { - return nullptr; + return Cache.findLineNumbersByVA(Address, Length); } std::unique_ptr<IPDBEnumLineNumbers> NativeSession::findLineNumbersByRVA(uint32_t RVA, uint32_t Length) const { - return nullptr; + return findLineNumbersByAddress(getLoadAddress() + RVA, Length); } std::unique_ptr<IPDBEnumLineNumbers> NativeSession::findLineNumbersBySectOffset(uint32_t Section, uint32_t Offset, uint32_t Length) const { - return nullptr; + uint64_t VA = getVAFromSectOffset(Section, Offset); + return findLineNumbersByAddress(VA, Length); } std::unique_ptr<IPDBEnumSourceFiles> @@ -179,7 +319,7 @@ std::unique_ptr<IPDBEnumSourceFiles> NativeSession::getSourceFilesForCompiland( std::unique_ptr<IPDBSourceFile> NativeSession::getSourceFileById(uint32_t FileId) const { - return nullptr; + return Cache.getSourceFileById(FileId); } std::unique_ptr<IPDBEnumDataStreams> NativeSession::getDebugStreams() const { @@ -225,3 +365,24 @@ NativeExeSymbol &NativeSession::getNativeGlobalScope() const { return Cache.getNativeSymbolById<NativeExeSymbol>(ExeSymbol); } + +uint32_t NativeSession::getRVAFromSectOffset(uint32_t Section, + uint32_t Offset) const { + if (Section <= 0) + return 0; + + auto Dbi = getDbiStreamPtr(*Pdb); + if (!Dbi) + return 0; + + uint32_t MaxSection = Dbi->getSectionHeaders().size(); + if (Section > MaxSection + 1) + Section = MaxSection + 1; + auto &Sec = Dbi->getSectionHeaders()[Section - 1]; + return Sec.VirtualAddress + Offset; +} + +uint64_t NativeSession::getVAFromSectOffset(uint32_t Section, + uint32_t Offset) const { + return LoadAddress + getRVAFromSectOffset(Section, Offset); +} diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeSourceFile.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeSourceFile.cpp new file mode 100644 index 0000000000000..6473207e058af --- /dev/null +++ b/llvm/lib/DebugInfo/PDB/Native/NativeSourceFile.cpp @@ -0,0 +1,47 @@ +//===- NativeSourceFile.cpp - Native line number implementaiton -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Native/NativeSourceFile.h" +#include "llvm/DebugInfo/PDB/Native/NativeSession.h" + +using namespace llvm; +using namespace llvm::pdb; + +NativeSourceFile::NativeSourceFile(NativeSession &Session, uint32_t FileId, + const codeview::FileChecksumEntry &Checksum) + : Session(Session), FileId(FileId), Checksum(Checksum) {} + +std::string NativeSourceFile::getFileName() const { + auto ST = Session.getPDBFile().getStringTable(); + if (!ST) { + consumeError(ST.takeError()); + return ""; + } + auto FileName = ST->getStringTable().getString(Checksum.FileNameOffset); + if (!FileName) { + consumeError(FileName.takeError()); + return ""; + } + + return std::string(FileName.get()); +} + +uint32_t NativeSourceFile::getUniqueId() const { return FileId; } + +std::string NativeSourceFile::getChecksum() const { + return toStringRef(Checksum.Checksum).str(); +} + +PDB_Checksum NativeSourceFile::getChecksumType() const { + return static_cast<PDB_Checksum>(Checksum.Kind); +} + +std::unique_ptr<IPDBEnumChildren<PDBSymbolCompiland>> +NativeSourceFile::getCompilands() const { + return nullptr; +} diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeSymbolEnumerator.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeSymbolEnumerator.cpp index 704c1254afbfd..e5f1dcaf801ee 100644 --- a/llvm/lib/DebugInfo/PDB/Native/NativeSymbolEnumerator.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/NativeSymbolEnumerator.cpp @@ -51,7 +51,9 @@ SymIndexId NativeSymbolEnumerator::getClassParentId() const { SymIndexId NativeSymbolEnumerator::getLexicalParentId() const { return 0; } -std::string NativeSymbolEnumerator::getName() const { return Record.Name; } +std::string NativeSymbolEnumerator::getName() const { + return std::string(Record.Name); +} SymIndexId NativeSymbolEnumerator::getTypeId() const { return Parent.getTypeId(); diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeTypeArray.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeTypeArray.cpp index 80d455ad66e95..63ac9fae0e875 100644 --- a/llvm/lib/DebugInfo/PDB/Native/NativeTypeArray.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/NativeTypeArray.cpp @@ -63,4 +63,4 @@ SymIndexId NativeTypeArray::getTypeId() const { Record.getElementType()); } -uint64_t NativeTypeArray::getLength() const { return Record.Size; }
\ No newline at end of file +uint64_t NativeTypeArray::getLength() const { return Record.Size; } diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeTypeEnum.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeTypeEnum.cpp index 26ccb7daece08..aaec3a5e7c60e 100644 --- a/llvm/lib/DebugInfo/PDB/Native/NativeTypeEnum.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/NativeTypeEnum.cpp @@ -305,7 +305,7 @@ std::string NativeTypeEnum::getName() const { if (UnmodifiedType) return UnmodifiedType->getName(); - return Record->getName(); + return std::string(Record->getName()); } bool NativeTypeEnum::isNested() const { diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeTypeTypedef.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeTypeTypedef.cpp index 60b3732822670..72964a9e0d4db 100644 --- a/llvm/lib/DebugInfo/PDB/Native/NativeTypeTypedef.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/NativeTypeTypedef.cpp @@ -20,7 +20,9 @@ void NativeTypeTypedef::dump(raw_ostream &OS, int Indent, PdbSymbolIdField::Type, ShowIdFields, RecurseIdFields); } -std::string NativeTypeTypedef::getName() const { return Record.Name; } +std::string NativeTypeTypedef::getName() const { + return std::string(Record.Name); +} SymIndexId NativeTypeTypedef::getTypeId() const { return Session.getSymbolCache().findSymbolByTypeIndex(Record.Type); diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeTypeUDT.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeTypeUDT.cpp index be67846c0b246..b0be7f76e86e4 100644 --- a/llvm/lib/DebugInfo/PDB/Native/NativeTypeUDT.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/NativeTypeUDT.cpp @@ -74,7 +74,7 @@ std::string NativeTypeUDT::getName() const { if (UnmodifiedType) return UnmodifiedType->getName(); - return Tag->getName(); + return std::string(Tag->getName()); } SymIndexId NativeTypeUDT::getLexicalParentId() const { return 0; } diff --git a/llvm/lib/DebugInfo/PDB/Native/PDBFile.cpp b/llvm/lib/DebugInfo/PDB/Native/PDBFile.cpp index 9ac226b89139b..cde6452368512 100644 --- a/llvm/lib/DebugInfo/PDB/Native/PDBFile.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/PDBFile.cpp @@ -41,7 +41,8 @@ typedef FixedStreamArray<support::ulittle32_t> ulittle_array; PDBFile::PDBFile(StringRef Path, std::unique_ptr<BinaryStream> PdbFileBuffer, BumpPtrAllocator &Allocator) - : FilePath(Path), Allocator(Allocator), Buffer(std::move(PdbFileBuffer)) {} + : FilePath(std::string(Path)), Allocator(Allocator), + Buffer(std::move(PdbFileBuffer)) {} PDBFile::~PDBFile() = default; diff --git a/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp index aa32887243900..deb0f201a71ed 100644 --- a/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp @@ -7,9 +7,7 @@ //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/PDB/Native/PDBFileBuilder.h" - #include "llvm/ADT/BitVector.h" - #include "llvm/DebugInfo/MSF/MSFBuilder.h" #include "llvm/DebugInfo/PDB/Native/DbiStream.h" #include "llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h" @@ -23,6 +21,7 @@ #include "llvm/Support/BinaryStream.h" #include "llvm/Support/BinaryStreamWriter.h" #include "llvm/Support/CRC.h" +#include "llvm/Support/Chrono.h" #include "llvm/Support/Path.h" #include "llvm/Support/xxhash.h" @@ -95,7 +94,7 @@ Error PDBFileBuilder::addNamedStream(StringRef Name, StringRef Data) { if (!ExpectedIndex) return ExpectedIndex.takeError(); assert(NamedStreamData.count(*ExpectedIndex) == 0); - NamedStreamData[*ExpectedIndex] = Data; + NamedStreamData[*ExpectedIndex] = std::string(Data); return Error::success(); } @@ -144,7 +143,7 @@ Error PDBFileBuilder::finalizeMsfLayout() { if (Dbi) { Dbi->setPublicsStreamIndex(Gsi->getPublicsStreamIndex()); Dbi->setGlobalsStreamIndex(Gsi->getGlobalsStreamIndex()); - Dbi->setSymbolRecordStreamIndex(Gsi->getRecordStreamIdx()); + Dbi->setSymbolRecordStreamIndex(Gsi->getRecordStreamIndex()); } } if (Tpi) { diff --git a/llvm/lib/DebugInfo/PDB/Native/SymbolCache.cpp b/llvm/lib/DebugInfo/PDB/Native/SymbolCache.cpp index 5cdd628312fe5..9f15907b519e8 100644 --- a/llvm/lib/DebugInfo/PDB/Native/SymbolCache.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/SymbolCache.cpp @@ -1,13 +1,18 @@ #include "llvm/DebugInfo/PDB/Native/SymbolCache.h" +#include "llvm/DebugInfo/CodeView/DebugLinesSubsection.h" #include "llvm/DebugInfo/CodeView/SymbolDeserializer.h" #include "llvm/DebugInfo/CodeView/TypeDeserializer.h" #include "llvm/DebugInfo/CodeView/TypeRecordHelpers.h" #include "llvm/DebugInfo/PDB/Native/DbiStream.h" #include "llvm/DebugInfo/PDB/Native/GlobalsStream.h" +#include "llvm/DebugInfo/PDB/Native/ISectionContribVisitor.h" #include "llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h" #include "llvm/DebugInfo/PDB/Native/NativeEnumGlobals.h" +#include "llvm/DebugInfo/PDB/Native/NativeEnumLineNumbers.h" #include "llvm/DebugInfo/PDB/Native/NativeEnumTypes.h" +#include "llvm/DebugInfo/PDB/Native/NativeFunctionSymbol.h" +#include "llvm/DebugInfo/PDB/Native/NativePublicSymbol.h" #include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h" #include "llvm/DebugInfo/PDB/Native/NativeSession.h" #include "llvm/DebugInfo/PDB/Native/NativeTypeArray.h" @@ -19,6 +24,7 @@ #include "llvm/DebugInfo/PDB/Native/NativeTypeUDT.h" #include "llvm/DebugInfo/PDB/Native/NativeTypeVTShape.h" #include "llvm/DebugInfo/PDB/Native/PDBFile.h" +#include "llvm/DebugInfo/PDB/Native/PublicsStream.h" #include "llvm/DebugInfo/PDB/Native/SymbolStream.h" #include "llvm/DebugInfo/PDB/Native/TpiStream.h" #include "llvm/DebugInfo/PDB/PDBSymbol.h" @@ -62,9 +68,10 @@ static const struct BuiltinTypeEntry { }; SymbolCache::SymbolCache(NativeSession &Session, DbiStream *Dbi) - : Session(Session), Dbi(Dbi) { + : Session(Session), Dbi(Dbi), AddrToModuleIndex(IMapAllocator) { // Id 0 is reserved for the invalid symbol. Cache.push_back(nullptr); + SourceFiles.push_back(nullptr); if (Dbi) Compilands.resize(Dbi->modules().getModuleCount()); @@ -281,6 +288,312 @@ SymIndexId SymbolCache::getOrCreateGlobalSymbolByOffset(uint32_t Offset) { return Id; } +Expected<ModuleDebugStreamRef> +SymbolCache::getModuleDebugStream(uint32_t Index) const { + assert(Dbi && "Dbi stream not present"); + + DbiModuleDescriptor Modi = Dbi->modules().getModuleDescriptor(Index); + + uint16_t ModiStream = Modi.getModuleStreamIndex(); + if (ModiStream == kInvalidStreamIndex) + return make_error<RawError>("Module stream not present"); + + std::unique_ptr<msf::MappedBlockStream> ModStreamData = + Session.getPDBFile().createIndexedStream(ModiStream); + + ModuleDebugStreamRef ModS(Modi, std::move(ModStreamData)); + if (auto EC = ModS.reload()) + return std::move(EC); + + return std::move(ModS); +} + +std::unique_ptr<PDBSymbol> +SymbolCache::findSymbolBySectOffset(uint32_t Sect, uint32_t Offset, + PDB_SymType Type) { + if (AddrToModuleIndex.empty()) + parseSectionContribs(); + + switch (Type) { + case PDB_SymType::Function: + return findFunctionSymbolBySectOffset(Sect, Offset); + case PDB_SymType::PublicSymbol: + return findPublicSymbolBySectOffset(Sect, Offset); + case PDB_SymType::None: { + // FIXME: Implement for PDB_SymType::Data. + if (auto Sym = findFunctionSymbolBySectOffset(Sect, Offset)) + return Sym; + return nullptr; + } + default: + return nullptr; + } +} + +std::unique_ptr<PDBSymbol> +SymbolCache::findFunctionSymbolBySectOffset(uint32_t Sect, uint32_t Offset) { + auto Iter = AddressToFunctionSymId.find({Sect, Offset}); + if (Iter != AddressToFunctionSymId.end()) + return getSymbolById(Iter->second); + + if (!Dbi) + return nullptr; + + auto Modi = getModuleIndexForAddr(Session.getVAFromSectOffset(Sect, Offset)); + if (!Modi) + return nullptr; + + auto ExpectedModS = getModuleDebugStream(*Modi); + if (!ExpectedModS) { + consumeError(ExpectedModS.takeError()); + return nullptr; + } + CVSymbolArray Syms = ExpectedModS->getSymbolArray(); + + // Search for the symbol in this module. + for (auto I = Syms.begin(), E = Syms.end(); I != E; ++I) { + if (I->kind() != S_LPROC32 && I->kind() != S_GPROC32) + continue; + auto PS = cantFail(SymbolDeserializer::deserializeAs<ProcSym>(*I)); + if (Sect == PS.Segment && Offset >= PS.CodeOffset && + Offset < PS.CodeOffset + PS.CodeSize) { + SymIndexId Id = createSymbol<NativeFunctionSymbol>(PS); + AddressToFunctionSymId.insert({{Sect, Offset}, Id}); + return getSymbolById(Id); + } + + // Jump to the end of this ProcSym. + I = Syms.at(PS.End); + } + return nullptr; +} + +std::unique_ptr<PDBSymbol> +SymbolCache::findPublicSymbolBySectOffset(uint32_t Sect, uint32_t Offset) { + auto Iter = AddressToPublicSymId.find({Sect, Offset}); + if (Iter != AddressToPublicSymId.end()) + return getSymbolById(Iter->second); + + auto Publics = Session.getPDBFile().getPDBPublicsStream(); + if (!Publics) + return nullptr; + + auto ExpectedSyms = Session.getPDBFile().getPDBSymbolStream(); + if (!ExpectedSyms) + return nullptr; + BinaryStreamRef SymStream = + ExpectedSyms->getSymbolArray().getUnderlyingStream(); + + // Use binary search to find the first public symbol with an address greater + // than or equal to Sect, Offset. + auto AddrMap = Publics->getAddressMap(); + auto First = AddrMap.begin(); + auto It = AddrMap.begin(); + size_t Count = AddrMap.size(); + size_t Half; + while (Count > 0) { + It = First; + Half = Count / 2; + It += Half; + Expected<CVSymbol> Sym = readSymbolFromStream(SymStream, *It); + if (!Sym) { + consumeError(Sym.takeError()); + return nullptr; + } + + auto PS = + cantFail(SymbolDeserializer::deserializeAs<PublicSym32>(Sym.get())); + if (PS.Segment < Sect || (PS.Segment == Sect && PS.Offset <= Offset)) { + First = ++It; + Count -= Half + 1; + } else + Count = Half; + } + if (It == AddrMap.begin()) + return nullptr; + --It; + + Expected<CVSymbol> Sym = readSymbolFromStream(SymStream, *It); + if (!Sym) { + consumeError(Sym.takeError()); + return nullptr; + } + auto PS = cantFail(SymbolDeserializer::deserializeAs<PublicSym32>(Sym.get())); + SymIndexId Id = createSymbol<NativePublicSymbol>(PS); + AddressToPublicSymId.insert({{Sect, Offset}, Id}); + return getSymbolById(Id); +} + +std::vector<SymbolCache::LineTableEntry> +SymbolCache::findLineTable(uint16_t Modi) const { + // Check if this module has already been added. + auto LineTableIter = LineTable.find(Modi); + if (LineTableIter != LineTable.end()) + return LineTableIter->second; + + std::vector<LineTableEntry> &ModuleLineTable = LineTable[Modi]; + + // If there is an error or there are no lines, just return the + // empty vector. + Expected<ModuleDebugStreamRef> ExpectedModS = getModuleDebugStream(Modi); + if (!ExpectedModS) { + consumeError(ExpectedModS.takeError()); + return ModuleLineTable; + } + + std::vector<std::vector<LineTableEntry>> EntryList; + for (const auto &SS : ExpectedModS->getSubsectionsArray()) { + if (SS.kind() != DebugSubsectionKind::Lines) + continue; + + DebugLinesSubsectionRef Lines; + BinaryStreamReader Reader(SS.getRecordData()); + if (auto EC = Lines.initialize(Reader)) { + consumeError(std::move(EC)); + continue; + } + + uint32_t RelocSegment = Lines.header()->RelocSegment; + uint32_t RelocOffset = Lines.header()->RelocOffset; + for (const LineColumnEntry &Group : Lines) { + if (Group.LineNumbers.empty()) + continue; + + std::vector<LineTableEntry> Entries; + + // If there are column numbers, then they should be in a parallel stream + // to the line numbers. + auto ColIt = Group.Columns.begin(); + auto ColsEnd = Group.Columns.end(); + + for (const LineNumberEntry &LN : Group.LineNumbers) { + uint64_t VA = + Session.getVAFromSectOffset(RelocSegment, RelocOffset + LN.Offset); + LineInfo Line(LN.Flags); + uint32_t ColNum = 0; + + if (Lines.hasColumnInfo() && ColIt != ColsEnd) { + ColNum = ColIt->StartColumn; + ++ColIt; + } + Entries.push_back({VA, Line, ColNum, Group.NameIndex, false}); + } + + // Add a terminal entry line to mark the end of this subsection. + uint64_t VA = Session.getVAFromSectOffset( + RelocSegment, RelocOffset + Lines.header()->CodeSize); + LineInfo LastLine(Group.LineNumbers.back().Flags); + uint32_t ColNum = + (Lines.hasColumnInfo()) ? Group.Columns.back().StartColumn : 0; + Entries.push_back({VA, LastLine, ColNum, Group.NameIndex, true}); + + EntryList.push_back(Entries); + } + } + + // Sort EntryList, and add flattened contents to the line table. + std::sort(EntryList.begin(), EntryList.end(), + [](const std::vector<LineTableEntry> &LHS, + const std::vector<LineTableEntry> &RHS) { + return LHS[0].Addr < RHS[0].Addr; + }); + for (size_t I = 0; I < EntryList.size(); ++I) + ModuleLineTable.insert(ModuleLineTable.end(), EntryList[I].begin(), + EntryList[I].end()); + + return ModuleLineTable; +} + +std::unique_ptr<IPDBEnumLineNumbers> +SymbolCache::findLineNumbersByVA(uint64_t VA, uint32_t Length) const { + Optional<uint16_t> MaybeModi = getModuleIndexForAddr(VA); + if (!MaybeModi) + return nullptr; + uint16_t Modi = *MaybeModi; + + std::vector<LineTableEntry> Lines = findLineTable(Modi); + if (Lines.empty()) + return nullptr; + + // Find the first line in the line table whose address is not greater than + // the one we are searching for. + auto LineIter = llvm::partition_point(Lines, [&](const LineTableEntry &E) { + return (E.Addr < VA || (E.Addr == VA && E.IsTerminalEntry)); + }); + + // Try to back up if we've gone too far. + if (LineIter == Lines.end() || LineIter->Addr > VA) { + if (LineIter == Lines.begin() || std::prev(LineIter)->IsTerminalEntry) + return nullptr; + --LineIter; + } + + Expected<ModuleDebugStreamRef> ExpectedModS = getModuleDebugStream(Modi); + if (!ExpectedModS) { + consumeError(ExpectedModS.takeError()); + return nullptr; + } + Expected<DebugChecksumsSubsectionRef> ExpectedChecksums = + ExpectedModS->findChecksumsSubsection(); + if (!ExpectedChecksums) { + consumeError(ExpectedChecksums.takeError()); + return nullptr; + } + + // Populate a vector of NativeLineNumbers that have addresses in the given + // address range. + Optional<uint16_t> EndModi = getModuleIndexForAddr(VA + Length); + if (!EndModi) + return nullptr; + std::vector<NativeLineNumber> LineNumbers; + while (Modi <= *EndModi) { + // If we reached the end of the current module, increment Modi and get the + // new line table and checksums array. + if (LineIter == Lines.end()) { + ++Modi; + + ExpectedModS = getModuleDebugStream(Modi); + if (!ExpectedModS) { + consumeError(ExpectedModS.takeError()); + break; + } + ExpectedChecksums = ExpectedModS->findChecksumsSubsection(); + if (!ExpectedChecksums) { + consumeError(ExpectedChecksums.takeError()); + break; + } + + Lines = findLineTable(Modi); + LineIter = Lines.begin(); + + if (Lines.empty()) + continue; + } + + if (LineIter->IsTerminalEntry) { + ++LineIter; + continue; + } + + // If the line is still within the address range, create a NativeLineNumber + // and add to the list. + if (LineIter->Addr > VA + Length) + break; + + uint32_t LineSect, LineOff; + Session.addressForVA(LineIter->Addr, LineSect, LineOff); + uint32_t LineLength = std::next(LineIter)->Addr - LineIter->Addr; + auto ChecksumIter = + ExpectedChecksums->getArray().at(LineIter->FileNameIndex); + uint32_t SrcFileId = getOrCreateSourceFile(*ChecksumIter); + NativeLineNumber LineNum(Session, LineIter->Line, LineIter->ColumnNumber, + LineSect, LineOff, LineLength, SrcFileId); + LineNumbers.push_back(LineNum); + ++LineIter; + } + return std::make_unique<NativeEnumLineNumbers>(std::move(LineNumbers)); +} + std::unique_ptr<PDBSymbolCompiland> SymbolCache::getOrCreateCompiland(uint32_t Index) { if (!Dbi) @@ -297,3 +610,65 @@ SymbolCache::getOrCreateCompiland(uint32_t Index) { return Session.getConcreteSymbolById<PDBSymbolCompiland>(Compilands[Index]); } + +std::unique_ptr<IPDBSourceFile> +SymbolCache::getSourceFileById(SymIndexId FileId) const { + assert(FileId < SourceFiles.size()); + + // Id 0 is reserved. + if (FileId == 0) + return nullptr; + + return std::unique_ptr<NativeSourceFile>( + new NativeSourceFile(*SourceFiles[FileId].get())); +} + +SymIndexId +SymbolCache::getOrCreateSourceFile(const FileChecksumEntry &Checksums) const { + auto Iter = FileNameOffsetToId.find(Checksums.FileNameOffset); + if (Iter != FileNameOffsetToId.end()) + return Iter->second; + + SymIndexId Id = SourceFiles.size(); + auto SrcFile = std::make_unique<NativeSourceFile>(Session, Id, Checksums); + SourceFiles.push_back(std::move(SrcFile)); + FileNameOffsetToId[Checksums.FileNameOffset] = Id; + return Id; +} + +void SymbolCache::parseSectionContribs() { + if (!Dbi) + return; + + class Visitor : public ISectionContribVisitor { + NativeSession &Session; + IMap &AddrMap; + + public: + Visitor(NativeSession &Session, IMap &AddrMap) + : Session(Session), AddrMap(AddrMap) {} + void visit(const SectionContrib &C) override { + if (C.Size == 0) + return; + + uint64_t VA = Session.getVAFromSectOffset(C.ISect, C.Off); + uint64_t End = VA + C.Size; + + // Ignore overlapping sections based on the assumption that a valid + // PDB file should not have overlaps. + if (!AddrMap.overlaps(VA, End)) + AddrMap.insert(VA, End, C.Imod); + } + void visit(const SectionContrib2 &C) override { visit(C.Base); } + }; + + Visitor V(Session, AddrToModuleIndex); + Dbi->visitSectionContributions(V); +} + +Optional<uint16_t> SymbolCache::getModuleIndexForAddr(uint64_t Addr) const { + auto Iter = AddrToModuleIndex.find(Addr); + if (Iter == AddrToModuleIndex.end()) + return None; + return Iter.value(); +} diff --git a/llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp index 4f10f8524a9b1..51a1f0a544e3c 100644 --- a/llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp @@ -44,6 +44,9 @@ void TpiStreamBuilder::setVersionHeader(PdbRaw_TpiVer Version) { void TpiStreamBuilder::addTypeRecord(ArrayRef<uint8_t> Record, Optional<uint32_t> Hash) { // If we just crossed an 8KB threshold, add a type index offset. + assert(((Record.size() & 3) == 0) && + "The type record's size is not a multiple of 4 bytes which will " + "cause misalignment in the output TPI stream!"); size_t NewSize = TypeRecordBytes + Record.size(); constexpr size_t EightKB = 8 * 1024; if (NewSize / EightKB > TypeRecordBytes / EightKB || TypeRecords.empty()) { @@ -153,8 +156,11 @@ Error TpiStreamBuilder::commit(const msf::MSFLayout &Layout, return EC; for (auto Rec : TypeRecords) { - assert(!Rec.empty()); // An empty record will not write anything, but it - // would shift all offsets from here on. + assert(!Rec.empty() && "Attempting to write an empty type record shifts " + "all offsets in the TPI stream!"); + assert(((Rec.size() & 3) == 0) && + "The type record's size is not a multiple of 4 bytes which will " + "cause misalignment in the output TPI stream!"); if (auto EC = Writer.writeBytes(Rec)) return EC; } |