diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2017-05-03 20:26:11 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2017-05-03 20:26:11 +0000 |
commit | 148779df305667b6942fee7e758fdf81a6498f38 (patch) | |
tree | 976d85fb9cb4bc8ed54348b045f742be90e10c57 /lib/DebugInfo | |
parent | a303c417bbdb53703c2c17398b08486bde78f1f6 (diff) |
Notes
Diffstat (limited to 'lib/DebugInfo')
19 files changed, 1097 insertions, 378 deletions
diff --git a/lib/DebugInfo/CodeView/CMakeLists.txt b/lib/DebugInfo/CodeView/CMakeLists.txt index 421f22ca5d8d7..410d5a3777d47 100644 --- a/lib/DebugInfo/CodeView/CMakeLists.txt +++ b/lib/DebugInfo/CodeView/CMakeLists.txt @@ -15,6 +15,7 @@ add_llvm_library(LLVMDebugInfoCodeView ModuleDebugLineFragment.cpp ModuleDebugUnknownFragment.cpp RecordSerialization.cpp + StringTable.cpp SymbolRecordMapping.cpp SymbolDumper.cpp SymbolSerializer.cpp diff --git a/lib/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.cpp b/lib/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.cpp index c349e7ecce961..42f0afc3e2d74 100644 --- a/lib/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.cpp +++ b/lib/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.cpp @@ -10,6 +10,7 @@ #include "llvm/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.h" #include "llvm/DebugInfo/CodeView/CodeViewError.h" +#include "llvm/DebugInfo/CodeView/StringTable.h" #include "llvm/Support/BinaryStreamReader.h" using namespace llvm; @@ -25,7 +26,7 @@ struct FileChecksumEntryHeader { }; Error llvm::VarStreamArrayExtractor<FileChecksumEntry>::extract( - BinaryStreamRef Stream, uint32_t &Len, FileChecksumEntry &Item, void *Ctx) { + BinaryStreamRef Stream, uint32_t &Len, FileChecksumEntry &Item) { BinaryStreamReader Reader(Stream); const FileChecksumEntryHeader *Header; @@ -49,10 +50,12 @@ Error ModuleDebugFileChecksumFragmentRef::initialize( return Error::success(); } -ModuleDebugFileChecksumFragment::ModuleDebugFileChecksumFragment() - : ModuleDebugFragment(ModuleDebugFragmentKind::FileChecksums) {} +ModuleDebugFileChecksumFragment::ModuleDebugFileChecksumFragment( + StringTable &Strings) + : ModuleDebugFragment(ModuleDebugFragmentKind::FileChecksums), + Strings(Strings) {} -void ModuleDebugFileChecksumFragment::addChecksum(uint32_t StringTableOffset, +void ModuleDebugFileChecksumFragment::addChecksum(StringRef FileName, FileChecksumKind Kind, ArrayRef<uint8_t> Bytes) { FileChecksumEntry Entry; @@ -61,13 +64,14 @@ void ModuleDebugFileChecksumFragment::addChecksum(uint32_t StringTableOffset, ::memcpy(Copy, Bytes.data(), Bytes.size()); Entry.Checksum = makeArrayRef(Copy, Bytes.size()); } - Entry.FileNameOffset = StringTableOffset; + + Entry.FileNameOffset = Strings.insert(FileName); Entry.Kind = Kind; Checksums.push_back(Entry); // This maps the offset of this string in the string table to the offset // of this checksum entry in the checksum buffer. - OffsetMap[StringTableOffset] = SerializedSize; + OffsetMap[Entry.FileNameOffset] = SerializedSize; assert(SerializedSize % 4 == 0); uint32_t Len = alignTo(sizeof(FileChecksumEntryHeader) + Bytes.size(), 4); @@ -94,9 +98,10 @@ Error ModuleDebugFileChecksumFragment::commit(BinaryStreamWriter &Writer) { return Error::success(); } -uint32_t ModuleDebugFileChecksumFragment::mapChecksumOffset( - uint32_t StringTableOffset) const { - auto Iter = OffsetMap.find(StringTableOffset); +uint32_t +ModuleDebugFileChecksumFragment::mapChecksumOffset(StringRef FileName) const { + uint32_t Offset = Strings.getStringId(FileName); + auto Iter = OffsetMap.find(Offset); assert(Iter != OffsetMap.end()); return Iter->second; } diff --git a/lib/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.cpp b/lib/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.cpp index 483f7cb5c5ad3..cb6a8478797f9 100644 --- a/lib/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.cpp +++ b/lib/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.cpp @@ -10,20 +10,22 @@ #include "llvm/DebugInfo/CodeView/ModuleDebugInlineeLinesFragment.h" #include "llvm/DebugInfo/CodeView/CodeViewError.h" +#include "llvm/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.h" #include "llvm/DebugInfo/CodeView/ModuleDebugFragmentRecord.h" +#include "llvm/DebugInfo/CodeView/StringTable.h" using namespace llvm; using namespace llvm::codeview; Error VarStreamArrayExtractor<InlineeSourceLine>::extract( BinaryStreamRef Stream, uint32_t &Len, InlineeSourceLine &Item, - ContextType *Fragment) { + bool HasExtraFiles) { BinaryStreamReader Reader(Stream); if (auto EC = Reader.readObject(Item.Header)) return EC; - if (Fragment->hasExtraFiles()) { + if (HasExtraFiles) { uint32_t ExtraFileCount; if (auto EC = Reader.readInteger(ExtraFileCount)) return EC; @@ -42,7 +44,8 @@ Error ModuleDebugInlineeLineFragmentRef::initialize(BinaryStreamReader Reader) { if (auto EC = Reader.readEnum(Signature)) return EC; - if (auto EC = Reader.readArray(Lines, Reader.bytesRemaining(), this)) + if (auto EC = + Reader.readArray(Lines, Reader.bytesRemaining(), hasExtraFiles())) return EC; assert(Reader.bytesRemaining() == 0); @@ -54,9 +57,9 @@ bool ModuleDebugInlineeLineFragmentRef::hasExtraFiles() const { } ModuleDebugInlineeLineFragment::ModuleDebugInlineeLineFragment( - bool HasExtraFiles) + ModuleDebugFileChecksumFragment &Checksums, bool HasExtraFiles) : ModuleDebugFragment(ModuleDebugFragmentKind::InlineeLines), - HasExtraFiles(HasExtraFiles) {} + Checksums(Checksums), HasExtraFiles(HasExtraFiles) {} uint32_t ModuleDebugInlineeLineFragment::calculateSerializedLength() { // 4 bytes for the signature @@ -99,18 +102,22 @@ Error ModuleDebugInlineeLineFragment::commit(BinaryStreamWriter &Writer) { return Error::success(); } -void ModuleDebugInlineeLineFragment::addExtraFile(uint32_t FileOffset) { +void ModuleDebugInlineeLineFragment::addExtraFile(StringRef FileName) { + uint32_t Offset = Checksums.mapChecksumOffset(FileName); + auto &Entry = Entries.back(); - Entry.ExtraFiles.push_back(ulittle32_t(FileOffset)); + Entry.ExtraFiles.push_back(ulittle32_t(Offset)); ++ExtraFileCount; } void ModuleDebugInlineeLineFragment::addInlineSite(TypeIndex FuncId, - uint32_t FileOffset, + StringRef FileName, uint32_t SourceLine) { + uint32_t Offset = Checksums.mapChecksumOffset(FileName); + Entries.emplace_back(); auto &Entry = Entries.back(); - Entry.Header.FileID = FileOffset; + Entry.Header.FileID = Offset; Entry.Header.SourceLineNum = SourceLine; Entry.Header.Inlinee = FuncId; } diff --git a/lib/DebugInfo/CodeView/ModuleDebugLineFragment.cpp b/lib/DebugInfo/CodeView/ModuleDebugLineFragment.cpp index 103010ca28332..e0ee934709ba5 100644 --- a/lib/DebugInfo/CodeView/ModuleDebugLineFragment.cpp +++ b/lib/DebugInfo/CodeView/ModuleDebugLineFragment.cpp @@ -10,7 +10,9 @@ #include "llvm/DebugInfo/CodeView/ModuleDebugLineFragment.h" #include "llvm/DebugInfo/CodeView/CodeViewError.h" +#include "llvm/DebugInfo/CodeView/ModuleDebugFileChecksumFragment.h" #include "llvm/DebugInfo/CodeView/ModuleDebugFragmentRecord.h" +#include "llvm/DebugInfo/CodeView/StringTable.h" using namespace llvm; using namespace llvm::codeview; @@ -65,11 +67,15 @@ bool ModuleDebugLineFragmentRef::hasColumnInfo() const { return !!(Header->Flags & LF_HaveColumns); } -ModuleDebugLineFragment::ModuleDebugLineFragment() - : ModuleDebugFragment(ModuleDebugFragmentKind::Lines) {} +ModuleDebugLineFragment::ModuleDebugLineFragment( + ModuleDebugFileChecksumFragment &Checksums, StringTable &Strings) + : ModuleDebugFragment(ModuleDebugFragmentKind::Lines), + Checksums(Checksums) {} -void ModuleDebugLineFragment::createBlock(uint32_t ChecksumBufferOffset) { - Blocks.emplace_back(ChecksumBufferOffset); +void ModuleDebugLineFragment::createBlock(StringRef FileName) { + uint32_t Offset = Checksums.mapChecksumOffset(FileName); + + Blocks.emplace_back(Offset); } void ModuleDebugLineFragment::addLineInfo(uint32_t Offset, diff --git a/lib/DebugInfo/CodeView/StringTable.cpp b/lib/DebugInfo/CodeView/StringTable.cpp new file mode 100644 index 0000000000000..21f11204686b4 --- /dev/null +++ b/lib/DebugInfo/CodeView/StringTable.cpp @@ -0,0 +1,71 @@ +//===- StringTable.cpp - CodeView String Table Reader/Writer ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/CodeView/StringTable.h" + +#include "llvm/Support/BinaryStream.h" +#include "llvm/Support/BinaryStreamReader.h" +#include "llvm/Support/BinaryStreamWriter.h" + +using namespace llvm; +using namespace llvm::codeview; + +StringTableRef::StringTableRef() {} + +Error StringTableRef::initialize(BinaryStreamRef Contents) { + Stream = Contents; + return Error::success(); +} + +Expected<StringRef> StringTableRef::getString(uint32_t Offset) const { + BinaryStreamReader Reader(Stream); + Reader.setOffset(Offset); + StringRef Result; + if (auto EC = Reader.readCString(Result)) + return std::move(EC); + return Result; +} + +uint32_t StringTable::insert(StringRef S) { + auto P = Strings.insert({S, StringSize}); + + // If a given string didn't exist in the string table, we want to increment + // the string table size. + if (P.second) + StringSize += S.size() + 1; // +1 for '\0' + return P.first->second; +} + +uint32_t StringTable::calculateSerializedSize() const { return StringSize; } + +Error StringTable::commit(BinaryStreamWriter &Writer) const { + assert(Writer.bytesRemaining() == StringSize); + uint32_t MaxOffset = 1; + + for (auto &Pair : Strings) { + StringRef S = Pair.getKey(); + uint32_t Offset = Pair.getValue(); + Writer.setOffset(Offset); + if (auto EC = Writer.writeCString(S)) + return EC; + MaxOffset = std::max<uint32_t>(MaxOffset, Offset + S.size() + 1); + } + + Writer.setOffset(MaxOffset); + assert(Writer.bytesRemaining() == 0); + return Error::success(); +} + +uint32_t StringTable::size() const { return Strings.size(); } + +uint32_t StringTable::getStringId(StringRef S) const { + auto P = Strings.find(S); + assert(P != Strings.end()); + return P->second; +} diff --git a/lib/DebugInfo/CodeView/SymbolDumper.cpp b/lib/DebugInfo/CodeView/SymbolDumper.cpp index 134471e81cacd..5395e4349b28d 100644 --- a/lib/DebugInfo/CodeView/SymbolDumper.cpp +++ b/lib/DebugInfo/CodeView/SymbolDumper.cpp @@ -13,6 +13,7 @@ #include "llvm/DebugInfo/CodeView/CVSymbolVisitor.h" #include "llvm/DebugInfo/CodeView/CVTypeDumper.h" #include "llvm/DebugInfo/CodeView/EnumTables.h" +#include "llvm/DebugInfo/CodeView/StringTable.h" #include "llvm/DebugInfo/CodeView/SymbolDeserializer.h" #include "llvm/DebugInfo/CodeView/SymbolDumpDelegate.h" #include "llvm/DebugInfo/CodeView/SymbolRecord.h" @@ -369,14 +370,14 @@ Error CVSymbolDumperImpl::visitKnownRecord( DictScope S(W, "DefRangeSubfield"); if (ObjDelegate) { - StringRef StringTable = ObjDelegate->getStringTable(); - auto ProgramStringTableOffset = DefRangeSubfield.Program; - if (ProgramStringTableOffset >= StringTable.size()) + StringTableRef Strings = ObjDelegate->getStringTable(); + auto ExpectedProgram = Strings.getString(DefRangeSubfield.Program); + if (!ExpectedProgram) { + consumeError(ExpectedProgram.takeError()); return llvm::make_error<CodeViewError>( "String table offset outside of bounds of String Table!"); - StringRef Program = - StringTable.drop_front(ProgramStringTableOffset).split('\0').first; - W.printString("Program", Program); + } + W.printString("Program", *ExpectedProgram); } W.printNumber("OffsetInParent", DefRangeSubfield.OffsetInParent); printLocalVariableAddrRange(DefRangeSubfield.Range, @@ -390,14 +391,14 @@ Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, DictScope S(W, "DefRange"); if (ObjDelegate) { - StringRef StringTable = ObjDelegate->getStringTable(); - auto ProgramStringTableOffset = DefRange.Program; - if (ProgramStringTableOffset >= StringTable.size()) + StringTableRef Strings = ObjDelegate->getStringTable(); + auto ExpectedProgram = Strings.getString(DefRange.Program); + if (!ExpectedProgram) { + consumeError(ExpectedProgram.takeError()); return llvm::make_error<CodeViewError>( "String table offset outside of bounds of String Table!"); - StringRef Program = - StringTable.drop_front(ProgramStringTableOffset).split('\0').first; - W.printString("Program", Program); + } + W.printString("Program", *ExpectedProgram); } printLocalVariableAddrRange(DefRange.Range, DefRange.getRelocationOffset()); printLocalVariableAddrGap(DefRange.Gaps); diff --git a/lib/DebugInfo/DWARF/CMakeLists.txt b/lib/DebugInfo/DWARF/CMakeLists.txt index 495edb7b48db0..6ca6e64bd8e6f 100644 --- a/lib/DebugInfo/DWARF/CMakeLists.txt +++ b/lib/DebugInfo/DWARF/CMakeLists.txt @@ -19,6 +19,7 @@ add_llvm_library(LLVMDebugInfoDWARF DWARFTypeUnit.cpp DWARFUnitIndex.cpp DWARFUnit.cpp + DWARFVerifier.cpp SyntaxHighlighting.cpp ADDITIONAL_HEADER_DIRS diff --git a/lib/DebugInfo/DWARF/DWARFContext.cpp b/lib/DebugInfo/DWARF/DWARFContext.cpp index b4ecbf805d1e3..573d37d77fee2 100644 --- a/lib/DebugInfo/DWARF/DWARFContext.cpp +++ b/lib/DebugInfo/DWARF/DWARFContext.cpp @@ -7,17 +7,17 @@ // //===----------------------------------------------------------------------===// +#include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h" #include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h" -#include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h" -#include "llvm/DebugInfo/DWARF/DWARFDebugAranges.h" #include "llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h" +#include "llvm/DebugInfo/DWARF/DWARFDebugAranges.h" #include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h" #include "llvm/DebugInfo/DWARF/DWARFDebugLine.h" #include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h" @@ -29,6 +29,7 @@ #include "llvm/DebugInfo/DWARF/DWARFGdbIndex.h" #include "llvm/DebugInfo/DWARF/DWARFSection.h" #include "llvm/DebugInfo/DWARF/DWARFUnitIndex.h" +#include "llvm/DebugInfo/DWARF/DWARFVerifier.h" #include "llvm/Object/Decompressor.h" #include "llvm/Object/MachO.h" #include "llvm/Object/ObjectFile.h" @@ -42,6 +43,8 @@ #include "llvm/Support/raw_ostream.h" #include <algorithm> #include <cstdint> +#include <map> +#include <set> #include <string> #include <utility> #include <vector> @@ -284,11 +287,30 @@ void DWARFContext::dump(raw_ostream &OS, DIDumpType DumpType, bool DumpEH, getStringSection(), isLittleEndian()); } -bool DWARFContext::verify(raw_ostream &OS, DIDumpType DumpType) { - bool Success = true; - if (DumpType == DIDT_All || DumpType == DIDT_Info) { +DWARFDie DWARFContext::getDIEForOffset(uint32_t Offset) { + parseCompileUnits(); + if (auto *CU = CUs.getUnitForOffset(Offset)) + return CU->getDIEForOffset(Offset); + return DWARFDie(); +} + +namespace { + +class Verifier { + raw_ostream &OS; + DWARFContext &DCtx; +public: + Verifier(raw_ostream &S, DWARFContext &D) : OS(S), DCtx(D) {} + + bool HandleDebugInfo() { + bool Success = true; + // A map that tracks all references (converted absolute references) so we + // can verify each reference points to a valid DIE and not an offset that + // lies between to valid DIEs. + std::map<uint64_t, std::set<uint32_t>> ReferenceToDIEOffsets; + OS << "Verifying .debug_info...\n"; - for (const auto &CU : compile_units()) { + for (const auto &CU : DCtx.compile_units()) { unsigned NumDies = CU->getNumDIEs(); for (unsigned I = 0; I < NumDies; ++I) { auto Die = CU->getDIEAtIndex(I); @@ -299,101 +321,231 @@ bool DWARFContext::verify(raw_ostream &OS, DIDumpType DumpType) { const auto Attr = AttrValue.Attr; const auto Form = AttrValue.Value.getForm(); switch (Attr) { - case DW_AT_ranges: - // Make sure the offset in the DW_AT_ranges attribute is valid. - if (auto SectionOffset = AttrValue.Value.getAsSectionOffset()) { - if (*SectionOffset >= getRangeSection().Data.size()) { + case DW_AT_ranges: + // Make sure the offset in the DW_AT_ranges attribute is valid. + if (auto SectionOffset = AttrValue.Value.getAsSectionOffset()) { + if (*SectionOffset >= DCtx.getRangeSection().Data.size()) { + Success = false; + OS << "error: DW_AT_ranges offset is beyond .debug_ranges " + "bounds:\n"; + Die.dump(OS, 0); + OS << "\n"; + } + } else { Success = false; - OS << "error: DW_AT_ranges offset is beyond .debug_ranges " - "bounds:\n"; + OS << "error: DIE has invalid DW_AT_ranges encoding:\n"; Die.dump(OS, 0); OS << "\n"; } - } else { - Success = false; - OS << "error: DIE has invalid DW_AT_ranges encoding:\n"; - Die.dump(OS, 0); - OS << "\n"; - } - break; - case DW_AT_stmt_list: - // Make sure the offset in the DW_AT_stmt_list attribute is valid. - if (auto SectionOffset = AttrValue.Value.getAsSectionOffset()) { - if (*SectionOffset >= getLineSection().Data.size()) { + break; + case DW_AT_stmt_list: + // Make sure the offset in the DW_AT_stmt_list attribute is valid. + if (auto SectionOffset = AttrValue.Value.getAsSectionOffset()) { + if (*SectionOffset >= DCtx.getLineSection().Data.size()) { + Success = false; + OS << "error: DW_AT_stmt_list offset is beyond .debug_line " + "bounds: " + << format("0x%08" PRIx32, *SectionOffset) << "\n"; + CU->getUnitDIE().dump(OS, 0); + OS << "\n"; + } + } else { Success = false; - OS << "error: DW_AT_stmt_list offset is beyond .debug_line " - "bounds: " - << format("0x%08" PRIx32, *SectionOffset) << "\n"; - CU->getUnitDIE().dump(OS, 0); + OS << "error: DIE has invalid DW_AT_stmt_list encoding:\n"; + Die.dump(OS, 0); OS << "\n"; } - } else { - Success = false; - OS << "error: DIE has invalid DW_AT_stmt_list encoding:\n"; - Die.dump(OS, 0); - OS << "\n"; - } - break; - - default: - break; + break; + + default: + break; } switch (Form) { - case DW_FORM_ref1: - case DW_FORM_ref2: - case DW_FORM_ref4: - case DW_FORM_ref8: - case DW_FORM_ref_udata: { - // Verify all CU relative references are valid CU offsets. - Optional<uint64_t> RefVal = AttrValue.Value.getAsReference(); - assert(RefVal); - if (RefVal) { - auto DieCU = Die.getDwarfUnit(); - auto CUSize = DieCU->getNextUnitOffset() - DieCU->getOffset(); - auto CUOffset = AttrValue.Value.getRawUValue(); - if (CUOffset >= CUSize) { + case DW_FORM_ref1: + case DW_FORM_ref2: + case DW_FORM_ref4: + case DW_FORM_ref8: + case DW_FORM_ref_udata: { + // Verify all CU relative references are valid CU offsets. + Optional<uint64_t> RefVal = AttrValue.Value.getAsReference(); + assert(RefVal); + if (RefVal) { + auto DieCU = Die.getDwarfUnit(); + auto CUSize = DieCU->getNextUnitOffset() - DieCU->getOffset(); + auto CUOffset = AttrValue.Value.getRawUValue(); + if (CUOffset >= CUSize) { + Success = false; + OS << "error: " << FormEncodingString(Form) << " CU offset " + << format("0x%08" PRIx32, CUOffset) + << " is invalid (must be less than CU size of " + << format("0x%08" PRIx32, CUSize) << "):\n"; + Die.dump(OS, 0); + OS << "\n"; + } else { + // Valid reference, but we will verify it points to an actual + // DIE later. + ReferenceToDIEOffsets[*RefVal].insert(Die.getOffset()); + } + } + break; + } + case DW_FORM_ref_addr: { + // Verify all absolute DIE references have valid offsets in the + // .debug_info section. + Optional<uint64_t> RefVal = AttrValue.Value.getAsReference(); + assert(RefVal); + if (RefVal) { + if(*RefVal >= DCtx.getInfoSection().Data.size()) { + Success = false; + OS << "error: DW_FORM_ref_addr offset beyond .debug_info " + "bounds:\n"; + Die.dump(OS, 0); + OS << "\n"; + } else { + // Valid reference, but we will verify it points to an actual + // DIE later. + ReferenceToDIEOffsets[*RefVal].insert(Die.getOffset()); + } + } + break; + } + case DW_FORM_strp: { + auto SecOffset = AttrValue.Value.getAsSectionOffset(); + assert(SecOffset); // DW_FORM_strp is a section offset. + if (SecOffset && *SecOffset >= DCtx.getStringSection().size()) { Success = false; - OS << "error: " << FormEncodingString(Form) << " CU offset " - << format("0x%08" PRIx32, CUOffset) - << " is invalid (must be less than CU size of " - << format("0x%08" PRIx32, CUSize) << "):\n"; + OS << "error: DW_FORM_strp offset beyond .debug_str bounds:\n"; Die.dump(OS, 0); OS << "\n"; } + break; } - break; + default: + break; } - case DW_FORM_ref_addr: { - // Verify all absolute DIE references have valid offsets in the - // .debug_info section. - Optional<uint64_t> RefVal = AttrValue.Value.getAsReference(); - assert(RefVal); - if (RefVal && *RefVal >= getInfoSection().Data.size()) { - Success = false; - OS << "error: DW_FORM_ref_addr offset beyond .debug_info " - "bounds:\n"; - Die.dump(OS, 0); - OS << "\n"; - } - break; - } - case DW_FORM_strp: { - auto SecOffset = AttrValue.Value.getAsSectionOffset(); - assert(SecOffset); // DW_FORM_strp is a section offset. - if (SecOffset && *SecOffset >= getStringSection().size()) { - Success = false; - OS << "error: DW_FORM_strp offset beyond .debug_str bounds:\n"; - Die.dump(OS, 0); - OS << "\n"; - } - break; - } - default: - break; + } + } + } + + // Take all references and make sure they point to an actual DIE by + // getting the DIE by offset and emitting an error + OS << "Verifying .debug_info references...\n"; + for (auto Pair: ReferenceToDIEOffsets) { + auto Die = DCtx.getDIEForOffset(Pair.first); + if (Die) + continue; + Success = false; + OS << "error: invalid DIE reference " << format("0x%08" PRIx64, Pair.first) + << ". Offset is in between DIEs:\n"; + for (auto Offset: Pair.second) { + auto ReferencingDie = DCtx.getDIEForOffset(Offset); + ReferencingDie.dump(OS, 0); + OS << "\n"; + } + OS << "\n"; + } + return Success; + } + + bool HandleDebugLine() { + std::map<uint64_t, DWARFDie> StmtListToDie; + bool Success = true; + OS << "Verifying .debug_line...\n"; + for (const auto &CU : DCtx.compile_units()) { + uint32_t LineTableOffset = 0; + auto CUDie = CU->getUnitDIE(); + auto StmtFormValue = CUDie.find(DW_AT_stmt_list); + if (!StmtFormValue) { + // No line table for this compile unit. + continue; + } + // Get the attribute value as a section offset. No need to produce an + // error here if the encoding isn't correct because we validate this in + // the .debug_info verifier. + if (auto StmtSectionOffset = toSectionOffset(StmtFormValue)) { + LineTableOffset = *StmtSectionOffset; + if (LineTableOffset >= DCtx.getLineSection().Data.size()) { + // Make sure we don't get a valid line table back if the offset + // is wrong. + assert(DCtx.getLineTableForUnit(CU.get()) == nullptr); + // Skip this line table as it isn't valid. No need to create an error + // here because we validate this in the .debug_info verifier. + continue; + } else { + auto Iter = StmtListToDie.find(LineTableOffset); + if (Iter != StmtListToDie.end()) { + Success = false; + OS << "error: two compile unit DIEs, " + << format("0x%08" PRIx32, Iter->second.getOffset()) << " and " + << format("0x%08" PRIx32, CUDie.getOffset()) + << ", have the same DW_AT_stmt_list section offset:\n"; + Iter->second.dump(OS, 0); + CUDie.dump(OS, 0); + OS << '\n'; + // Already verified this line table before, no need to do it again. + continue; } + StmtListToDie[LineTableOffset] = CUDie; } } + auto LineTable = DCtx.getLineTableForUnit(CU.get()); + if (!LineTable) { + Success = false; + OS << "error: .debug_line[" << format("0x%08" PRIx32, LineTableOffset) + << "] was not able to be parsed for CU:\n"; + CUDie.dump(OS, 0); + OS << '\n'; + continue; + } + uint32_t MaxFileIndex = LineTable->Prologue.FileNames.size(); + uint64_t PrevAddress = 0; + uint32_t RowIndex = 0; + for (const auto &Row : LineTable->Rows) { + if (Row.Address < PrevAddress) { + Success = false; + OS << "error: .debug_line[" << format("0x%08" PRIx32, LineTableOffset) + << "] row[" << RowIndex + << "] decreases in address from previous row:\n"; + + DWARFDebugLine::Row::dumpTableHeader(OS); + if (RowIndex > 0) + LineTable->Rows[RowIndex - 1].dump(OS); + Row.dump(OS); + OS << '\n'; + } + + if (Row.File > MaxFileIndex) { + Success = false; + OS << "error: .debug_line[" << format("0x%08" PRIx32, LineTableOffset) + << "][" << RowIndex << "] has invalid file index " << Row.File + << " (valid values are [1," << MaxFileIndex << "]):\n"; + DWARFDebugLine::Row::dumpTableHeader(OS); + Row.dump(OS); + OS << '\n'; + } + if (Row.EndSequence) + PrevAddress = 0; + else + PrevAddress = Row.Address; + ++RowIndex; + } } + return Success; + } +}; + +} // anonymous namespace + +bool DWARFContext::verify(raw_ostream &OS, DIDumpType DumpType) { + bool Success = true; + DWARFVerifier verifier(OS, *this); + if (DumpType == DIDT_All || DumpType == DIDT_Info) { + if (!verifier.handleDebugInfo()) + Success = false; + } + if (DumpType == DIDT_All || DumpType == DIDT_Line) { + if (!verifier.handleDebugLine()) + Success = false; } return Success; } diff --git a/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/lib/DebugInfo/DWARF/DWARFDebugLine.cpp index 77f3c00cc03fd..f32e8fe763579 100644 --- a/lib/DebugInfo/DWARF/DWARFDebugLine.cpp +++ b/lib/DebugInfo/DWARF/DWARFDebugLine.cpp @@ -10,6 +10,7 @@ #include "llvm/DebugInfo/DWARF/DWARFDebugLine.h" #include "llvm/ADT/SmallString.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/DebugInfo/DWARF/DWARFFormValue.h" #include "llvm/DebugInfo/DWARF/DWARFRelocMap.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/Format.h" @@ -26,11 +27,19 @@ using namespace llvm; using namespace dwarf; typedef DILineInfoSpecifier::FileLineInfoKind FileLineInfoKind; +namespace { +struct ContentDescriptor { + dwarf::LineNumberEntryFormat Type; + dwarf::Form Form; +}; +typedef SmallVector<ContentDescriptor, 4> ContentDescriptors; +} // end anonmyous namespace DWARFDebugLine::Prologue::Prologue() { clear(); } void DWARFDebugLine::Prologue::clear() { TotalLength = Version = PrologueLength = 0; + AddressSize = SegSelectorSize = 0; MinInstLength = MaxOpsPerInst = DefaultIsStmt = LineBase = LineRange = 0; OpcodeBase = 0; IsDWARF64 = false; @@ -43,6 +52,8 @@ void DWARFDebugLine::Prologue::dump(raw_ostream &OS) const { OS << "Line table prologue:\n" << format(" total_length: 0x%8.8" PRIx64 "\n", TotalLength) << format(" version: %u\n", Version) + << format(Version >= 5 ? " address_size: %u\n" : "", AddressSize) + << format(Version >= 5 ? " seg_select_size: %u\n" : "", SegSelectorSize) << format(" prologue_length: 0x%8.8" PRIx64 "\n", PrologueLength) << format(" min_inst_length: %u\n", MinInstLength) << format(Version >= 4 ? "max_ops_per_inst: %u\n" : "", MaxOpsPerInst) @@ -74,6 +85,125 @@ void DWARFDebugLine::Prologue::dump(raw_ostream &OS) const { } } +// Parse v2-v4 directory and file tables. +static void +parseV2DirFileTables(DataExtractor DebugLineData, uint32_t *OffsetPtr, + uint64_t EndPrologueOffset, + std::vector<StringRef> &IncludeDirectories, + std::vector<DWARFDebugLine::FileNameEntry> &FileNames) { + while (*OffsetPtr < EndPrologueOffset) { + StringRef S = DebugLineData.getCStrRef(OffsetPtr); + if (S.empty()) + break; + IncludeDirectories.push_back(S); + } + + while (*OffsetPtr < EndPrologueOffset) { + StringRef Name = DebugLineData.getCStrRef(OffsetPtr); + if (Name.empty()) + break; + DWARFDebugLine::FileNameEntry FileEntry; + FileEntry.Name = Name; + FileEntry.DirIdx = DebugLineData.getULEB128(OffsetPtr); + FileEntry.ModTime = DebugLineData.getULEB128(OffsetPtr); + FileEntry.Length = DebugLineData.getULEB128(OffsetPtr); + FileNames.push_back(FileEntry); + } +} + +// Parse v5 directory/file entry content descriptions. +// Returns the descriptors, or an empty vector if we did not find a path or +// ran off the end of the prologue. +static ContentDescriptors +parseV5EntryFormat(DataExtractor DebugLineData, uint32_t *OffsetPtr, + uint64_t EndPrologueOffset) { + ContentDescriptors Descriptors; + int FormatCount = DebugLineData.getU8(OffsetPtr); + bool HasPath = false; + for (int I = 0; I != FormatCount; ++I) { + if (*OffsetPtr >= EndPrologueOffset) + return ContentDescriptors(); + ContentDescriptor Descriptor; + Descriptor.Type = + dwarf::LineNumberEntryFormat(DebugLineData.getULEB128(OffsetPtr)); + Descriptor.Form = dwarf::Form(DebugLineData.getULEB128(OffsetPtr)); + if (Descriptor.Type == dwarf::DW_LNCT_path) + HasPath = true; + Descriptors.push_back(Descriptor); + } + return HasPath ? Descriptors : ContentDescriptors(); +} + +static bool +parseV5DirFileTables(DataExtractor DebugLineData, uint32_t *OffsetPtr, + uint64_t EndPrologueOffset, + std::vector<StringRef> &IncludeDirectories, + std::vector<DWARFDebugLine::FileNameEntry> &FileNames) { + // Get the directory entry description. + ContentDescriptors DirDescriptors = + parseV5EntryFormat(DebugLineData, OffsetPtr, EndPrologueOffset); + if (DirDescriptors.empty()) + return false; + + // Get the directory entries, according to the format described above. + int DirEntryCount = DebugLineData.getU8(OffsetPtr); + for (int I = 0; I != DirEntryCount; ++I) { + if (*OffsetPtr >= EndPrologueOffset) + return false; + for (auto Descriptor : DirDescriptors) { + DWARFFormValue Value(Descriptor.Form); + switch (Descriptor.Type) { + case DW_LNCT_path: + if (!Value.extractValue(DebugLineData, OffsetPtr, nullptr)) + return false; + IncludeDirectories.push_back(Value.getAsCString().getValue()); + break; + default: + if (!Value.skipValue(DebugLineData, OffsetPtr, nullptr)) + return false; + } + } + } + + // Get the file entry description. + ContentDescriptors FileDescriptors = + parseV5EntryFormat(DebugLineData, OffsetPtr, EndPrologueOffset); + if (FileDescriptors.empty()) + return false; + + // Get the file entries, according to the format described above. + int FileEntryCount = DebugLineData.getU8(OffsetPtr); + for (int I = 0; I != FileEntryCount; ++I) { + if (*OffsetPtr >= EndPrologueOffset) + return false; + DWARFDebugLine::FileNameEntry FileEntry; + for (auto Descriptor : FileDescriptors) { + DWARFFormValue Value(Descriptor.Form); + if (!Value.extractValue(DebugLineData, OffsetPtr, nullptr)) + return false; + switch (Descriptor.Type) { + case DW_LNCT_path: + FileEntry.Name = Value.getAsCString().getValue(); + break; + case DW_LNCT_directory_index: + FileEntry.DirIdx = Value.getAsUnsignedConstant().getValue(); + break; + case DW_LNCT_timestamp: + FileEntry.ModTime = Value.getAsUnsignedConstant().getValue(); + break; + case DW_LNCT_size: + FileEntry.Length = Value.getAsUnsignedConstant().getValue(); + break; + // FIXME: Add MD5 + default: + break; + } + } + FileNames.push_back(FileEntry); + } + return true; +} + bool DWARFDebugLine::Prologue::parse(DataExtractor DebugLineData, uint32_t *OffsetPtr) { const uint64_t PrologueOffset = *OffsetPtr; @@ -90,6 +220,11 @@ bool DWARFDebugLine::Prologue::parse(DataExtractor DebugLineData, if (Version < 2) return false; + if (Version >= 5) { + AddressSize = DebugLineData.getU8(OffsetPtr); + SegSelectorSize = DebugLineData.getU8(OffsetPtr); + } + PrologueLength = DebugLineData.getUnsigned(OffsetPtr, sizeofPrologueLength()); const uint64_t EndPrologueOffset = PrologueLength + *OffsetPtr; MinInstLength = DebugLineData.getU8(OffsetPtr); @@ -106,27 +241,18 @@ bool DWARFDebugLine::Prologue::parse(DataExtractor DebugLineData, StandardOpcodeLengths.push_back(OpLen); } - while (*OffsetPtr < EndPrologueOffset) { - const char *S = DebugLineData.getCStr(OffsetPtr); - if (S && S[0]) - IncludeDirectories.push_back(S); - else - break; - } - - while (*OffsetPtr < EndPrologueOffset) { - const char *Name = DebugLineData.getCStr(OffsetPtr); - if (Name && Name[0]) { - FileNameEntry FileEntry; - FileEntry.Name = Name; - FileEntry.DirIdx = DebugLineData.getULEB128(OffsetPtr); - FileEntry.ModTime = DebugLineData.getULEB128(OffsetPtr); - FileEntry.Length = DebugLineData.getULEB128(OffsetPtr); - FileNames.push_back(FileEntry); - } else { - break; + if (Version >= 5) { + if (!parseV5DirFileTables(DebugLineData, OffsetPtr, EndPrologueOffset, + IncludeDirectories, FileNames)) { + fprintf(stderr, + "warning: parsing line table prologue at 0x%8.8" PRIx64 + " found an invalid directory or file table description at" + " 0x%8.8" PRIx64 "\n", PrologueOffset, (uint64_t)*OffsetPtr); + return false; } - } + } else + parseV2DirFileTables(DebugLineData, OffsetPtr, EndPrologueOffset, + IncludeDirectories, FileNames); if (*OffsetPtr != EndPrologueOffset) { fprintf(stderr, @@ -161,6 +287,12 @@ void DWARFDebugLine::Row::reset(bool DefaultIsStmt) { EpilogueBegin = false; } +void DWARFDebugLine::Row::dumpTableHeader(raw_ostream &OS) { + OS << "Address Line Column File ISA Discriminator Flags\n" + << "------------------ ------ ------ ------ --- ------------- " + "-------------\n"; +} + void DWARFDebugLine::Row::dump(raw_ostream &OS) const { OS << format("0x%16.16" PRIx64 " %6u %6u", Address, Line, Column) << format(" %6u %3u %13u ", File, Isa, Discriminator) @@ -187,9 +319,7 @@ void DWARFDebugLine::LineTable::dump(raw_ostream &OS) const { OS << '\n'; if (!Rows.empty()) { - OS << "Address Line Column File ISA Discriminator Flags\n" - << "------------------ ------ ------ ------ --- ------------- " - "-------------\n"; + Row::dumpTableHeader(OS); for (const Row &R : Rows) { R.dump(OS); } @@ -637,7 +767,7 @@ bool DWARFDebugLine::LineTable::getFileNameByIndex(uint64_t FileIndex, if (Kind == FileLineInfoKind::None || !hasFileAtIndex(FileIndex)) return false; const FileNameEntry &Entry = Prologue.FileNames[FileIndex - 1]; - const char *FileName = Entry.Name; + StringRef FileName = Entry.Name; if (Kind != FileLineInfoKind::AbsoluteFilePath || sys::path::is_absolute(FileName)) { Result = FileName; @@ -646,7 +776,7 @@ bool DWARFDebugLine::LineTable::getFileNameByIndex(uint64_t FileIndex, SmallString<16> FilePath; uint64_t IncludeDirIndex = Entry.DirIdx; - const char *IncludeDir = ""; + StringRef IncludeDir; // Be defensive about the contents of Entry. if (IncludeDirIndex > 0 && IncludeDirIndex <= Prologue.IncludeDirectories.size()) diff --git a/lib/DebugInfo/DWARF/DWARFVerifier.cpp b/lib/DebugInfo/DWARF/DWARFVerifier.cpp new file mode 100644 index 0000000000000..9494e876da155 --- /dev/null +++ b/lib/DebugInfo/DWARF/DWARFVerifier.cpp @@ -0,0 +1,277 @@ +//===- DWARFVerifier.cpp --------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/DWARF/DWARFVerifier.h" +#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/DebugInfo/DWARF/DWARFDebugLine.h" +#include "llvm/DebugInfo/DWARF/DWARFDie.h" +#include "llvm/DebugInfo/DWARF/DWARFFormValue.h" +#include "llvm/DebugInfo/DWARF/DWARFSection.h" +#include "llvm/Support/raw_ostream.h" +#include <map> +#include <set> +#include <vector> + +using namespace llvm; +using namespace dwarf; +using namespace object; + +void DWARFVerifier::verifyDebugInfoAttribute(DWARFDie &Die, + DWARFAttribute &AttrValue) { + const auto Attr = AttrValue.Attr; + switch (Attr) { + case DW_AT_ranges: + // Make sure the offset in the DW_AT_ranges attribute is valid. + if (auto SectionOffset = AttrValue.Value.getAsSectionOffset()) { + if (*SectionOffset >= DCtx.getRangeSection().Data.size()) { + ++NumDebugInfoErrors; + OS << "error: DW_AT_ranges offset is beyond .debug_ranges " + "bounds:\n"; + Die.dump(OS, 0); + OS << "\n"; + } + } else { + ++NumDebugInfoErrors; + OS << "error: DIE has invalid DW_AT_ranges encoding:\n"; + Die.dump(OS, 0); + OS << "\n"; + } + break; + case DW_AT_stmt_list: + // Make sure the offset in the DW_AT_stmt_list attribute is valid. + if (auto SectionOffset = AttrValue.Value.getAsSectionOffset()) { + if (*SectionOffset >= DCtx.getLineSection().Data.size()) { + ++NumDebugInfoErrors; + OS << "error: DW_AT_stmt_list offset is beyond .debug_line " + "bounds: " + << format("0x%08" PRIx32, *SectionOffset) << "\n"; + Die.dump(OS, 0); + OS << "\n"; + } + } else { + ++NumDebugInfoErrors; + OS << "error: DIE has invalid DW_AT_stmt_list encoding:\n"; + Die.dump(OS, 0); + OS << "\n"; + } + break; + + default: + break; + } +} + +void DWARFVerifier::verifyDebugInfoForm(DWARFDie &Die, + DWARFAttribute &AttrValue) { + const auto Form = AttrValue.Value.getForm(); + switch (Form) { + case DW_FORM_ref1: + case DW_FORM_ref2: + case DW_FORM_ref4: + case DW_FORM_ref8: + case DW_FORM_ref_udata: { + // Verify all CU relative references are valid CU offsets. + Optional<uint64_t> RefVal = AttrValue.Value.getAsReference(); + assert(RefVal); + if (RefVal) { + auto DieCU = Die.getDwarfUnit(); + auto CUSize = DieCU->getNextUnitOffset() - DieCU->getOffset(); + auto CUOffset = AttrValue.Value.getRawUValue(); + if (CUOffset >= CUSize) { + ++NumDebugInfoErrors; + OS << "error: " << FormEncodingString(Form) << " CU offset " + << format("0x%08" PRIx32, CUOffset) + << " is invalid (must be less than CU size of " + << format("0x%08" PRIx32, CUSize) << "):\n"; + Die.dump(OS, 0); + OS << "\n"; + } else { + // Valid reference, but we will verify it points to an actual + // DIE later. + ReferenceToDIEOffsets[*RefVal].insert(Die.getOffset()); + } + } + break; + } + case DW_FORM_ref_addr: { + // Verify all absolute DIE references have valid offsets in the + // .debug_info section. + Optional<uint64_t> RefVal = AttrValue.Value.getAsReference(); + assert(RefVal); + if (RefVal) { + if (*RefVal >= DCtx.getInfoSection().Data.size()) { + ++NumDebugInfoErrors; + OS << "error: DW_FORM_ref_addr offset beyond .debug_info " + "bounds:\n"; + Die.dump(OS, 0); + OS << "\n"; + } else { + // Valid reference, but we will verify it points to an actual + // DIE later. + ReferenceToDIEOffsets[*RefVal].insert(Die.getOffset()); + } + } + break; + } + case DW_FORM_strp: { + auto SecOffset = AttrValue.Value.getAsSectionOffset(); + assert(SecOffset); // DW_FORM_strp is a section offset. + if (SecOffset && *SecOffset >= DCtx.getStringSection().size()) { + ++NumDebugInfoErrors; + OS << "error: DW_FORM_strp offset beyond .debug_str bounds:\n"; + Die.dump(OS, 0); + OS << "\n"; + } + break; + } + default: + break; + } +} + +void DWARFVerifier::veifyDebugInfoReferences() { + // Take all references and make sure they point to an actual DIE by + // getting the DIE by offset and emitting an error + OS << "Verifying .debug_info references...\n"; + for (auto Pair : ReferenceToDIEOffsets) { + auto Die = DCtx.getDIEForOffset(Pair.first); + if (Die) + continue; + ++NumDebugInfoErrors; + OS << "error: invalid DIE reference " << format("0x%08" PRIx64, Pair.first) + << ". Offset is in between DIEs:\n"; + for (auto Offset : Pair.second) { + auto ReferencingDie = DCtx.getDIEForOffset(Offset); + ReferencingDie.dump(OS, 0); + OS << "\n"; + } + OS << "\n"; + } +} + +bool DWARFVerifier::handleDebugInfo() { + NumDebugInfoErrors = 0; + OS << "Verifying .debug_info...\n"; + for (const auto &CU : DCtx.compile_units()) { + unsigned NumDies = CU->getNumDIEs(); + for (unsigned I = 0; I < NumDies; ++I) { + auto Die = CU->getDIEAtIndex(I); + const auto Tag = Die.getTag(); + if (Tag == DW_TAG_null) + continue; + for (auto AttrValue : Die.attributes()) { + verifyDebugInfoAttribute(Die, AttrValue); + verifyDebugInfoForm(Die, AttrValue); + } + } + } + veifyDebugInfoReferences(); + return NumDebugInfoErrors == 0; +} + +void DWARFVerifier::verifyDebugLineStmtOffsets() { + std::map<uint64_t, DWARFDie> StmtListToDie; + for (const auto &CU : DCtx.compile_units()) { + auto Die = CU->getUnitDIE(); + // Get the attribute value as a section offset. No need to produce an + // error here if the encoding isn't correct because we validate this in + // the .debug_info verifier. + auto StmtSectionOffset = toSectionOffset(Die.find(DW_AT_stmt_list)); + if (!StmtSectionOffset) + continue; + const uint32_t LineTableOffset = *StmtSectionOffset; + auto LineTable = DCtx.getLineTableForUnit(CU.get()); + if (LineTableOffset < DCtx.getLineSection().Data.size()) { + if (!LineTable) { + ++NumDebugLineErrors; + OS << "error: .debug_line[" << format("0x%08" PRIx32, LineTableOffset) + << "] was not able to be parsed for CU:\n"; + Die.dump(OS, 0); + OS << '\n'; + continue; + } + } else { + // Make sure we don't get a valid line table back if the offset is wrong. + assert(LineTable == nullptr); + // Skip this line table as it isn't valid. No need to create an error + // here because we validate this in the .debug_info verifier. + continue; + } + auto Iter = StmtListToDie.find(LineTableOffset); + if (Iter != StmtListToDie.end()) { + ++NumDebugLineErrors; + OS << "error: two compile unit DIEs, " + << format("0x%08" PRIx32, Iter->second.getOffset()) << " and " + << format("0x%08" PRIx32, Die.getOffset()) + << ", have the same DW_AT_stmt_list section offset:\n"; + Iter->second.dump(OS, 0); + Die.dump(OS, 0); + OS << '\n'; + // Already verified this line table before, no need to do it again. + continue; + } + StmtListToDie[LineTableOffset] = Die; + } +} + +void DWARFVerifier::verifyDebugLineRows() { + for (const auto &CU : DCtx.compile_units()) { + auto Die = CU->getUnitDIE(); + auto LineTable = DCtx.getLineTableForUnit(CU.get()); + // If there is no line table we will have created an error in the + // .debug_info verifier or in verifyDebugLineStmtOffsets(). + if (!LineTable) + continue; + uint32_t MaxFileIndex = LineTable->Prologue.FileNames.size(); + uint64_t PrevAddress = 0; + uint32_t RowIndex = 0; + for (const auto &Row : LineTable->Rows) { + if (Row.Address < PrevAddress) { + ++NumDebugLineErrors; + OS << "error: .debug_line[" + << format("0x%08" PRIx32, + *toSectionOffset(Die.find(DW_AT_stmt_list))) + << "] row[" << RowIndex + << "] decreases in address from previous row:\n"; + + DWARFDebugLine::Row::dumpTableHeader(OS); + if (RowIndex > 0) + LineTable->Rows[RowIndex - 1].dump(OS); + Row.dump(OS); + OS << '\n'; + } + + if (Row.File > MaxFileIndex) { + ++NumDebugLineErrors; + OS << "error: .debug_line[" + << format("0x%08" PRIx32, + *toSectionOffset(Die.find(DW_AT_stmt_list))) + << "][" << RowIndex << "] has invalid file index " << Row.File + << " (valid values are [1," << MaxFileIndex << "]):\n"; + DWARFDebugLine::Row::dumpTableHeader(OS); + Row.dump(OS); + OS << '\n'; + } + if (Row.EndSequence) + PrevAddress = 0; + else + PrevAddress = Row.Address; + ++RowIndex; + } + } +} + +bool DWARFVerifier::handleDebugLine() { + NumDebugLineErrors = 0; + OS << "Verifying .debug_line...\n"; + verifyDebugLineStmtOffsets(); + verifyDebugLineRows(); + return NumDebugLineErrors == 0; +} diff --git a/lib/DebugInfo/PDB/CMakeLists.txt b/lib/DebugInfo/PDB/CMakeLists.txt index bd35efb51c74e..e1753018c7df3 100644 --- a/lib/DebugInfo/PDB/CMakeLists.txt +++ b/lib/DebugInfo/PDB/CMakeLists.txt @@ -48,11 +48,11 @@ add_pdb_impl_folder(Native Native/NativeSession.cpp Native/PDBFile.cpp Native/PDBFileBuilder.cpp + Native/PDBStringTable.cpp + Native/PDBStringTableBuilder.cpp Native/PDBTypeServerHandler.cpp Native/PublicsStream.cpp Native/RawError.cpp - Native/StringTable.cpp - Native/StringTableBuilder.cpp Native/SymbolStream.cpp Native/TpiHashing.cpp Native/TpiStream.cpp diff --git a/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp b/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp index f994b4538ef08..867864e47dce5 100644 --- a/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp +++ b/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp @@ -89,6 +89,14 @@ uint32_t DbiModuleDescriptorBuilder::calculateSerializedLength() const { return alignTo(L + M + O, sizeof(uint32_t)); } +template <typename T> struct Foo { + explicit Foo(T &&Answer) : Answer(Answer) {} + + T Answer; +}; + +template <typename T> Foo<T> makeFoo(T &&t) { return Foo<T>(std::move(t)); } + void DbiModuleDescriptorBuilder::finalize() { Layout.FileNameOffs = 0; // TODO: Fix this Layout.Flags = 0; // TODO: Fix this diff --git a/lib/DebugInfo/PDB/Native/DbiStream.cpp b/lib/DebugInfo/PDB/Native/DbiStream.cpp index 4802cc6e8197e..db703809f7c9c 100644 --- a/lib/DebugInfo/PDB/Native/DbiStream.cpp +++ b/lib/DebugInfo/PDB/Native/DbiStream.cpp @@ -146,7 +146,7 @@ Error DbiStream::reload() { if (ECSubstream.getLength() > 0) { BinaryStreamReader ECReader(ECSubstream); - if (auto EC = ECNames.load(ECReader)) + if (auto EC = ECNames.reload(ECReader)) return EC; } diff --git a/lib/DebugInfo/PDB/Native/PDBFile.cpp b/lib/DebugInfo/PDB/Native/PDBFile.cpp index 943e7fa13ab76..859295d2c7d33 100644 --- a/lib/DebugInfo/PDB/Native/PDBFile.cpp +++ b/lib/DebugInfo/PDB/Native/PDBFile.cpp @@ -15,9 +15,9 @@ #include "llvm/DebugInfo/PDB/Native/DbiStream.h" #include "llvm/DebugInfo/PDB/Native/GlobalsStream.h" #include "llvm/DebugInfo/PDB/Native/InfoStream.h" +#include "llvm/DebugInfo/PDB/Native/PDBStringTable.h" #include "llvm/DebugInfo/PDB/Native/PublicsStream.h" #include "llvm/DebugInfo/PDB/Native/RawError.h" -#include "llvm/DebugInfo/PDB/Native/StringTable.h" #include "llvm/DebugInfo/PDB/Native/SymbolStream.h" #include "llvm/DebugInfo/PDB/Native/TpiStream.h" #include "llvm/Support/BinaryStream.h" @@ -337,8 +337,8 @@ Expected<SymbolStream &> PDBFile::getPDBSymbolStream() { return *Symbols; } -Expected<StringTable &> PDBFile::getStringTable() { - if (!Strings || !StringTableStream) { +Expected<PDBStringTable &> PDBFile::getStringTable() { + if (!Strings) { auto IS = getPDBInfoStream(); if (!IS) return IS.takeError(); @@ -350,12 +350,13 @@ Expected<StringTable &> PDBFile::getStringTable() { if (!NS) return NS.takeError(); + auto N = llvm::make_unique<PDBStringTable>(); BinaryStreamReader Reader(**NS); - auto N = llvm::make_unique<StringTable>(); - if (auto EC = N->load(Reader)) + if (auto EC = N->reload(Reader)) return std::move(EC); - Strings = std::move(N); + assert(Reader.bytesRemaining() == 0); StringTableStream = std::move(*NS); + Strings = std::move(N); } return *Strings; } @@ -389,7 +390,7 @@ bool PDBFile::hasPDBSymbolStream() { bool PDBFile::hasPDBTpiStream() const { return StreamTPI < getNumStreams(); } -bool PDBFile::hasStringTable() { +bool PDBFile::hasPDBStringTable() { auto IS = getPDBInfoStream(); if (!IS) return false; diff --git a/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp b/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp index b3c84903bc7e7..4dd965c69071e 100644 --- a/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp +++ b/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp @@ -17,8 +17,8 @@ #include "llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h" #include "llvm/DebugInfo/PDB/Native/InfoStream.h" #include "llvm/DebugInfo/PDB/Native/InfoStreamBuilder.h" +#include "llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h" #include "llvm/DebugInfo/PDB/Native/RawError.h" -#include "llvm/DebugInfo/PDB/Native/StringTableBuilder.h" #include "llvm/DebugInfo/PDB/Native/TpiStream.h" #include "llvm/DebugInfo/PDB/Native/TpiStreamBuilder.h" #include "llvm/Support/BinaryStream.h" @@ -67,7 +67,9 @@ TpiStreamBuilder &PDBFileBuilder::getIpiBuilder() { return *Ipi; } -StringTableBuilder &PDBFileBuilder::getStringTableBuilder() { return Strings; } +PDBStringTableBuilder &PDBFileBuilder::getStringTableBuilder() { + return Strings; +} Error PDBFileBuilder::addNamedStream(StringRef Name, uint32_t Size) { auto ExpectedStream = Msf->addStream(Size); @@ -78,9 +80,9 @@ Error PDBFileBuilder::addNamedStream(StringRef Name, uint32_t Size) { } Expected<msf::MSFLayout> PDBFileBuilder::finalizeMsfLayout() { - uint32_t StringTableSize = Strings.finalize(); + uint32_t StringsLen = Strings.calculateSerializedSize(); - if (auto EC = addNamedStream("/names", StringTableSize)) + if (auto EC = addNamedStream("/names", StringsLen)) return std::move(EC); if (auto EC = addNamedStream("/LinkInfo", 0)) return std::move(EC); @@ -107,6 +109,13 @@ Expected<msf::MSFLayout> PDBFileBuilder::finalizeMsfLayout() { return Msf->build(); } +Expected<uint32_t> PDBFileBuilder::getNamedStreamIndex(StringRef Name) const { + uint32_t SN = 0; + if (!NamedStreams.get(Name, SN)) + return llvm::make_error<pdb::RawError>(raw_error_code::no_stream); + return SN; +} + Error PDBFileBuilder::commit(StringRef Filename) { auto ExpectedLayout = finalizeMsfLayout(); if (!ExpectedLayout) @@ -144,12 +153,12 @@ Error PDBFileBuilder::commit(StringRef Filename) { return EC; } - uint32_t StringTableStreamNo = 0; - if (!NamedStreams.get("/names", StringTableStreamNo)) - return llvm::make_error<pdb::RawError>(raw_error_code::no_stream); + auto ExpectedSN = getNamedStreamIndex("/names"); + if (!ExpectedSN) + return ExpectedSN.takeError(); auto NS = WritableMappedBlockStream::createIndexedStream(Layout, Buffer, - StringTableStreamNo); + *ExpectedSN); BinaryStreamWriter NSWriter(*NS); if (auto EC = Strings.commit(NSWriter)) return EC; diff --git a/lib/DebugInfo/PDB/Native/PDBStringTable.cpp b/lib/DebugInfo/PDB/Native/PDBStringTable.cpp new file mode 100644 index 0000000000000..e84573fe07b8e --- /dev/null +++ b/lib/DebugInfo/PDB/Native/PDBStringTable.cpp @@ -0,0 +1,134 @@ +//===- PDBStringTable.cpp - PDB String Table ---------------------*- C++-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Native/PDBStringTable.h" + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/DebugInfo/MSF/MappedBlockStream.h" +#include "llvm/DebugInfo/PDB/Native/Hash.h" +#include "llvm/DebugInfo/PDB/Native/RawError.h" +#include "llvm/DebugInfo/PDB/Native/RawTypes.h" +#include "llvm/Support/BinaryStreamReader.h" +#include "llvm/Support/Endian.h" + +using namespace llvm; +using namespace llvm::support; +using namespace llvm::pdb; + +uint32_t PDBStringTable::getByteSize() const { return ByteSize; } +uint32_t PDBStringTable::getNameCount() const { return NameCount; } +uint32_t PDBStringTable::getHashVersion() const { return Header->HashVersion; } +uint32_t PDBStringTable::getSignature() const { return Header->Signature; } + +Error PDBStringTable::readHeader(BinaryStreamReader &Reader) { + if (auto EC = Reader.readObject(Header)) + return EC; + + if (Header->Signature != PDBStringTableSignature) + return make_error<RawError>(raw_error_code::corrupt_file, + "Invalid hash table signature"); + if (Header->HashVersion != 1 && Header->HashVersion != 2) + return make_error<RawError>(raw_error_code::corrupt_file, + "Unsupported hash version"); + + assert(Reader.bytesRemaining() == 0); + return Error::success(); +} + +Error PDBStringTable::readStrings(BinaryStreamReader &Reader) { + BinaryStreamRef Stream; + if (auto EC = Reader.readStreamRef(Stream)) + return EC; + + if (auto EC = Strings.initialize(Stream)) { + return joinErrors(std::move(EC), + make_error<RawError>(raw_error_code::corrupt_file, + "Invalid hash table byte length")); + } + + assert(Reader.bytesRemaining() == 0); + return Error::success(); +} + +Error PDBStringTable::readHashTable(BinaryStreamReader &Reader) { + const support::ulittle32_t *HashCount; + if (auto EC = Reader.readObject(HashCount)) + return EC; + + if (auto EC = Reader.readArray(IDs, *HashCount)) { + return joinErrors(std::move(EC), + make_error<RawError>(raw_error_code::corrupt_file, + "Could not read bucket array")); + } + + return Error::success(); +} + +Error PDBStringTable::readEpilogue(BinaryStreamReader &Reader) { + if (auto EC = Reader.readInteger(NameCount)) + return EC; + + assert(Reader.bytesRemaining() == 0); + return Error::success(); +} + +Error PDBStringTable::reload(BinaryStreamReader &Reader) { + + BinaryStreamReader SectionReader; + + std::tie(SectionReader, Reader) = Reader.split(sizeof(PDBStringTableHeader)); + if (auto EC = readHeader(SectionReader)) + return EC; + + std::tie(SectionReader, Reader) = Reader.split(Header->ByteSize); + if (auto EC = readStrings(SectionReader)) + return EC; + + // We don't know how long the hash table is until we parse it, so let the + // function responsible for doing that figure it out. + if (auto EC = readHashTable(Reader)) + return EC; + + std::tie(SectionReader, Reader) = Reader.split(sizeof(uint32_t)); + if (auto EC = readEpilogue(SectionReader)) + return EC; + + assert(Reader.bytesRemaining() == 0); + return Error::success(); +} + +Expected<StringRef> PDBStringTable::getStringForID(uint32_t ID) const { + return Strings.getString(ID); +} + +Expected<uint32_t> PDBStringTable::getIDForString(StringRef Str) const { + uint32_t Hash = + (Header->HashVersion == 1) ? hashStringV1(Str) : hashStringV2(Str); + size_t Count = IDs.size(); + uint32_t Start = Hash % Count; + for (size_t I = 0; I < Count; ++I) { + // The hash is just a starting point for the search, but if it + // doesn't work we should find the string no matter what, because + // we iterate the entire array. + uint32_t Index = (Start + I) % Count; + + uint32_t ID = IDs[Index]; + auto ExpectedStr = getStringForID(ID); + if (!ExpectedStr) + return ExpectedStr.takeError(); + + if (*ExpectedStr == Str) + return ID; + } + return make_error<RawError>(raw_error_code::no_entry); +} + +FixedStreamArray<support::ulittle32_t> PDBStringTable::name_ids() const { + return IDs; +} diff --git a/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp b/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp new file mode 100644 index 0000000000000..a472181a4895c --- /dev/null +++ b/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp @@ -0,0 +1,133 @@ +//===- PDBStringTableBuilder.cpp - PDB String Table -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h" + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/DebugInfo/MSF/MappedBlockStream.h" +#include "llvm/DebugInfo/PDB/Native/Hash.h" +#include "llvm/DebugInfo/PDB/Native/PDBFileBuilder.h" +#include "llvm/DebugInfo/PDB/Native/RawTypes.h" +#include "llvm/Support/BinaryStreamWriter.h" +#include "llvm/Support/Endian.h" + +using namespace llvm; +using namespace llvm::msf; +using namespace llvm::support; +using namespace llvm::support::endian; +using namespace llvm::pdb; + +uint32_t PDBStringTableBuilder::insert(StringRef S) { + return Strings.insert(S); +} + +static uint32_t computeBucketCount(uint32_t NumStrings) { + // The /names stream is basically an on-disk open-addressing hash table. + // Hash collisions are resolved by linear probing. We cannot make + // utilization 100% because it will make the linear probing extremely + // slow. But lower utilization wastes disk space. As a reasonable + // load factor, we choose 80%. We need +1 because slot 0 is reserved. + return (NumStrings + 1) * 1.25; +} + +uint32_t PDBStringTableBuilder::calculateHashTableSize() const { + uint32_t Size = sizeof(uint32_t); // Hash table begins with 4-byte size field. + Size += sizeof(uint32_t) * computeBucketCount(Strings.size()); + + return Size; +} + +uint32_t PDBStringTableBuilder::calculateSerializedSize() const { + uint32_t Size = 0; + Size += sizeof(PDBStringTableHeader); + Size += Strings.calculateSerializedSize(); + Size += calculateHashTableSize(); + Size += sizeof(uint32_t); // The /names stream ends with the string count. + return Size; +} + +Error PDBStringTableBuilder::writeHeader(BinaryStreamWriter &Writer) const { + // Write a header + PDBStringTableHeader H; + H.Signature = PDBStringTableSignature; + H.HashVersion = 1; + H.ByteSize = Strings.calculateSerializedSize(); + if (auto EC = Writer.writeObject(H)) + return EC; + assert(Writer.bytesRemaining() == 0); + return Error::success(); +} + +Error PDBStringTableBuilder::writeStrings(BinaryStreamWriter &Writer) const { + if (auto EC = Strings.commit(Writer)) + return EC; + + assert(Writer.bytesRemaining() == 0); + return Error::success(); +} + +Error PDBStringTableBuilder::writeHashTable(BinaryStreamWriter &Writer) const { + // Write a hash table. + uint32_t BucketCount = computeBucketCount(Strings.size()); + if (auto EC = Writer.writeInteger(BucketCount)) + return EC; + std::vector<ulittle32_t> Buckets(BucketCount); + + for (auto &Pair : Strings) { + StringRef S = Pair.getKey(); + uint32_t Offset = Pair.getValue(); + uint32_t Hash = hashStringV1(S); + + for (uint32_t I = 0; I != BucketCount; ++I) { + uint32_t Slot = (Hash + I) % BucketCount; + if (Slot == 0) + continue; // Skip reserved slot + if (Buckets[Slot] != 0) + continue; + Buckets[Slot] = Offset; + break; + } + } + + if (auto EC = Writer.writeArray(ArrayRef<ulittle32_t>(Buckets))) + return EC; + + assert(Writer.bytesRemaining() == 0); + return Error::success(); +} + +Error PDBStringTableBuilder::writeEpilogue(BinaryStreamWriter &Writer) const { + if (auto EC = Writer.writeInteger<uint32_t>(Strings.size())) + return EC; + assert(Writer.bytesRemaining() == 0); + return Error::success(); +} + +Error PDBStringTableBuilder::commit(BinaryStreamWriter &Writer) const { + BinaryStreamWriter SectionWriter; + + std::tie(SectionWriter, Writer) = Writer.split(sizeof(PDBStringTableHeader)); + if (auto EC = writeHeader(SectionWriter)) + return EC; + + std::tie(SectionWriter, Writer) = + Writer.split(Strings.calculateSerializedSize()); + if (auto EC = writeStrings(SectionWriter)) + return EC; + + std::tie(SectionWriter, Writer) = Writer.split(calculateHashTableSize()); + if (auto EC = writeHashTable(SectionWriter)) + return EC; + + std::tie(SectionWriter, Writer) = Writer.split(sizeof(uint32_t)); + if (auto EC = writeEpilogue(SectionWriter)) + return EC; + + return Error::success(); +} diff --git a/lib/DebugInfo/PDB/Native/StringTable.cpp b/lib/DebugInfo/PDB/Native/StringTable.cpp deleted file mode 100644 index 7e28389b83831..0000000000000 --- a/lib/DebugInfo/PDB/Native/StringTable.cpp +++ /dev/null @@ -1,109 +0,0 @@ -//===- StringTable.cpp - PDB String Table -----------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "llvm/DebugInfo/PDB/Native/StringTable.h" - -#include "llvm/ADT/ArrayRef.h" -#include "llvm/DebugInfo/PDB/Native/Hash.h" -#include "llvm/DebugInfo/PDB/Native/RawError.h" -#include "llvm/DebugInfo/PDB/Native/RawTypes.h" -#include "llvm/Support/BinaryStreamReader.h" -#include "llvm/Support/Endian.h" - -using namespace llvm; -using namespace llvm::support; -using namespace llvm::pdb; - -StringTable::StringTable() {} - -Error StringTable::load(BinaryStreamReader &Stream) { - ByteSize = Stream.getLength(); - - const StringTableHeader *H; - if (auto EC = Stream.readObject(H)) - return EC; - - if (H->Signature != StringTableSignature) - return make_error<RawError>(raw_error_code::corrupt_file, - "Invalid hash table signature"); - if (H->HashVersion != 1 && H->HashVersion != 2) - return make_error<RawError>(raw_error_code::corrupt_file, - "Unsupported hash version"); - - Signature = H->Signature; - HashVersion = H->HashVersion; - if (auto EC = Stream.readStreamRef(NamesBuffer, H->ByteSize)) - return joinErrors(std::move(EC), - make_error<RawError>(raw_error_code::corrupt_file, - "Invalid hash table byte length")); - - const support::ulittle32_t *HashCount; - if (auto EC = Stream.readObject(HashCount)) - return EC; - - if (auto EC = Stream.readArray(IDs, *HashCount)) - return joinErrors(std::move(EC), - make_error<RawError>(raw_error_code::corrupt_file, - "Could not read bucket array")); - - if (Stream.bytesRemaining() < sizeof(support::ulittle32_t)) - return make_error<RawError>(raw_error_code::corrupt_file, - "Missing name count"); - - if (auto EC = Stream.readInteger(NameCount)) - return EC; - - if (Stream.bytesRemaining() > 0) - return make_error<RawError>(raw_error_code::stream_too_long, - "Unexpected bytes found in string table"); - - return Error::success(); -} - -uint32_t StringTable::getByteSize() const { - return ByteSize; -} - -StringRef StringTable::getStringForID(uint32_t ID) const { - if (ID == IDs[0]) - return StringRef(); - - // NamesBuffer is a buffer of null terminated strings back to back. ID is - // the starting offset of the string we're looking for. So just seek into - // the desired offset and a read a null terminated stream from that offset. - StringRef Result; - BinaryStreamReader NameReader(NamesBuffer); - NameReader.setOffset(ID); - if (auto EC = NameReader.readCString(Result)) - consumeError(std::move(EC)); - return Result; -} - -uint32_t StringTable::getIDForString(StringRef Str) const { - uint32_t Hash = (HashVersion == 1) ? hashStringV1(Str) : hashStringV2(Str); - size_t Count = IDs.size(); - uint32_t Start = Hash % Count; - for (size_t I = 0; I < Count; ++I) { - // The hash is just a starting point for the search, but if it - // doesn't work we should find the string no matter what, because - // we iterate the entire array. - uint32_t Index = (Start + I) % Count; - - uint32_t ID = IDs[Index]; - StringRef S = getStringForID(ID); - if (S == Str) - return ID; - } - // IDs[0] contains the ID of the "invalid" entry. - return IDs[0]; -} - -FixedStreamArray<support::ulittle32_t> StringTable::name_ids() const { - return IDs; -} diff --git a/lib/DebugInfo/PDB/Native/StringTableBuilder.cpp b/lib/DebugInfo/PDB/Native/StringTableBuilder.cpp deleted file mode 100644 index 40dc8e1bfcb12..0000000000000 --- a/lib/DebugInfo/PDB/Native/StringTableBuilder.cpp +++ /dev/null @@ -1,108 +0,0 @@ -//===- StringTableBuilder.cpp - PDB String Table ----------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "llvm/DebugInfo/PDB/Native/StringTableBuilder.h" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/DebugInfo/PDB/Native/Hash.h" -#include "llvm/DebugInfo/PDB/Native/RawTypes.h" -#include "llvm/Support/BinaryStreamWriter.h" -#include "llvm/Support/Endian.h" - -using namespace llvm; -using namespace llvm::support; -using namespace llvm::support::endian; -using namespace llvm::pdb; - -uint32_t StringTableBuilder::insert(StringRef S) { - auto P = Strings.insert({S, StringSize}); - - // If a given string didn't exist in the string table, we want to increment - // the string table size. - if (P.second) - StringSize += S.size() + 1; // +1 for '\0' - return P.first->second; -} - -uint32_t StringTableBuilder::getStringIndex(StringRef S) { - auto Iter = Strings.find(S); - assert(Iter != Strings.end()); - return Iter->second; -} - -static uint32_t computeBucketCount(uint32_t NumStrings) { - // The /names stream is basically an on-disk open-addressing hash table. - // Hash collisions are resolved by linear probing. We cannot make - // utilization 100% because it will make the linear probing extremely - // slow. But lower utilization wastes disk space. As a reasonable - // load factor, we choose 80%. We need +1 because slot 0 is reserved. - return (NumStrings + 1) * 1.25; -} - -uint32_t StringTableBuilder::finalize() { - uint32_t Size = 0; - Size += sizeof(StringTableHeader); - Size += StringSize; - Size += sizeof(uint32_t); // Hash table begins with 4-byte size field. - - uint32_t BucketCount = computeBucketCount(Strings.size()); - Size += BucketCount * sizeof(uint32_t); - - Size += - sizeof(uint32_t); // The /names stream ends with the number of strings. - return Size; -} - -Error StringTableBuilder::commit(BinaryStreamWriter &Writer) const { - // Write a header - StringTableHeader H; - H.Signature = StringTableSignature; - H.HashVersion = 1; - H.ByteSize = StringSize; - if (auto EC = Writer.writeObject(H)) - return EC; - - // Write a string table. - uint32_t StringStart = Writer.getOffset(); - for (auto Pair : Strings) { - StringRef S = Pair.first; - uint32_t Offset = Pair.second; - Writer.setOffset(StringStart + Offset); - if (auto EC = Writer.writeCString(S)) - return EC; - } - Writer.setOffset(StringStart + StringSize); - - // Write a hash table. - uint32_t BucketCount = computeBucketCount(Strings.size()); - if (auto EC = Writer.writeInteger(BucketCount)) - return EC; - std::vector<ulittle32_t> Buckets(BucketCount); - - for (auto Pair : Strings) { - StringRef S = Pair.first; - uint32_t Offset = Pair.second; - uint32_t Hash = hashStringV1(S); - - for (uint32_t I = 0; I != BucketCount; ++I) { - uint32_t Slot = (Hash + I) % BucketCount; - if (Slot == 0) - continue; // Skip reserved slot - if (Buckets[Slot] != 0) - continue; - Buckets[Slot] = Offset; - break; - } - } - - if (auto EC = Writer.writeArray(ArrayRef<ulittle32_t>(Buckets))) - return EC; - if (auto EC = Writer.writeInteger(static_cast<uint32_t>(Strings.size()))) - return EC; - return Error::success(); -} |