diff options
Diffstat (limited to 'llvm/lib/DebugInfo')
68 files changed, 4466 insertions, 1604 deletions
diff --git a/llvm/lib/DebugInfo/CodeView/AppendingTypeTableBuilder.cpp b/llvm/lib/DebugInfo/CodeView/AppendingTypeTableBuilder.cpp index 86a6f9eebfa2..4d8b15530b9e 100644 --- a/llvm/lib/DebugInfo/CodeView/AppendingTypeTableBuilder.cpp +++ b/llvm/lib/DebugInfo/CodeView/AppendingTypeTableBuilder.cpp @@ -74,12 +74,17 @@ ArrayRef<ArrayRef<uint8_t>> AppendingTypeTableBuilder::records() const { void AppendingTypeTableBuilder::reset() { SeenRecords.clear(); } +static ArrayRef<uint8_t> stabilize(BumpPtrAllocator &RecordStorage, + ArrayRef<uint8_t> Record) { + uint8_t *Stable = RecordStorage.Allocate<uint8_t>(Record.size()); + memcpy(Stable, Record.data(), Record.size()); + return ArrayRef<uint8_t>(Stable, Record.size()); +} + TypeIndex AppendingTypeTableBuilder::insertRecordBytes(ArrayRef<uint8_t> &Record) { TypeIndex NewTI = nextTypeIndex(); - uint8_t *Stable = RecordStorage.Allocate<uint8_t>(Record.size()); - memcpy(Stable, Record.data(), Record.size()); - Record = ArrayRef<uint8_t>(Stable, Record.size()); + Record = stabilize(RecordStorage, Record); SeenRecords.push_back(Record); return NewTI; } @@ -93,3 +98,15 @@ AppendingTypeTableBuilder::insertRecord(ContinuationRecordBuilder &Builder) { TI = insertRecordBytes(C.RecordData); return TI; } + +bool AppendingTypeTableBuilder::replaceType(TypeIndex &Index, CVType Data, + bool Stabilize) { + assert(Index.toArrayIndex() < SeenRecords.size() && + "This function cannot be used to insert records!"); + + ArrayRef<uint8_t> Record = Data.data(); + if (Stabilize) + Record = stabilize(RecordStorage, Record); + SeenRecords[Index.toArrayIndex()] = Record; + return true; +} diff --git a/llvm/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp b/llvm/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp index 36a384baa13d..49761b9dce88 100644 --- a/llvm/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp +++ b/llvm/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp @@ -46,7 +46,7 @@ Error CodeViewRecordIO::endRecord() { while (PaddingBytes > 0) { char Pad = 
static_cast<uint8_t>(LF_PAD0 + PaddingBytes); StringRef BytesSR = StringRef(&Pad, sizeof(Pad)); - Streamer->EmitBytes(BytesSR); + Streamer->emitBytes(BytesSR); --PaddingBytes; } resetStreamedLen(); @@ -101,7 +101,7 @@ Error CodeViewRecordIO::mapByteVectorTail(ArrayRef<uint8_t> &Bytes, const Twine &Comment) { if (isStreaming()) { emitComment(Comment); - Streamer->EmitBinaryData(toStringRef(Bytes)); + Streamer->emitBinaryData(toStringRef(Bytes)); incrStreamedLen(Bytes.size()); } else if (isWriting()) { if (auto EC = Writer->writeBytes(Bytes)) @@ -131,7 +131,7 @@ Error CodeViewRecordIO::mapInteger(TypeIndex &TypeInd, const Twine &Comment) { emitComment(Comment + ": " + TypeNameStr); else emitComment(Comment); - Streamer->EmitIntValue(TypeInd.getIndex(), sizeof(TypeInd.getIndex())); + Streamer->emitIntValue(TypeInd.getIndex(), sizeof(TypeInd.getIndex())); incrStreamedLen(sizeof(TypeInd.getIndex())); } else if (isWriting()) { if (auto EC = Writer->writeInteger(TypeInd.getIndex())) @@ -205,7 +205,7 @@ Error CodeViewRecordIO::mapStringZ(StringRef &Value, const Twine &Comment) { if (isStreaming()) { auto NullTerminatedString = StringRef(Value.data(), Value.size() + 1); emitComment(Comment); - Streamer->EmitBytes(NullTerminatedString); + Streamer->emitBytes(NullTerminatedString); incrStreamedLen(NullTerminatedString.size()); } else if (isWriting()) { // Truncate if we attempt to write too much. 
@@ -226,7 +226,7 @@ Error CodeViewRecordIO::mapGuid(GUID &Guid, const Twine &Comment) { StringRef GuidSR = StringRef((reinterpret_cast<const char *>(&Guid)), GuidSize); emitComment(Comment); - Streamer->EmitBytes(GuidSR); + Streamer->emitBytes(GuidSR); incrStreamedLen(GuidSize); return Error::success(); } @@ -275,24 +275,24 @@ void CodeViewRecordIO::emitEncodedSignedInteger(const int64_t &Value, const Twine &Comment) { assert(Value < 0 && "Encoded integer is not signed!"); if (Value >= std::numeric_limits<int8_t>::min()) { - Streamer->EmitIntValue(LF_CHAR, 2); + Streamer->emitIntValue(LF_CHAR, 2); emitComment(Comment); - Streamer->EmitIntValue(Value, 1); + Streamer->emitIntValue(Value, 1); incrStreamedLen(3); } else if (Value >= std::numeric_limits<int16_t>::min()) { - Streamer->EmitIntValue(LF_SHORT, 2); + Streamer->emitIntValue(LF_SHORT, 2); emitComment(Comment); - Streamer->EmitIntValue(Value, 2); + Streamer->emitIntValue(Value, 2); incrStreamedLen(4); } else if (Value >= std::numeric_limits<int32_t>::min()) { - Streamer->EmitIntValue(LF_LONG, 2); + Streamer->emitIntValue(LF_LONG, 2); emitComment(Comment); - Streamer->EmitIntValue(Value, 4); + Streamer->emitIntValue(Value, 4); incrStreamedLen(6); } else { - Streamer->EmitIntValue(LF_QUADWORD, 2); + Streamer->emitIntValue(LF_QUADWORD, 2); emitComment(Comment); - Streamer->EmitIntValue(Value, 4); + Streamer->emitIntValue(Value, 4); incrStreamedLen(6); } } @@ -301,22 +301,22 @@ void CodeViewRecordIO::emitEncodedUnsignedInteger(const uint64_t &Value, const Twine &Comment) { if (Value < LF_NUMERIC) { emitComment(Comment); - Streamer->EmitIntValue(Value, 2); + Streamer->emitIntValue(Value, 2); incrStreamedLen(2); } else if (Value <= std::numeric_limits<uint16_t>::max()) { - Streamer->EmitIntValue(LF_USHORT, 2); + Streamer->emitIntValue(LF_USHORT, 2); emitComment(Comment); - Streamer->EmitIntValue(Value, 2); + Streamer->emitIntValue(Value, 2); incrStreamedLen(4); } else if (Value <= 
std::numeric_limits<uint32_t>::max()) { - Streamer->EmitIntValue(LF_ULONG, 2); + Streamer->emitIntValue(LF_ULONG, 2); emitComment(Comment); - Streamer->EmitIntValue(Value, 4); + Streamer->emitIntValue(Value, 4); incrStreamedLen(6); } else { - Streamer->EmitIntValue(LF_UQUADWORD, 2); + Streamer->emitIntValue(LF_UQUADWORD, 2); emitComment(Comment); - Streamer->EmitIntValue(Value, 8); + Streamer->emitIntValue(Value, 8); incrStreamedLen(6); } } diff --git a/llvm/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp b/llvm/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp index 0f704f286ee9..3c8a30101450 100644 --- a/llvm/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp +++ b/llvm/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp @@ -23,13 +23,11 @@ using namespace llvm::codeview; DebugSubsectionRecord::DebugSubsectionRecord() = default; DebugSubsectionRecord::DebugSubsectionRecord(DebugSubsectionKind Kind, - BinaryStreamRef Data, - CodeViewContainer Container) - : Container(Container), Kind(Kind), Data(Data) {} + BinaryStreamRef Data) + : Kind(Kind), Data(Data) {} Error DebugSubsectionRecord::initialize(BinaryStreamRef Stream, - DebugSubsectionRecord &Info, - CodeViewContainer Container) { + DebugSubsectionRecord &Info) { const DebugSubsectionHeader *Header; BinaryStreamReader Reader(Stream); if (auto EC = Reader.readObject(Header)) @@ -39,7 +37,6 @@ Error DebugSubsectionRecord::initialize(BinaryStreamRef Stream, static_cast<DebugSubsectionKind>(uint32_t(Header->Kind)); if (auto EC = Reader.readStreamRef(Info.Data, Header->Length)) return EC; - Info.Container = Container; Info.Kind = Kind; return Error::success(); } @@ -53,14 +50,14 @@ DebugSubsectionKind DebugSubsectionRecord::kind() const { return Kind; } BinaryStreamRef DebugSubsectionRecord::getRecordData() const { return Data; } DebugSubsectionRecordBuilder::DebugSubsectionRecordBuilder( - std::shared_ptr<DebugSubsection> Subsection, CodeViewContainer Container) - : Subsection(std::move(Subsection)), Container(Container) 
{} + std::shared_ptr<DebugSubsection> Subsection) + : Subsection(std::move(Subsection)) {} DebugSubsectionRecordBuilder::DebugSubsectionRecordBuilder( - const DebugSubsectionRecord &Contents, CodeViewContainer Container) - : Contents(Contents), Container(Container) {} + const DebugSubsectionRecord &Contents) + : Contents(Contents) {} -uint32_t DebugSubsectionRecordBuilder::calculateSerializedLength() { +uint32_t DebugSubsectionRecordBuilder::calculateSerializedLength() const { uint32_t DataSize = Subsection ? Subsection->calculateSerializedSize() : Contents.getRecordData().getLength(); // The length of the entire subsection is always padded to 4 bytes, @@ -68,7 +65,8 @@ uint32_t DebugSubsectionRecordBuilder::calculateSerializedLength() { return sizeof(DebugSubsectionHeader) + alignTo(DataSize, 4); } -Error DebugSubsectionRecordBuilder::commit(BinaryStreamWriter &Writer) const { +Error DebugSubsectionRecordBuilder::commit(BinaryStreamWriter &Writer, + CodeViewContainer Container) const { assert(Writer.getOffset() % alignOf(Container) == 0 && "Debug Subsection not properly aligned"); diff --git a/llvm/lib/DebugInfo/CodeView/GlobalTypeTableBuilder.cpp b/llvm/lib/DebugInfo/CodeView/GlobalTypeTableBuilder.cpp index a7ad1d045f04..7cd9ca7498f5 100644 --- a/llvm/lib/DebugInfo/CodeView/GlobalTypeTableBuilder.cpp +++ b/llvm/lib/DebugInfo/CodeView/GlobalTypeTableBuilder.cpp @@ -84,6 +84,13 @@ void GlobalTypeTableBuilder::reset() { SeenRecords.clear(); } +static inline ArrayRef<uint8_t> stabilize(BumpPtrAllocator &Alloc, + ArrayRef<uint8_t> Data) { + uint8_t *Stable = Alloc.Allocate<uint8_t>(Data.size()); + memcpy(Stable, Data.data(), Data.size()); + return makeArrayRef(Stable, Data.size()); +} + TypeIndex GlobalTypeTableBuilder::insertRecordBytes(ArrayRef<uint8_t> Record) { GloballyHashedType GHT = GloballyHashedType::hashType(Record, SeenHashes, SeenHashes); @@ -104,3 +111,30 @@ GlobalTypeTableBuilder::insertRecord(ContinuationRecordBuilder &Builder) { TI = 
insertRecordBytes(C.RecordData); return TI; } + +bool GlobalTypeTableBuilder::replaceType(TypeIndex &Index, CVType Data, + bool Stabilize) { + assert(Index.toArrayIndex() < SeenRecords.size() && + "This function cannot be used to insert records!"); + + ArrayRef<uint8_t> Record = Data.data(); + assert(Record.size() < UINT32_MAX && "Record too big"); + assert(Record.size() % 4 == 0 && + "The type record size is not a multiple of 4 bytes which will cause " + "misalignment in the output TPI stream!"); + + GloballyHashedType Hash = + GloballyHashedType::hashType(Record, SeenHashes, SeenHashes); + auto Result = HashedRecords.try_emplace(Hash, Index.toArrayIndex()); + if (!Result.second) { + Index = Result.first->second; + return false; // The record is already there, at a different location + } + + if (Stabilize) + Record = stabilize(RecordStorage, Record); + + SeenRecords[Index.toArrayIndex()] = Record; + SeenHashes[Index.toArrayIndex()] = Hash; + return true; +} diff --git a/llvm/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp b/llvm/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp index dc1253b7a39f..06b20ba33eec 100644 --- a/llvm/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp +++ b/llvm/lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp @@ -277,3 +277,8 @@ void LazyRandomTypeCollection::visitRange(TypeIndex Begin, uint32_t BeginOffset, ++RI; } } + +bool LazyRandomTypeCollection::replaceType(TypeIndex &Index, CVType Data, + bool Stabilize) { + llvm_unreachable("Method cannot be called"); +} diff --git a/llvm/lib/DebugInfo/CodeView/MergingTypeTableBuilder.cpp b/llvm/lib/DebugInfo/CodeView/MergingTypeTableBuilder.cpp index 4d7cd468f3ee..13ce3ae82c26 100644 --- a/llvm/lib/DebugInfo/CodeView/MergingTypeTableBuilder.cpp +++ b/llvm/lib/DebugInfo/CodeView/MergingTypeTableBuilder.cpp @@ -90,7 +90,9 @@ static inline ArrayRef<uint8_t> stabilize(BumpPtrAllocator &Alloc, TypeIndex MergingTypeTableBuilder::insertRecordAs(hash_code Hash, ArrayRef<uint8_t> &Record) { 
assert(Record.size() < UINT32_MAX && "Record too big"); - assert(Record.size() % 4 == 0 && "Record is not aligned to 4 bytes!"); + assert(Record.size() % 4 == 0 && + "The type record size is not a multiple of 4 bytes which will cause " + "misalignment in the output TPI stream!"); LocallyHashedType WeakHash{Hash, Record}; auto Result = HashedRecords.try_emplace(WeakHash, nextTypeIndex()); @@ -121,3 +123,30 @@ MergingTypeTableBuilder::insertRecord(ContinuationRecordBuilder &Builder) { TI = insertRecordBytes(C.RecordData); return TI; } + +bool MergingTypeTableBuilder::replaceType(TypeIndex &Index, CVType Data, + bool Stabilize) { + assert(Index.toArrayIndex() < SeenRecords.size() && + "This function cannot be used to insert records!"); + + ArrayRef<uint8_t> Record = Data.data(); + assert(Record.size() < UINT32_MAX && "Record too big"); + assert(Record.size() % 4 == 0 && + "The type record size is not a multiple of 4 bytes which will cause " + "misalignment in the output TPI stream!"); + + LocallyHashedType WeakHash{hash_value(Record), Record}; + auto Result = HashedRecords.try_emplace(WeakHash, Index.toArrayIndex()); + if (!Result.second) { + Index = Result.first->second; + return false; // The record is already there, at a different location + } + + if (Stabilize) { + Record = stabilize(RecordStorage, Record); + Result.first->first.RecordData = Record; + } + + SeenRecords[Index.toArrayIndex()] = Record; + return true; +} diff --git a/llvm/lib/DebugInfo/CodeView/RecordName.cpp b/llvm/lib/DebugInfo/CodeView/RecordName.cpp index cfaad1581159..47b5498181b7 100644 --- a/llvm/lib/DebugInfo/CodeView/RecordName.cpp +++ b/llvm/lib/DebugInfo/CodeView/RecordName.cpp @@ -253,7 +253,7 @@ std::string llvm::codeview::computeTypeName(TypeCollection &Types, consumeError(std::move(EC)); return "<unknown UDT>"; } - return Computer.name(); + return std::string(Computer.name()); } static int getSymbolNameOffset(CVSymbol Sym) { diff --git 
a/llvm/lib/DebugInfo/CodeView/SimpleTypeSerializer.cpp b/llvm/lib/DebugInfo/CodeView/SimpleTypeSerializer.cpp index 654c40a7470d..ac3b30175956 100644 --- a/llvm/lib/DebugInfo/CodeView/SimpleTypeSerializer.cpp +++ b/llvm/lib/DebugInfo/CodeView/SimpleTypeSerializer.cpp @@ -1,4 +1,15 @@ +//===- SimpleTypeSerializer.cpp -----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + #include "llvm/DebugInfo/CodeView/SimpleTypeSerializer.h" +#include "llvm/DebugInfo/CodeView/TypeRecord.h" +#include "llvm/DebugInfo/CodeView/TypeRecordMapping.h" +#include "llvm/Support/BinaryStreamWriter.h" using namespace llvm; using namespace llvm::codeview; diff --git a/llvm/lib/DebugInfo/CodeView/TypeRecordMapping.cpp b/llvm/lib/DebugInfo/CodeView/TypeRecordMapping.cpp index 1aded589e565..bb71c86a0609 100644 --- a/llvm/lib/DebugInfo/CodeView/TypeRecordMapping.cpp +++ b/llvm/lib/DebugInfo/CodeView/TypeRecordMapping.cpp @@ -99,12 +99,12 @@ static std::string getMemberAttributes(CodeViewRecordIO &IO, MethodOptions Options) { if (!IO.isStreaming()) return ""; - std::string AccessSpecifier = - getEnumName(IO, uint8_t(Access), makeArrayRef(getMemberAccessNames())); + std::string AccessSpecifier = std::string( + getEnumName(IO, uint8_t(Access), makeArrayRef(getMemberAccessNames()))); std::string MemberAttrs(AccessSpecifier); if (Kind != MethodKind::Vanilla) { - std::string MethodKind = - getEnumName(IO, unsigned(Kind), makeArrayRef(getMemberKindNames())); + std::string MethodKind = std::string( + getEnumName(IO, unsigned(Kind), makeArrayRef(getMemberKindNames()))); MemberAttrs += ", " + MethodKind; } if (Options != MethodOptions::None) { @@ -201,8 +201,8 @@ Error TypeRecordMapping::visitTypeBegin(CVType &CVR) { if 
(IO.isStreaming()) { auto RecordKind = CVR.kind(); uint16_t RecordLen = CVR.length() - 2; - std::string RecordKindName = - getEnumName(IO, unsigned(RecordKind), makeArrayRef(LeafTypeNames)); + std::string RecordKindName = std::string( + getEnumName(IO, unsigned(RecordKind), makeArrayRef(LeafTypeNames))); error(IO.mapInteger(RecordLen, "Record length")); error(IO.mapEnum(RecordKind, "Record kind: " + RecordKindName)); } @@ -241,7 +241,7 @@ Error TypeRecordMapping::visitMemberBegin(CVMemberRecord &Record) { MemberKind = Record.Kind; if (IO.isStreaming()) { - std::string MemberKindName = getLeafTypeName(Record.Kind); + std::string MemberKindName = std::string(getLeafTypeName(Record.Kind)); MemberKindName += " ( " + (getEnumName(IO, unsigned(Record.Kind), makeArrayRef(LeafTypeNames))) @@ -277,8 +277,8 @@ Error TypeRecordMapping::visitKnownRecord(CVType &CVR, ModifierRecord &Record) { Error TypeRecordMapping::visitKnownRecord(CVType &CVR, ProcedureRecord &Record) { - std::string CallingConvName = getEnumName( - IO, uint8_t(Record.CallConv), makeArrayRef(getCallingConventions())); + std::string CallingConvName = std::string(getEnumName( + IO, uint8_t(Record.CallConv), makeArrayRef(getCallingConventions()))); std::string FuncOptionNames = getFlagNames(IO, static_cast<uint16_t>(Record.Options), makeArrayRef(getFunctionOptionEnum())); @@ -293,8 +293,8 @@ Error TypeRecordMapping::visitKnownRecord(CVType &CVR, Error TypeRecordMapping::visitKnownRecord(CVType &CVR, MemberFunctionRecord &Record) { - std::string CallingConvName = getEnumName( - IO, uint8_t(Record.CallConv), makeArrayRef(getCallingConventions())); + std::string CallingConvName = std::string(getEnumName( + IO, uint8_t(Record.CallConv), makeArrayRef(getCallingConventions()))); std::string FuncOptionNames = getFlagNames(IO, static_cast<uint16_t>(Record.Options), makeArrayRef(getFunctionOptionEnum())); @@ -337,12 +337,13 @@ Error TypeRecordMapping::visitKnownRecord(CVType &CVR, PointerRecord &Record) { 
SmallString<128> Attr("Attrs: "); if (IO.isStreaming()) { - std::string PtrType = getEnumName(IO, unsigned(Record.getPointerKind()), - makeArrayRef(getPtrKindNames())); + std::string PtrType = + std::string(getEnumName(IO, unsigned(Record.getPointerKind()), + makeArrayRef(getPtrKindNames()))); Attr += "[ Type: " + PtrType; - std::string PtrMode = getEnumName(IO, unsigned(Record.getMode()), - makeArrayRef(getPtrModeNames())); + std::string PtrMode = std::string(getEnumName( + IO, unsigned(Record.getMode()), makeArrayRef(getPtrModeNames()))); Attr += ", Mode: " + PtrMode; auto PtrSizeOf = Record.getSize(); @@ -374,8 +375,8 @@ Error TypeRecordMapping::visitKnownRecord(CVType &CVR, PointerRecord &Record) { MemberPointerInfo &M = *Record.MemberInfo; error(IO.mapInteger(M.ContainingType, "ClassType")); - std::string PtrMemberGetRepresentation = getEnumName( - IO, uint16_t(M.Representation), makeArrayRef(getPtrMemberRepNames())); + std::string PtrMemberGetRepresentation = std::string(getEnumName( + IO, uint16_t(M.Representation), makeArrayRef(getPtrMemberRepNames()))); error(IO.mapEnum(M.Representation, "Representation: " + PtrMemberGetRepresentation)); } @@ -581,8 +582,8 @@ Error TypeRecordMapping::visitKnownRecord(CVType &CVR, } Error TypeRecordMapping::visitKnownRecord(CVType &CVR, LabelRecord &Record) { - std::string ModeName = - getEnumName(IO, uint16_t(Record.Mode), makeArrayRef(getLabelTypeEnum())); + std::string ModeName = std::string( + getEnumName(IO, uint16_t(Record.Mode), makeArrayRef(getLabelTypeEnum()))); error(IO.mapEnum(Record.Mode, "Mode: " + ModeName)); return Error::success(); } diff --git a/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp b/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp index f9fca74a2199..8c4b640bcd19 100644 --- a/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp +++ b/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp @@ -360,16 +360,18 @@ Error TypeStreamMerger::remapType(const CVType &Type) { [this, Type](MutableArrayRef<uint8_t> 
Storage) -> ArrayRef<uint8_t> { return remapIndices(Type, Storage); }; + unsigned AlignedSize = alignTo(Type.RecordData.size(), 4); + if (LLVM_LIKELY(UseGlobalHashes)) { GlobalTypeTableBuilder &Dest = isIdRecord(Type.kind()) ? *DestGlobalIdStream : *DestGlobalTypeStream; GloballyHashedType H = GlobalHashes[CurIndex.toArrayIndex()]; - DestIdx = Dest.insertRecordAs(H, Type.RecordData.size(), DoSerialize); + DestIdx = Dest.insertRecordAs(H, AlignedSize, DoSerialize); } else { MergingTypeTableBuilder &Dest = isIdRecord(Type.kind()) ? *DestIdStream : *DestTypeStream; - RemapStorage.resize(Type.RecordData.size()); + RemapStorage.resize(AlignedSize); ArrayRef<uint8_t> Result = DoSerialize(RemapStorage); if (!Result.empty()) DestIdx = Dest.insertRecordBytes(Result); @@ -386,9 +388,14 @@ Error TypeStreamMerger::remapType(const CVType &Type) { ArrayRef<uint8_t> TypeStreamMerger::remapIndices(const CVType &OriginalType, MutableArrayRef<uint8_t> Storage) { + unsigned Align = OriginalType.RecordData.size() & 3; + assert(Storage.size() == alignTo(OriginalType.RecordData.size(), 4) && + "The storage buffer size is not a multiple of 4 bytes which will " + "cause misalignment in the output TPI stream!"); + SmallVector<TiReference, 4> Refs; discoverTypeIndices(OriginalType.RecordData, Refs); - if (Refs.empty()) + if (Refs.empty() && Align == 0) return OriginalType.RecordData; ::memcpy(Storage.data(), OriginalType.RecordData.data(), @@ -408,6 +415,16 @@ TypeStreamMerger::remapIndices(const CVType &OriginalType, return {}; } } + + if (Align > 0) { + RecordPrefix *StorageHeader = + reinterpret_cast<RecordPrefix *>(Storage.data()); + StorageHeader->RecordLen += 4 - Align; + + DestContent = Storage.data() + OriginalType.RecordData.size(); + for (; Align < 4; ++Align) + *DestContent++ = LF_PAD4 - Align; + } return Storage; } diff --git a/llvm/lib/DebugInfo/CodeView/TypeTableCollection.cpp b/llvm/lib/DebugInfo/CodeView/TypeTableCollection.cpp index e13068b5b1eb..e517e8846d69 100644 --- 
a/llvm/lib/DebugInfo/CodeView/TypeTableCollection.cpp +++ b/llvm/lib/DebugInfo/CodeView/TypeTableCollection.cpp @@ -58,3 +58,8 @@ bool TypeTableCollection::contains(TypeIndex Index) { uint32_t TypeTableCollection::size() { return Records.size(); } uint32_t TypeTableCollection::capacity() { return Records.size(); } + +bool TypeTableCollection::replaceType(TypeIndex &Index, CVType Data, + bool Stabilize) { + llvm_unreachable("Method cannot be called"); +} diff --git a/llvm/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp b/llvm/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp index abbea3a868c8..ee1ff5460b9b 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp @@ -150,6 +150,8 @@ DWARFAbbreviationDeclaration::findAttributeIndex(dwarf::Attribute Attr) const { Optional<DWARFFormValue> DWARFAbbreviationDeclaration::getAttributeValue( const uint64_t DIEOffset, const dwarf::Attribute Attr, const DWARFUnit &U) const { + // Check if this abbreviation has this attribute without needing to skip + // any data so we can return quickly if it doesn't. Optional<uint32_t> MatchAttrIndex = findAttributeIndex(Attr); if (!MatchAttrIndex) return None; @@ -159,26 +161,24 @@ Optional<DWARFFormValue> DWARFAbbreviationDeclaration::getAttributeValue( // Add the byte size of ULEB that for the abbrev Code so we can start // skipping the attribute data. uint64_t Offset = DIEOffset + CodeByteSize; - uint32_t AttrIndex = 0; - for (const auto &Spec : AttributeSpecs) { - if (*MatchAttrIndex == AttrIndex) { - // We have arrived at the attribute to extract, extract if from Offset. - if (Spec.isImplicitConst()) - return DWARFFormValue::createFromSValue(Spec.Form, - Spec.getImplicitConstValue()); - - DWARFFormValue FormValue(Spec.Form); - if (FormValue.extractValue(DebugInfoData, &Offset, U.getFormParams(), &U)) - return FormValue; - } - // March Offset along until we get to the attribute we want. 
- if (auto FixedSize = Spec.getByteSize(U)) + for (uint32_t CurAttrIdx = 0; CurAttrIdx != *MatchAttrIndex; ++CurAttrIdx) + // March Offset along until we get to the attribute we want. + if (auto FixedSize = AttributeSpecs[CurAttrIdx].getByteSize(U)) Offset += *FixedSize; else - DWARFFormValue::skipValue(Spec.Form, DebugInfoData, &Offset, - U.getFormParams()); - ++AttrIndex; - } + DWARFFormValue::skipValue(AttributeSpecs[CurAttrIdx].Form, DebugInfoData, + &Offset, U.getFormParams()); + + // We have arrived at the attribute to extract, extract it from Offset. + const AttributeSpec &Spec = AttributeSpecs[*MatchAttrIndex]; + if (Spec.isImplicitConst()) + return DWARFFormValue::createFromSValue(Spec.Form, + Spec.getImplicitConstValue()); + + DWARFFormValue FormValue(Spec.Form); + if (FormValue.extractValue(DebugInfoData, &Offset, U.getFormParams(), &U)) + return FormValue; + return None; } diff --git a/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp b/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp index 575edba51ee8..28d35b609c24 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp @@ -365,8 +365,8 @@ AppleAcceleratorTable::equal_range(StringRef Key) const { void DWARFDebugNames::Header::dump(ScopedPrinter &W) const { DictScope HeaderScope(W, "Header"); W.printHex("Length", UnitLength); + W.printString("Format", dwarf::FormatString(Format)); W.printNumber("Version", Version); - W.printHex("Padding", Padding); W.printNumber("CU count", CompUnitCount); W.printNumber("Local TU count", LocalTypeUnitCount); W.printNumber("Foreign TU count", ForeignTypeUnitCount); @@ -378,30 +378,36 @@ void DWARFDebugNames::Header::dump(ScopedPrinter &W) const { Error DWARFDebugNames::Header::extract(const DWARFDataExtractor &AS, uint64_t *Offset) { - // Check that we can read the fixed-size part. 
- if (!AS.isValidOffset(*Offset + sizeof(HeaderPOD) - 1)) + auto HeaderError = [Offset = *Offset](Error E) { return createStringError(errc::illegal_byte_sequence, - "Section too small: cannot read header."); - - UnitLength = AS.getU32(Offset); - Version = AS.getU16(Offset); - Padding = AS.getU16(Offset); - CompUnitCount = AS.getU32(Offset); - LocalTypeUnitCount = AS.getU32(Offset); - ForeignTypeUnitCount = AS.getU32(Offset); - BucketCount = AS.getU32(Offset); - NameCount = AS.getU32(Offset); - AbbrevTableSize = AS.getU32(Offset); - AugmentationStringSize = alignTo(AS.getU32(Offset), 4); - - if (!AS.isValidOffsetForDataOfSize(*Offset, AugmentationStringSize)) - return createStringError( - errc::illegal_byte_sequence, - "Section too small: cannot read header augmentation."); + "parsing .debug_names header at 0x%" PRIx64 ": %s", + Offset, toString(std::move(E)).c_str()); + }; + + DataExtractor::Cursor C(*Offset); + std::tie(UnitLength, Format) = AS.getInitialLength(C); + + Version = AS.getU16(C); + AS.skip(C, 2); // padding + CompUnitCount = AS.getU32(C); + LocalTypeUnitCount = AS.getU32(C); + ForeignTypeUnitCount = AS.getU32(C); + BucketCount = AS.getU32(C); + NameCount = AS.getU32(C); + AbbrevTableSize = AS.getU32(C); + AugmentationStringSize = alignTo(AS.getU32(C), 4); + + if (!C) + return HeaderError(C.takeError()); + + if (!AS.isValidOffsetForDataOfSize(C.tell(), AugmentationStringSize)) + return HeaderError(createStringError(errc::illegal_byte_sequence, + "cannot read header augmentation")); AugmentationString.resize(AugmentationStringSize); - AS.getU8(Offset, reinterpret_cast<uint8_t *>(AugmentationString.data()), + AS.getU8(C, reinterpret_cast<uint8_t *>(AugmentationString.data()), AugmentationStringSize); - return Error::success(); + *Offset = C.tell(); + return C.takeError(); } void DWARFDebugNames::Abbrev::dump(ScopedPrinter &W) const { @@ -486,9 +492,10 @@ Error DWARFDebugNames::NameIndex::extract() { if (Error E = Hdr.extract(AS, &Offset)) return E; + 
const unsigned SectionOffsetSize = dwarf::getDwarfOffsetByteSize(Hdr.Format); CUsBase = Offset; - Offset += Hdr.CompUnitCount * 4; - Offset += Hdr.LocalTypeUnitCount * 4; + Offset += Hdr.CompUnitCount * SectionOffsetSize; + Offset += Hdr.LocalTypeUnitCount * SectionOffsetSize; Offset += Hdr.ForeignTypeUnitCount * 8; BucketsBase = Offset; Offset += Hdr.BucketCount * 4; @@ -496,9 +503,9 @@ Error DWARFDebugNames::NameIndex::extract() { if (Hdr.BucketCount > 0) Offset += Hdr.NameCount * 4; StringOffsetsBase = Offset; - Offset += Hdr.NameCount * 4; + Offset += Hdr.NameCount * SectionOffsetSize; EntryOffsetsBase = Offset; - Offset += Hdr.NameCount * 4; + Offset += Hdr.NameCount * SectionOffsetSize; if (!AS.isValidOffsetForDataOfSize(Offset, Hdr.AbbrevTableSize)) return createStringError(errc::illegal_byte_sequence, @@ -579,20 +586,24 @@ std::error_code DWARFDebugNames::SentinelError::convertToErrorCode() const { uint64_t DWARFDebugNames::NameIndex::getCUOffset(uint32_t CU) const { assert(CU < Hdr.CompUnitCount); - uint64_t Offset = CUsBase + 4 * CU; - return Section.AccelSection.getRelocatedValue(4, &Offset); + const unsigned SectionOffsetSize = dwarf::getDwarfOffsetByteSize(Hdr.Format); + uint64_t Offset = CUsBase + SectionOffsetSize * CU; + return Section.AccelSection.getRelocatedValue(SectionOffsetSize, &Offset); } uint64_t DWARFDebugNames::NameIndex::getLocalTUOffset(uint32_t TU) const { assert(TU < Hdr.LocalTypeUnitCount); - uint64_t Offset = CUsBase + 4 * (Hdr.CompUnitCount + TU); - return Section.AccelSection.getRelocatedValue(4, &Offset); + const unsigned SectionOffsetSize = dwarf::getDwarfOffsetByteSize(Hdr.Format); + uint64_t Offset = CUsBase + SectionOffsetSize * (Hdr.CompUnitCount + TU); + return Section.AccelSection.getRelocatedValue(SectionOffsetSize, &Offset); } uint64_t DWARFDebugNames::NameIndex::getForeignTUSignature(uint32_t TU) const { assert(TU < Hdr.ForeignTypeUnitCount); + const unsigned SectionOffsetSize = 
dwarf::getDwarfOffsetByteSize(Hdr.Format); uint64_t Offset = - CUsBase + 4 * (Hdr.CompUnitCount + Hdr.LocalTypeUnitCount) + 8 * TU; + CUsBase + + SectionOffsetSize * (Hdr.CompUnitCount + Hdr.LocalTypeUnitCount) + 8 * TU; return Section.AccelSection.getU64(&Offset); } @@ -613,7 +624,7 @@ DWARFDebugNames::NameIndex::getEntry(uint64_t *Offset) const { Entry E(*this, *AbbrevIt); - dwarf::FormParams FormParams = {Hdr.Version, 0, dwarf::DwarfFormat::DWARF32}; + dwarf::FormParams FormParams = {Hdr.Version, 0, Hdr.Format}; for (auto &Value : E.Values) { if (!Value.extractValue(AS, Offset, FormParams)) return createStringError(errc::io_error, @@ -625,12 +636,16 @@ DWARFDebugNames::NameIndex::getEntry(uint64_t *Offset) const { DWARFDebugNames::NameTableEntry DWARFDebugNames::NameIndex::getNameTableEntry(uint32_t Index) const { assert(0 < Index && Index <= Hdr.NameCount); - uint64_t StringOffsetOffset = StringOffsetsBase + 4 * (Index - 1); - uint64_t EntryOffsetOffset = EntryOffsetsBase + 4 * (Index - 1); + const unsigned SectionOffsetSize = dwarf::getDwarfOffsetByteSize(Hdr.Format); + uint64_t StringOffsetOffset = + StringOffsetsBase + SectionOffsetSize * (Index - 1); + uint64_t EntryOffsetOffset = + EntryOffsetsBase + SectionOffsetSize * (Index - 1); const DWARFDataExtractor &AS = Section.AccelSection; - uint64_t StringOffset = AS.getRelocatedValue(4, &StringOffsetOffset); - uint64_t EntryOffset = AS.getU32(&EntryOffsetOffset); + uint64_t StringOffset = + AS.getRelocatedValue(SectionOffsetSize, &StringOffsetOffset); + uint64_t EntryOffset = AS.getUnsigned(&EntryOffsetOffset, SectionOffsetSize); EntryOffset += EntriesBase; return {Section.StringSection, Index, StringOffset, EntryOffset}; } @@ -859,13 +874,14 @@ void DWARFDebugNames::ValueIterator::next() { DWARFDebugNames::ValueIterator::ValueIterator(const DWARFDebugNames &AccelTable, StringRef Key) - : CurrentIndex(AccelTable.NameIndices.begin()), IsLocal(false), Key(Key) { + : CurrentIndex(AccelTable.NameIndices.begin()), 
IsLocal(false), + Key(std::string(Key)) { searchFromStartOfCurrentIndex(); } DWARFDebugNames::ValueIterator::ValueIterator( const DWARFDebugNames::NameIndex &NI, StringRef Key) - : CurrentIndex(&NI), IsLocal(true), Key(Key) { + : CurrentIndex(&NI), IsLocal(true), Key(std::string(Key)) { if (!findInCurrentIndex()) setEnd(); } diff --git a/llvm/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp b/llvm/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp index f59e49268288..9bd134105c9b 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp @@ -15,16 +15,18 @@ using namespace llvm; void DWARFCompileUnit::dump(raw_ostream &OS, DIDumpOptions DumpOpts) { + int OffsetDumpWidth = 2 * dwarf::getDwarfOffsetByteSize(getFormat()); OS << format("0x%08" PRIx64, getOffset()) << ": Compile Unit:" - << " length = " << format("0x%08" PRIx64, getLength()) - << " version = " << format("0x%04x", getVersion()); + << " length = " << format("0x%0*" PRIx64, OffsetDumpWidth, getLength()) + << ", format = " << dwarf::FormatString(getFormat()) + << ", version = " << format("0x%04x", getVersion()); if (getVersion() >= 5) - OS << " unit_type = " << dwarf::UnitTypeString(getUnitType()); - OS << " abbr_offset = " + OS << ", unit_type = " << dwarf::UnitTypeString(getUnitType()); + OS << ", abbr_offset = " << format("0x%04" PRIx64, getAbbreviations()->getOffset()) - << " addr_size = " << format("0x%02x", getAddressByteSize()); + << ", addr_size = " << format("0x%02x", getAddressByteSize()); if (getVersion() >= 5 && getUnitType() != dwarf::DW_UT_compile) - OS << " DWO_id = " << format("0x%016" PRIx64, *getDWOId()); + OS << ", DWO_id = " << format("0x%016" PRIx64, *getDWOId()); OS << " (next unit at " << format("0x%08" PRIx64, getNextUnitOffset()) << ")\n"; diff --git a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp index aaa6d5250f23..bf6219497770 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp +++ 
b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp @@ -45,7 +45,6 @@ #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" #include "llvm/Support/TargetRegistry.h" -#include "llvm/Support/WithColor.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> #include <cstdint> @@ -66,8 +65,12 @@ using FileLineInfoKind = DILineInfoSpecifier::FileLineInfoKind; using FunctionNameKind = DILineInfoSpecifier::FunctionNameKind; DWARFContext::DWARFContext(std::unique_ptr<const DWARFObject> DObj, - std::string DWPName) - : DIContext(CK_DWARF), DWPName(std::move(DWPName)), DObj(std::move(DObj)) {} + std::string DWPName, + std::function<void(Error)> RecoverableErrorHandler, + std::function<void(Error)> WarningHandler) + : DIContext(CK_DWARF), DWPName(std::move(DWPName)), + RecoverableErrorHandler(RecoverableErrorHandler), + WarningHandler(WarningHandler), DObj(std::move(DObj)) {} DWARFContext::~DWARFContext() = default; @@ -130,10 +133,21 @@ collectContributionData(DWARFContext::unit_iterator_range Units) { return Contributions; } -static void dumpDWARFv5StringOffsetsSection( - raw_ostream &OS, StringRef SectionName, const DWARFObject &Obj, - const DWARFSection &StringOffsetsSection, StringRef StringSection, - DWARFContext::unit_iterator_range Units, bool LittleEndian) { +// Dump a DWARF string offsets section. This may be a DWARF v5 formatted +// string offsets section, where each compile or type unit contributes a +// number of entries (string offsets), with each contribution preceded by +// a header containing size and version number. Alternatively, it may be a +// monolithic series of string offsets, as generated by the pre-DWARF v5 +// implementation of split DWARF; however, in that case we still need to +// collect contributions of units because the size of the offsets (4 or 8 +// bytes) depends on the format of the referencing unit (DWARF32 or DWARF64). 
+static void dumpStringOffsetsSection(raw_ostream &OS, DIDumpOptions DumpOpts, + StringRef SectionName, + const DWARFObject &Obj, + const DWARFSection &StringOffsetsSection, + StringRef StringSection, + DWARFContext::unit_iterator_range Units, + bool LittleEndian) { auto Contributions = collectContributionData(Units); DWARFDataExtractor StrOffsetExt(Obj, StringOffsetsSection, LittleEndian, 0); DataExtractor StrData(StringSection, LittleEndian, 0); @@ -148,6 +162,7 @@ static void dumpDWARFv5StringOffsetsSection( } dwarf::DwarfFormat Format = Contribution->getFormat(); + int OffsetDumpWidth = 2 * dwarf::getDwarfOffsetByteSize(Format); uint16_t Version = Contribution->getVersion(); uint64_t ContributionHeader = Contribution->Base; // In DWARF v5 there is a contribution header that immediately precedes @@ -159,10 +174,10 @@ static void dumpDWARFv5StringOffsetsSection( // Detect overlapping contributions. if (Offset > ContributionHeader) { - WithColor::error() - << "overlapping contributions to string offsets table in section ." - << SectionName << ".\n"; - return; + DumpOpts.RecoverableErrorHandler(createStringError( + errc::invalid_argument, + "overlapping contributions to string offsets table in section .%s.", + SectionName.data())); } // Report a gap in the table. if (Offset < ContributionHeader) { @@ -175,7 +190,7 @@ static void dumpDWARFv5StringOffsetsSection( // version field and the padding, a total of 4 bytes). Add them back in // for reporting. OS << "Contribution size = " << (Contribution->Size + (Version < 5 ? 0 : 4)) - << ", Format = " << (Format == DWARF32 ? 
"DWARF32" : "DWARF64") + << ", Format = " << dwarf::FormatString(Format) << ", Version = " << Version << "\n"; Offset = Contribution->Base; @@ -184,7 +199,7 @@ static void dumpDWARFv5StringOffsetsSection( OS << format("0x%8.8" PRIx64 ": ", Offset); uint64_t StringOffset = StrOffsetExt.getRelocatedValue(EntrySize, &Offset); - OS << format("%8.8" PRIx64 " ", StringOffset); + OS << format("%0*" PRIx64 " ", OffsetDumpWidth, StringOffset); const char *S = StrData.getCStr(&StringOffset); if (S) OS << format("\"%s\"", S); @@ -198,47 +213,6 @@ static void dumpDWARFv5StringOffsetsSection( } } -// Dump a DWARF string offsets section. This may be a DWARF v5 formatted -// string offsets section, where each compile or type unit contributes a -// number of entries (string offsets), with each contribution preceded by -// a header containing size and version number. Alternatively, it may be a -// monolithic series of string offsets, as generated by the pre-DWARF v5 -// implementation of split DWARF. -static void dumpStringOffsetsSection(raw_ostream &OS, StringRef SectionName, - const DWARFObject &Obj, - const DWARFSection &StringOffsetsSection, - StringRef StringSection, - DWARFContext::unit_iterator_range Units, - bool LittleEndian, unsigned MaxVersion) { - // If we have at least one (compile or type) unit with DWARF v5 or greater, - // we assume that the section is formatted like a DWARF v5 string offsets - // section. - if (MaxVersion >= 5) - dumpDWARFv5StringOffsetsSection(OS, SectionName, Obj, StringOffsetsSection, - StringSection, Units, LittleEndian); - else { - DataExtractor strOffsetExt(StringOffsetsSection.Data, LittleEndian, 0); - uint64_t offset = 0; - uint64_t size = StringOffsetsSection.Data.size(); - // Ensure that size is a multiple of the size of an entry. - if (size & ((uint64_t)(sizeof(uint32_t) - 1))) { - OS << "error: size of ." 
<< SectionName << " is not a multiple of " - << sizeof(uint32_t) << ".\n"; - size &= -(uint64_t)sizeof(uint32_t); - } - DataExtractor StrData(StringSection, LittleEndian, 0); - while (offset < size) { - OS << format("0x%8.8" PRIx64 ": ", offset); - uint64_t StringOffset = strOffsetExt.getU32(&offset); - OS << format("%8.8" PRIx64 " ", StringOffset); - const char *S = StrData.getCStr(&StringOffset); - if (S) - OS << format("\"%s\"", S); - OS << "\n"; - } - } -} - // Dump the .debug_addr section. static void dumpAddrSection(raw_ostream &OS, DWARFDataExtractor &AddrData, DIDumpOptions DumpOpts, uint16_t Version, @@ -248,16 +222,17 @@ static void dumpAddrSection(raw_ostream &OS, DWARFDataExtractor &AddrData, DWARFDebugAddrTable AddrTable; uint64_t TableOffset = Offset; if (Error Err = AddrTable.extract(AddrData, &Offset, Version, AddrSize, - DWARFContext::dumpWarning)) { - WithColor::error() << toString(std::move(Err)) << '\n'; + DumpOpts.WarningHandler)) { + DumpOpts.RecoverableErrorHandler(std::move(Err)); // Keep going after an error, if we can, assuming that the length field // could be read. If it couldn't, stop reading the section. - if (!AddrTable.hasValidLength()) - break; - Offset = TableOffset + AddrTable.getLength(); - } else { - AddrTable.dump(OS, DumpOpts); + if (auto TableLength = AddrTable.getFullLength()) { + Offset = TableOffset + *TableLength; + continue; + } + break; } + AddrTable.dump(OS, DumpOpts); } } @@ -272,7 +247,7 @@ static void dumpRnglistsSection( llvm::DWARFDebugRnglistTable Rnglists; uint64_t TableOffset = Offset; if (Error Err = Rnglists.extract(rnglistData, &Offset)) { - WithColor::error() << toString(std::move(Err)) << '\n'; + DumpOpts.RecoverableErrorHandler(std::move(Err)); uint64_t Length = Rnglists.length(); // Keep going after an error, if we can, assuming that the length field // could be read. If it couldn't, stop reading the section. 
@@ -285,6 +260,48 @@ static void dumpRnglistsSection( } } +std::unique_ptr<DWARFDebugMacro> +DWARFContext::parseMacroOrMacinfo(MacroSecType SectionType) { + auto Macro = std::make_unique<DWARFDebugMacro>(); + auto ParseAndDump = [&](DWARFDataExtractor &Data, bool IsMacro) { + if (Error Err = IsMacro ? Macro->parseMacro(SectionType == MacroSection + ? compile_units() + : dwo_compile_units(), + SectionType == MacroSection + ? getStringExtractor() + : getStringDWOExtractor(), + Data) + : Macro->parseMacinfo(Data)) { + RecoverableErrorHandler(std::move(Err)); + Macro = nullptr; + } + }; + switch (SectionType) { + case MacinfoSection: { + DWARFDataExtractor Data(DObj->getMacinfoSection(), isLittleEndian(), 0); + ParseAndDump(Data, /*IsMacro=*/false); + break; + } + case MacinfoDwoSection: { + DWARFDataExtractor Data(DObj->getMacinfoDWOSection(), isLittleEndian(), 0); + ParseAndDump(Data, /*IsMacro=*/false); + break; + } + case MacroSection: { + DWARFDataExtractor Data(*DObj, DObj->getMacroSection(), isLittleEndian(), + 0); + ParseAndDump(Data, /*IsMacro=*/true); + break; + } + case MacroDwoSection: { + DWARFDataExtractor Data(DObj->getMacroDWOSection(), isLittleEndian(), 0); + ParseAndDump(Data, /*IsMacro=*/true); + break; + } + } + return Macro; +} + static void dumpLoclistsSection(raw_ostream &OS, DIDumpOptions DumpOpts, DWARFDataExtractor Data, const MCRegisterInfo *MRI, @@ -295,7 +312,7 @@ static void dumpLoclistsSection(raw_ostream &OS, DIDumpOptions DumpOpts, while (Data.isValidOffset(Offset)) { DWARFListTableHeader Header(".debug_loclists", "locations"); if (Error E = Header.extract(Data, &Offset)) { - WithColor::error() << toString(std::move(E)) << '\n'; + DumpOpts.RecoverableErrorHandler(std::move(E)); return; } @@ -319,10 +336,16 @@ static void dumpLoclistsSection(raw_ostream &OS, DIDumpOptions DumpOpts, } } +static void dumpPubTableSection(raw_ostream &OS, DIDumpOptions DumpOpts, + DWARFDataExtractor Data, bool GnuStyle) { + DWARFDebugPubTable Table; + 
Table.extract(Data, GnuStyle, DumpOpts.RecoverableErrorHandler); + Table.dump(OS); +} + void DWARFContext::dump( raw_ostream &OS, DIDumpOptions DumpOpts, std::array<Optional<uint64_t>, DIDT_ID_Count> DumpOffsets) { - uint64_t DumpType = DumpOpts.DumpType; StringRef Extension = sys::path::extension(DObj->getFileName()); @@ -430,31 +453,61 @@ void DWARFContext::dump( } } - if (const auto *Off = shouldDump(Explicit, ".debug_frame", DIDT_ID_DebugFrame, - DObj->getFrameSection().Data)) - getDebugFrame()->dump(OS, getRegisterInfo(), *Off); + if (const Optional<uint64_t> *Off = + shouldDump(Explicit, ".debug_frame", DIDT_ID_DebugFrame, + DObj->getFrameSection().Data)) { + if (Expected<const DWARFDebugFrame *> DF = getDebugFrame()) + (*DF)->dump(OS, getRegisterInfo(), *Off); + else + RecoverableErrorHandler(DF.takeError()); + } - if (const auto *Off = shouldDump(Explicit, ".eh_frame", DIDT_ID_DebugFrame, - DObj->getEHFrameSection().Data)) - getEHFrame()->dump(OS, getRegisterInfo(), *Off); + if (const Optional<uint64_t> *Off = + shouldDump(Explicit, ".eh_frame", DIDT_ID_DebugFrame, + DObj->getEHFrameSection().Data)) { + if (Expected<const DWARFDebugFrame *> DF = getEHFrame()) + (*DF)->dump(OS, getRegisterInfo(), *Off); + else + RecoverableErrorHandler(DF.takeError()); + } - if (DumpType & DIDT_DebugMacro) { - if (Explicit || !getDebugMacro()->empty()) { - OS << "\n.debug_macinfo contents:\n"; - getDebugMacro()->dump(OS); - } else if (ExplicitDWO || !getDebugMacroDWO()->empty()) { - OS << "\n.debug_macinfo.dwo contents:\n"; - getDebugMacroDWO()->dump(OS); - } + if (shouldDump(Explicit, ".debug_macro", DIDT_ID_DebugMacro, + DObj->getMacroSection().Data)) { + if (auto Macro = getDebugMacro()) + Macro->dump(OS); + } + + if (shouldDump(Explicit, ".debug_macro.dwo", DIDT_ID_DebugMacro, + DObj->getMacroDWOSection())) { + if (auto MacroDWO = getDebugMacroDWO()) + MacroDWO->dump(OS); + } + + if (shouldDump(Explicit, ".debug_macinfo", DIDT_ID_DebugMacro, + DObj->getMacinfoSection())) 
{ + if (auto Macinfo = getDebugMacinfo()) + Macinfo->dump(OS); + } + + if (shouldDump(Explicit, ".debug_macinfo.dwo", DIDT_ID_DebugMacro, + DObj->getMacinfoDWOSection())) { + if (auto MacinfoDWO = getDebugMacinfoDWO()) + MacinfoDWO->dump(OS); } if (shouldDump(Explicit, ".debug_aranges", DIDT_ID_DebugAranges, DObj->getArangesSection())) { uint64_t offset = 0; - DataExtractor arangesData(DObj->getArangesSection(), isLittleEndian(), 0); + DWARFDataExtractor arangesData(DObj->getArangesSection(), isLittleEndian(), + 0); DWARFDebugArangeSet set; - while (set.extract(arangesData, &offset)) + while (arangesData.isValidOffset(offset)) { + if (Error E = set.extract(arangesData, &offset)) { + RecoverableErrorHandler(std::move(E)); + break; + } set.dump(OS); + } } auto DumpLineSection = [&](DWARFDebugLine::SectionParser Parser, @@ -462,18 +515,13 @@ void DWARFContext::dump( Optional<uint64_t> DumpOffset) { while (!Parser.done()) { if (DumpOffset && Parser.getOffset() != *DumpOffset) { - Parser.skip(dumpWarning); + Parser.skip(DumpOpts.WarningHandler, DumpOpts.WarningHandler); continue; } OS << "debug_line[" << format("0x%8.8" PRIx64, Parser.getOffset()) << "]\n"; - if (DumpOpts.Verbose) { - Parser.parseNext(dumpWarning, dumpWarning, &OS); - } else { - DWARFDebugLine::LineTable LineTable = - Parser.parseNext(dumpWarning, dumpWarning); - LineTable.dump(OS, DumpOpts); - } + Parser.parseNext(DumpOpts.WarningHandler, DumpOpts.WarningHandler, &OS, + DumpOpts.Verbose); } }; @@ -555,7 +603,7 @@ void DWARFContext::dump( DWARFDebugRangeList rangeList; while (rangesData.isValidOffset(offset)) { if (Error E = rangeList.extract(rangesData, &offset)) { - WithColor::error() << toString(std::move(E)) << '\n'; + DumpOpts.RecoverableErrorHandler(std::move(E)); break; } rangeList.dump(OS); @@ -585,39 +633,44 @@ void DWARFContext::dump( } if (shouldDump(Explicit, ".debug_pubnames", DIDT_ID_DebugPubnames, - DObj->getPubnamesSection().Data)) - DWARFDebugPubTable(*DObj, DObj->getPubnamesSection(), 
isLittleEndian(), false) - .dump(OS); + DObj->getPubnamesSection().Data)) { + DWARFDataExtractor PubTableData(*DObj, DObj->getPubnamesSection(), + isLittleEndian(), 0); + dumpPubTableSection(OS, DumpOpts, PubTableData, /*GnuStyle=*/false); + } if (shouldDump(Explicit, ".debug_pubtypes", DIDT_ID_DebugPubtypes, - DObj->getPubtypesSection().Data)) - DWARFDebugPubTable(*DObj, DObj->getPubtypesSection(), isLittleEndian(), false) - .dump(OS); + DObj->getPubtypesSection().Data)) { + DWARFDataExtractor PubTableData(*DObj, DObj->getPubtypesSection(), + isLittleEndian(), 0); + dumpPubTableSection(OS, DumpOpts, PubTableData, /*GnuStyle=*/false); + } if (shouldDump(Explicit, ".debug_gnu_pubnames", DIDT_ID_DebugGnuPubnames, - DObj->getGnuPubnamesSection().Data)) - DWARFDebugPubTable(*DObj, DObj->getGnuPubnamesSection(), isLittleEndian(), - true /* GnuStyle */) - .dump(OS); + DObj->getGnuPubnamesSection().Data)) { + DWARFDataExtractor PubTableData(*DObj, DObj->getGnuPubnamesSection(), + isLittleEndian(), 0); + dumpPubTableSection(OS, DumpOpts, PubTableData, /*GnuStyle=*/true); + } if (shouldDump(Explicit, ".debug_gnu_pubtypes", DIDT_ID_DebugGnuPubtypes, - DObj->getGnuPubtypesSection().Data)) - DWARFDebugPubTable(*DObj, DObj->getGnuPubtypesSection(), isLittleEndian(), - true /* GnuStyle */) - .dump(OS); + DObj->getGnuPubtypesSection().Data)) { + DWARFDataExtractor PubTableData(*DObj, DObj->getGnuPubtypesSection(), + isLittleEndian(), 0); + dumpPubTableSection(OS, DumpOpts, PubTableData, /*GnuStyle=*/true); + } if (shouldDump(Explicit, ".debug_str_offsets", DIDT_ID_DebugStrOffsets, DObj->getStrOffsetsSection().Data)) - dumpStringOffsetsSection(OS, "debug_str_offsets", *DObj, - DObj->getStrOffsetsSection(), - DObj->getStrSection(), normal_units(), - isLittleEndian(), getMaxVersion()); + dumpStringOffsetsSection( + OS, DumpOpts, "debug_str_offsets", *DObj, DObj->getStrOffsetsSection(), + DObj->getStrSection(), normal_units(), isLittleEndian()); if (shouldDump(ExplicitDWO, 
".debug_str_offsets.dwo", DIDT_ID_DebugStrOffsets, DObj->getStrOffsetsDWOSection().Data)) - dumpStringOffsetsSection(OS, "debug_str_offsets.dwo", *DObj, + dumpStringOffsetsSection(OS, DumpOpts, "debug_str_offsets.dwo", *DObj, DObj->getStrOffsetsDWOSection(), DObj->getStrDWOSection(), dwo_units(), - isLittleEndian(), getMaxDWOVersion()); + isLittleEndian()); if (shouldDump(Explicit, ".gdb_index", DIDT_ID_GdbIndex, DObj->getGdbIndexSection())) { @@ -711,7 +764,7 @@ const DWARFUnitIndex &DWARFContext::getTUIndex() { DataExtractor TUIndexData(DObj->getTUIndexSection(), isLittleEndian(), 0); - TUIndex = std::make_unique<DWARFUnitIndex>(DW_SECT_TYPES); + TUIndex = std::make_unique<DWARFUnitIndex>(DW_SECT_EXT_TYPES); TUIndex->parse(TUIndexData); return *TUIndex; } @@ -770,7 +823,7 @@ const DWARFDebugAranges *DWARFContext::getDebugAranges() { return Aranges.get(); } -const DWARFDebugFrame *DWARFContext::getDebugFrame() { +Expected<const DWARFDebugFrame *> DWARFContext::getDebugFrame() { if (DebugFrame) return DebugFrame.get(); @@ -785,41 +838,50 @@ const DWARFDebugFrame *DWARFContext::getDebugFrame() { // http://lists.dwarfstd.org/htdig.cgi/dwarf-discuss-dwarfstd.org/2011-December/001173.html DWARFDataExtractor debugFrameData(*DObj, DObj->getFrameSection(), isLittleEndian(), DObj->getAddressSize()); - DebugFrame.reset(new DWARFDebugFrame(getArch(), false /* IsEH */)); - DebugFrame->parse(debugFrameData); + auto DF = std::make_unique<DWARFDebugFrame>(getArch(), /*IsEH=*/false); + if (Error E = DF->parse(debugFrameData)) + return std::move(E); + + DebugFrame.swap(DF); return DebugFrame.get(); } -const DWARFDebugFrame *DWARFContext::getEHFrame() { +Expected<const DWARFDebugFrame *> DWARFContext::getEHFrame() { if (EHFrame) return EHFrame.get(); DWARFDataExtractor debugFrameData(*DObj, DObj->getEHFrameSection(), isLittleEndian(), DObj->getAddressSize()); - DebugFrame.reset(new DWARFDebugFrame(getArch(), true /* IsEH */)); - DebugFrame->parse(debugFrameData); + + auto DF = 
std::make_unique<DWARFDebugFrame>(getArch(), /*IsEH=*/true); + if (Error E = DF->parse(debugFrameData)) + return std::move(E); + DebugFrame.swap(DF); return DebugFrame.get(); } -const DWARFDebugMacro *DWARFContext::getDebugMacroDWO() { - if (MacroDWO) - return MacroDWO.get(); +const DWARFDebugMacro *DWARFContext::getDebugMacro() { + if (!Macro) + Macro = parseMacroOrMacinfo(MacroSection); + return Macro.get(); +} - DataExtractor MacinfoDWOData(DObj->getMacinfoDWOSection(), isLittleEndian(), - 0); - MacroDWO.reset(new DWARFDebugMacro()); - MacroDWO->parse(MacinfoDWOData); +const DWARFDebugMacro *DWARFContext::getDebugMacroDWO() { + if (!MacroDWO) + MacroDWO = parseMacroOrMacinfo(MacroDwoSection); return MacroDWO.get(); } -const DWARFDebugMacro *DWARFContext::getDebugMacro() { - if (Macro) - return Macro.get(); +const DWARFDebugMacro *DWARFContext::getDebugMacinfo() { + if (!Macinfo) + Macinfo = parseMacroOrMacinfo(MacinfoSection); + return Macinfo.get(); +} - DataExtractor MacinfoData(DObj->getMacinfoSection(), isLittleEndian(), 0); - Macro.reset(new DWARFDebugMacro()); - Macro->parse(MacinfoData); - return Macro.get(); +const DWARFDebugMacro *DWARFContext::getDebugMacinfoDWO() { + if (!MacinfoDWO) + MacinfoDWO = parseMacroOrMacinfo(MacinfoDwoSection); + return MacinfoDWO.get(); } template <typename T> @@ -865,16 +927,16 @@ const AppleAcceleratorTable &DWARFContext::getAppleObjC() { const DWARFDebugLine::LineTable * DWARFContext::getLineTableForUnit(DWARFUnit *U) { Expected<const DWARFDebugLine::LineTable *> ExpectedLineTable = - getLineTableForUnit(U, dumpWarning); + getLineTableForUnit(U, WarningHandler); if (!ExpectedLineTable) { - dumpWarning(ExpectedLineTable.takeError()); + WarningHandler(ExpectedLineTable.takeError()); return nullptr; } return *ExpectedLineTable; } Expected<const DWARFDebugLine::LineTable *> DWARFContext::getLineTableForUnit( - DWARFUnit *U, function_ref<void(Error)> RecoverableErrorCallback) { + DWARFUnit *U, function_ref<void(Error)> 
RecoverableErrorHandler) { if (!Line) Line.reset(new DWARFDebugLine); @@ -899,7 +961,7 @@ Expected<const DWARFDebugLine::LineTable *> DWARFContext::getLineTableForUnit( DWARFDataExtractor lineData(*DObj, U->getLineSection(), isLittleEndian(), U->getAddressByteSize()); return Line->getOrParseLineTable(lineData, stmtOffset, *this, U, - RecoverableErrorCallback); + RecoverableErrorHandler); } void DWARFContext::parseNormalUnits() { @@ -910,7 +972,7 @@ void DWARFContext::parseNormalUnits() { }); NormalUnits.finishedInfoUnits(); DObj->forEachTypesSections([&](const DWARFSection &S) { - NormalUnits.addUnitsForSection(*this, S, DW_SECT_TYPES); + NormalUnits.addUnitsForSection(*this, S, DW_SECT_EXT_TYPES); }); } @@ -922,7 +984,7 @@ void DWARFContext::parseDWOUnits(bool Lazy) { }); DWOUnits.finishedInfoUnits(); DObj->forEachTypesDWOSections([&](const DWARFSection &S) { - DWOUnits.addUnitsForDWOSection(*this, S, DW_SECT_TYPES, Lazy); + DWOUnits.addUnitsForDWOSection(*this, S, DW_SECT_EXT_TYPES, Lazy); }); } @@ -1418,11 +1480,6 @@ static bool isRelocScattered(const object::ObjectFile &Obj, return MachObj->isRelocationScattered(RelocInfo); } -ErrorPolicy DWARFContext::defaultErrorHandler(Error E) { - WithColor::error() << toString(std::move(E)) << '\n'; - return ErrorPolicy::Continue; -} - namespace { struct DWARFSectionMap final : public DWARFSection { RelocAddrMap Relocs; @@ -1467,6 +1524,7 @@ class DWARFObjInMemory final : public DWARFObject { DWARFSectionMap PubtypesSection; DWARFSectionMap GnuPubnamesSection; DWARFSectionMap GnuPubtypesSection; + DWARFSectionMap MacroSection; DWARFSectionMap *mapNameToDWARFSection(StringRef Name) { return StringSwitch<DWARFSectionMap *>(Name) @@ -1494,6 +1552,7 @@ class DWARFObjInMemory final : public DWARFObject { .Case("apple_namespaces", &AppleNamespacesSection) .Case("apple_namespac", &AppleNamespacesSection) .Case("apple_objc", &AppleObjCSection) + .Case("debug_macro", &MacroSection) .Default(nullptr); } @@ -1502,6 +1561,7 @@ class 
DWARFObjInMemory final : public DWARFObject { StringRef StrSection; StringRef MacinfoSection; StringRef MacinfoDWOSection; + StringRef MacroDWOSection; StringRef AbbrevDWOSection; StringRef StrDWOSection; StringRef CUIndexSection; @@ -1522,6 +1582,7 @@ class DWARFObjInMemory final : public DWARFObject { .Case("debug_str", &StrSection) .Case("debug_macinfo", &MacinfoSection) .Case("debug_macinfo.dwo", &MacinfoDWOSection) + .Case("debug_macro.dwo", &MacroDWOSection) .Case("debug_abbrev.dwo", &AbbrevDWOSection) .Case("debug_str.dwo", &StrDWOSection) .Case("debug_cu_index", &CUIndexSection) @@ -1574,7 +1635,7 @@ public: } } DWARFObjInMemory(const object::ObjectFile &Obj, const LoadedObjectInfo *L, - function_ref<ErrorPolicy(Error)> HandleError) + function_ref<void(Error)> HandleError, function_ref<void(Error)> HandleWarning ) : IsLittleEndian(Obj.isLittleEndian()), AddressSize(Obj.getBytesInAddress()), FileName(Obj.getFileName()), Obj(&Obj) { @@ -1601,10 +1662,8 @@ public: StringRef Data; Expected<section_iterator> SecOrErr = Section.getRelocatedSection(); if (!SecOrErr) { - ErrorPolicy EP = HandleError(createError( - "failed to get relocated section: ", SecOrErr.takeError())); - if (EP == ErrorPolicy::Halt) - return; + HandleError(createError("failed to get relocated section: ", + SecOrErr.takeError())); continue; } @@ -1622,10 +1681,8 @@ public: } if (auto Err = maybeDecompress(Section, Name, Data)) { - ErrorPolicy EP = HandleError(createError( - "failed to decompress '" + Name + "', ", std::move(Err))); - if (EP == ErrorPolicy::Halt) - return; + HandleError(createError("failed to decompress '" + Name + "', ", + std::move(Err))); continue; } @@ -1726,8 +1783,7 @@ public: Expected<SymInfo> SymInfoOrErr = getSymbolInfo(Obj, Reloc, L, AddrCache); if (!SymInfoOrErr) { - if (HandleError(SymInfoOrErr.takeError()) == ErrorPolicy::Halt) - return; + HandleError(SymInfoOrErr.takeError()); continue; } @@ -1747,10 +1803,8 @@ public: if (!I.second) { RelocAddrEntry &entry = 
I.first->getSecond(); if (entry.Reloc2) { - ErrorPolicy EP = HandleError(createError( + HandleError(createError( "At most two relocations per offset are supported")); - if (EP == ErrorPolicy::Halt) - return; } entry.Reloc2 = Reloc; entry.SymbolValue2 = SymInfoOrErr->Address; @@ -1758,11 +1812,10 @@ public: } else { SmallString<32> Type; Reloc.getTypeName(Type); - ErrorPolicy EP = HandleError( + // FIXME: Support more relocations & change this to an error + HandleWarning( createError("failed to compute relocation: " + Type + ", ", errorCodeToError(object_error::parse_failed))); - if (EP == ErrorPolicy::Halt) - return; } } } @@ -1847,6 +1900,8 @@ public: const DWARFSection &getRnglistsSection() const override { return RnglistsSection; } + const DWARFSection &getMacroSection() const override { return MacroSection; } + StringRef getMacroDWOSection() const override { return MacroDWOSection; } StringRef getMacinfoSection() const override { return MacinfoSection; } StringRef getMacinfoDWOSection() const override { return MacinfoDWOSection; } const DWARFSection &getPubnamesSection() const override { return PubnamesSection; } @@ -1890,18 +1945,25 @@ public: std::unique_ptr<DWARFContext> DWARFContext::create(const object::ObjectFile &Obj, const LoadedObjectInfo *L, - function_ref<ErrorPolicy(Error)> HandleError, - std::string DWPName) { - auto DObj = std::make_unique<DWARFObjInMemory>(Obj, L, HandleError); - return std::make_unique<DWARFContext>(std::move(DObj), std::move(DWPName)); + std::string DWPName, + std::function<void(Error)> RecoverableErrorHandler, + std::function<void(Error)> WarningHandler) { + auto DObj = + std::make_unique<DWARFObjInMemory>(Obj, L, RecoverableErrorHandler, WarningHandler); + return std::make_unique<DWARFContext>(std::move(DObj), std::move(DWPName), + RecoverableErrorHandler, + WarningHandler); } std::unique_ptr<DWARFContext> DWARFContext::create(const StringMap<std::unique_ptr<MemoryBuffer>> &Sections, - uint8_t AddrSize, bool isLittleEndian) { 
+ uint8_t AddrSize, bool isLittleEndian, + std::function<void(Error)> RecoverableErrorHandler, + std::function<void(Error)> WarningHandler) { auto DObj = std::make_unique<DWARFObjInMemory>(Sections, AddrSize, isLittleEndian); - return std::make_unique<DWARFContext>(std::move(DObj), ""); + return std::make_unique<DWARFContext>( + std::move(DObj), "", RecoverableErrorHandler, WarningHandler); } Error DWARFContext::loadRegisterInfo(const object::ObjectFile &Obj) { @@ -1924,19 +1986,9 @@ Error DWARFContext::loadRegisterInfo(const object::ObjectFile &Obj) { uint8_t DWARFContext::getCUAddrSize() { // In theory, different compile units may have different address byte // sizes, but for simplicity we just use the address byte size of the - // last compile unit. In practice the address size field is repeated across + // first compile unit. In practice the address size field is repeated across // various DWARF headers (at least in version 5) to make it easier to dump // them independently, not to enable varying the address size. - uint8_t Addr = 0; - for (const auto &CU : compile_units()) { - Addr = CU->getAddressByteSize(); - break; - } - return Addr; -} - -void DWARFContext::dumpWarning(Error Warning) { - handleAllErrors(std::move(Warning), [](ErrorInfoBase &Info) { - WithColor::warning() << Info.message() << '\n'; - }); + unit_iterator_range CUs = compile_units(); + return CUs.empty() ? 
0 : (*CUs.begin())->getAddressByteSize(); } diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDataExtractor.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDataExtractor.cpp index 53e676bc7031..886fe1dff976 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDataExtractor.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDataExtractor.cpp @@ -7,11 +7,42 @@ //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h" -#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" using namespace llvm; +std::pair<uint64_t, dwarf::DwarfFormat> +DWARFDataExtractor::getInitialLength(uint64_t *Off, Error *Err) const { + ErrorAsOutParameter ErrAsOut(Err); + if (Err && *Err) + return {0, dwarf::DWARF32}; + + Cursor C(*Off); + uint64_t Length = getRelocatedValue(C, 4); + dwarf::DwarfFormat Format = dwarf::DWARF32; + if (Length == dwarf::DW_LENGTH_DWARF64) { + Length = getRelocatedValue(C, 8); + Format = dwarf::DWARF64; + } else if (Length >= dwarf::DW_LENGTH_lo_reserved) { + cantFail(C.takeError()); + if (Err) + *Err = createStringError( + errc::invalid_argument, + "unsupported reserved unit length of value 0x%8.8" PRIx64, Length); + return {0, dwarf::DWARF32}; + } + + if (C) { + *Off = C.tell(); + return {Length, Format}; + } + if (Err) + *Err = C.takeError(); + else + consumeError(C.takeError()); + return {0, dwarf::DWARF32}; +} + uint64_t DWARFDataExtractor::getRelocatedValue(uint32_t Size, uint64_t *Off, uint64_t *SecNdx, Error *Err) const { @@ -19,9 +50,11 @@ uint64_t DWARFDataExtractor::getRelocatedValue(uint32_t Size, uint64_t *Off, *SecNdx = object::SectionedAddress::UndefSection; if (!Section) return getUnsigned(Off, Size, Err); + + ErrorAsOutParameter ErrAsOut(Err); Optional<RelocAddrEntry> E = Obj->find(*Section, *Off); uint64_t A = getUnsigned(Off, Size, Err); - if (!E) + if (!E || (Err && *Err)) return A; if (SecNdx) *SecNdx = E->SectionIndex; diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugAddr.cpp 
b/llvm/lib/DebugInfo/DWARF/DWARFDebugAddr.cpp index f71543799e28..dcf2aefeb39f 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDebugAddr.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugAddr.cpp @@ -12,144 +12,144 @@ using namespace llvm; -void DWARFDebugAddrTable::clear() { - HeaderData = {}; +Error DWARFDebugAddrTable::extractAddresses(const DWARFDataExtractor &Data, + uint64_t *OffsetPtr, + uint64_t EndOffset) { + assert(EndOffset >= *OffsetPtr); + uint64_t DataSize = EndOffset - *OffsetPtr; + assert(Data.isValidOffsetForDataOfSize(*OffsetPtr, DataSize)); + if (AddrSize != 4 && AddrSize != 8) + return createStringError(errc::not_supported, + "address table at offset 0x%" PRIx64 + " has unsupported address size %" PRIu8 + " (4 and 8 are supported)", + Offset, AddrSize); + if (DataSize % AddrSize != 0) { + invalidateLength(); + return createStringError(errc::invalid_argument, + "address table at offset 0x%" PRIx64 + " contains data of size 0x%" PRIx64 + " which is not a multiple of addr size %" PRIu8, + Offset, DataSize, AddrSize); + } Addrs.clear(); - invalidateLength(); + size_t Count = DataSize / AddrSize; + Addrs.reserve(Count); + while (Count--) + Addrs.push_back(Data.getRelocatedValue(AddrSize, OffsetPtr)); + return Error::success(); } -Error DWARFDebugAddrTable::extract(DWARFDataExtractor Data, - uint64_t *OffsetPtr, - uint16_t Version, - uint8_t AddrSize, - std::function<void(Error)> WarnCallback) { - clear(); - HeaderOffset = *OffsetPtr; - // Read and verify the length field. 
- if (!Data.isValidOffsetForDataOfSize(*OffsetPtr, sizeof(uint32_t))) +Error DWARFDebugAddrTable::extractV5(const DWARFDataExtractor &Data, + uint64_t *OffsetPtr, uint8_t CUAddrSize, + std::function<void(Error)> WarnCallback) { + Offset = *OffsetPtr; + llvm::Error Err = Error::success(); + std::tie(Length, Format) = Data.getInitialLength(OffsetPtr, &Err); + if (Err) { + invalidateLength(); return createStringError(errc::invalid_argument, - "section is not large enough to contain a " - ".debug_addr table length at offset 0x%" - PRIx64, *OffsetPtr); - uint16_t UnitVersion; - if (Version == 0) { - WarnCallback(createStringError(errc::invalid_argument, - "DWARF version is not defined in CU," - " assuming version 5")); - UnitVersion = 5; - } else { - UnitVersion = Version; + "parsing address table at offset 0x%" PRIx64 + ": %s", + Offset, toString(std::move(Err)).c_str()); + } + + if (!Data.isValidOffsetForDataOfSize(*OffsetPtr, Length)) { + uint64_t DiagnosticLength = Length; + invalidateLength(); + return createStringError( + errc::invalid_argument, + "section is not large enough to contain an address table " + "at offset 0x%" PRIx64 " with a unit_length value of 0x%" PRIx64, + Offset, DiagnosticLength); } - // TODO: Add support for DWARF64. 
- Format = dwarf::DwarfFormat::DWARF32; - if (UnitVersion >= 5) { - HeaderData.Length = Data.getU32(OffsetPtr); - if (HeaderData.Length == dwarf::DW_LENGTH_DWARF64) { - invalidateLength(); - return createStringError(errc::not_supported, - "DWARF64 is not supported in .debug_addr at offset 0x%" PRIx64, - HeaderOffset); - } - if (HeaderData.Length + sizeof(uint32_t) < sizeof(Header)) { - uint32_t TmpLength = getLength(); - invalidateLength(); - return createStringError(errc::invalid_argument, - ".debug_addr table at offset 0x%" PRIx64 - " has too small length (0x%" PRIx32 - ") to contain a complete header", - HeaderOffset, TmpLength); - } - uint64_t End = HeaderOffset + getLength(); - if (!Data.isValidOffsetForDataOfSize(HeaderOffset, End - HeaderOffset)) { - uint32_t TmpLength = getLength(); - invalidateLength(); - return createStringError(errc::invalid_argument, - "section is not large enough to contain a .debug_addr table " - "of length 0x%" PRIx32 " at offset 0x%" PRIx64, - TmpLength, HeaderOffset); - } - - HeaderData.Version = Data.getU16(OffsetPtr); - HeaderData.AddrSize = Data.getU8(OffsetPtr); - HeaderData.SegSize = Data.getU8(OffsetPtr); - DataSize = getDataSize(); - } else { - HeaderData.Version = UnitVersion; - HeaderData.AddrSize = AddrSize; - // TODO: Support for non-zero SegSize. - HeaderData.SegSize = 0; - DataSize = Data.size(); + uint64_t EndOffset = *OffsetPtr + Length; + // Ensure that we can read the remaining header fields. + if (Length < 4) { + uint64_t DiagnosticLength = Length; + invalidateLength(); + return createStringError( + errc::invalid_argument, + "address table at offset 0x%" PRIx64 + " has a unit_length value of 0x%" PRIx64 + ", which is too small to contain a complete header", + Offset, DiagnosticLength); } - // Perform basic validation of the remaining header fields. 
+ Version = Data.getU16(OffsetPtr); + AddrSize = Data.getU8(OffsetPtr); + SegSize = Data.getU8(OffsetPtr); - // We support DWARF version 5 for now as well as pre-DWARF5 - // implementations of .debug_addr table, which doesn't contain a header - // and consists only of a series of addresses. - if (HeaderData.Version > 5) { - return createStringError(errc::not_supported, "version %" PRIu16 - " of .debug_addr section at offset 0x%" PRIx64 " is not supported", - HeaderData.Version, HeaderOffset); - } - // FIXME: For now we just treat version mismatch as an error, - // however the correct way to associate a .debug_addr table - // with a .debug_info table is to look at the DW_AT_addr_base - // attribute in the info table. - if (HeaderData.Version != UnitVersion) - return createStringError(errc::invalid_argument, - ".debug_addr table at offset 0x%" PRIx64 - " has version %" PRIu16 - " which is different from the version suggested" - " by the DWARF unit header: %" PRIu16, - HeaderOffset, HeaderData.Version, UnitVersion); - if (HeaderData.AddrSize != 4 && HeaderData.AddrSize != 8) + // Perform a basic validation of the header fields. + if (Version != 5) return createStringError(errc::not_supported, - ".debug_addr table at offset 0x%" PRIx64 - " has unsupported address size %" PRIu8, - HeaderOffset, HeaderData.AddrSize); - if (HeaderData.AddrSize != AddrSize && AddrSize != 0) - return createStringError(errc::invalid_argument, - ".debug_addr table at offset 0x%" PRIx64 - " has address size %" PRIu8 - " which is different from CU address size %" PRIu8, - HeaderOffset, HeaderData.AddrSize, AddrSize); - + "address table at offset 0x%" PRIx64 + " has unsupported version %" PRIu16, + Offset, Version); // TODO: add support for non-zero segment selector size. 
- if (HeaderData.SegSize != 0) + if (SegSize != 0) return createStringError(errc::not_supported, - ".debug_addr table at offset 0x%" PRIx64 - " has unsupported segment selector size %" PRIu8, - HeaderOffset, HeaderData.SegSize); - if (DataSize % HeaderData.AddrSize != 0) { - invalidateLength(); - return createStringError(errc::invalid_argument, - ".debug_addr table at offset 0x%" PRIx64 - " contains data of size %" PRIu32 - " which is not a multiple of addr size %" PRIu8, - HeaderOffset, DataSize, HeaderData.AddrSize); + "address table at offset 0x%" PRIx64 + " has unsupported segment selector size %" PRIu8, + Offset, SegSize); + + if (Error Err = extractAddresses(Data, OffsetPtr, EndOffset)) + return Err; + if (CUAddrSize && AddrSize != CUAddrSize) { + WarnCallback(createStringError( + errc::invalid_argument, + "address table at offset 0x%" PRIx64 " has address size %" PRIu8 + " which is different from CU address size %" PRIu8, + Offset, AddrSize, CUAddrSize)); } - Data.setAddressSize(HeaderData.AddrSize); - uint32_t AddrCount = DataSize / HeaderData.AddrSize; - for (uint32_t I = 0; I < AddrCount; ++I) - if (HeaderData.AddrSize == 4) - Addrs.push_back(Data.getU32(OffsetPtr)); - else - Addrs.push_back(Data.getU64(OffsetPtr)); return Error::success(); } +Error DWARFDebugAddrTable::extractPreStandard(const DWARFDataExtractor &Data, + uint64_t *OffsetPtr, + uint16_t CUVersion, + uint8_t CUAddrSize) { + assert(CUVersion > 0 && CUVersion < 5); + + Offset = *OffsetPtr; + Length = 0; + Version = CUVersion; + AddrSize = CUAddrSize; + SegSize = 0; + + return extractAddresses(Data, OffsetPtr, Data.size()); +} + +Error DWARFDebugAddrTable::extract(const DWARFDataExtractor &Data, + uint64_t *OffsetPtr, + uint16_t CUVersion, + uint8_t CUAddrSize, + std::function<void(Error)> WarnCallback) { + if (CUVersion > 0 && CUVersion < 5) + return extractPreStandard(Data, OffsetPtr, CUVersion, CUAddrSize); + if (CUVersion == 0) + WarnCallback(createStringError(errc::invalid_argument, + 
"DWARF version is not defined in CU," + " assuming version 5")); + return extractV5(Data, OffsetPtr, CUAddrSize, WarnCallback); +} + void DWARFDebugAddrTable::dump(raw_ostream &OS, DIDumpOptions DumpOpts) const { if (DumpOpts.Verbose) - OS << format("0x%8.8" PRIx32 ": ", HeaderOffset); - OS << format("Addr Section: length = 0x%8.8" PRIx32 - ", version = 0x%4.4" PRIx16 ", " - "addr_size = 0x%2.2" PRIx8 ", seg_size = 0x%2.2" PRIx8 "\n", - HeaderData.Length, HeaderData.Version, HeaderData.AddrSize, - HeaderData.SegSize); + OS << format("0x%8.8" PRIx64 ": ", Offset); + if (Length) { + int OffsetDumpWidth = 2 * dwarf::getDwarfOffsetByteSize(Format); + OS << "Address table header: " + << format("length = 0x%0*" PRIx64, OffsetDumpWidth, Length) + << ", format = " << dwarf::FormatString(Format) + << format(", version = 0x%4.4" PRIx16, Version) + << format(", addr_size = 0x%2.2" PRIx8, AddrSize) + << format(", seg_size = 0x%2.2" PRIx8, SegSize) << "\n"; + } if (Addrs.size() > 0) { - const char *AddrFmt = (HeaderData.AddrSize == 4) ? "0x%8.8" PRIx64 "\n" - : "0x%16.16" PRIx64 "\n"; + const char *AddrFmt = + (AddrSize == 4) ? "0x%8.8" PRIx64 "\n" : "0x%16.16" PRIx64 "\n"; OS << "Addrs: [\n"; for (uint64_t Addr : Addrs) OS << format(AddrFmt, Addr); @@ -162,21 +162,13 @@ Expected<uint64_t> DWARFDebugAddrTable::getAddrEntry(uint32_t Index) const { return Addrs[Index]; return createStringError(errc::invalid_argument, "Index %" PRIu32 " is out of range of the " - ".debug_addr table at offset 0x%" PRIx64, - Index, HeaderOffset); + "address table at offset 0x%" PRIx64, + Index, Offset); } -uint32_t DWARFDebugAddrTable::getLength() const { - if (HeaderData.Length == 0) - return 0; - // TODO: DWARF64 support. 
- return HeaderData.Length + sizeof(uint32_t); +Optional<uint64_t> DWARFDebugAddrTable::getFullLength() const { + if (Length == 0) + return None; + return Length + dwarf::getUnitLengthFieldByteSize(Format); } -uint32_t DWARFDebugAddrTable::getDataSize() const { - if (DataSize != 0) - return DataSize; - if (getLength() == 0) - return 0; - return getLength() - getHeaderSize(); -} diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp index 200b2d52a02b..608fc0388af0 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp @@ -7,6 +7,8 @@ //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h" +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/Support/Errc.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include <cassert> @@ -29,80 +31,141 @@ void DWARFDebugArangeSet::clear() { ArangeDescriptors.clear(); } -bool -DWARFDebugArangeSet::extract(DataExtractor data, uint64_t *offset_ptr) { - if (data.isValidOffset(*offset_ptr)) { - ArangeDescriptors.clear(); - Offset = *offset_ptr; - - // 7.20 Address Range Table - // - // Each set of entries in the table of address ranges contained in - // the .debug_aranges section begins with a header consisting of: a - // 4-byte length containing the length of the set of entries for this - // compilation unit, not including the length field itself; a 2-byte - // version identifier containing the value 2 for DWARF Version 2; a - // 4-byte offset into the.debug_infosection; a 1-byte unsigned integer - // containing the size in bytes of an address (or the offset portion of - // an address for segmented addressing) on the target system; and a - // 1-byte unsigned integer containing the size in bytes of a segment - // descriptor on the target system. This header is followed by a series - // of tuples. 
Each tuple consists of an address and a length, each in - // the size appropriate for an address on the target architecture. - HeaderData.Length = data.getU32(offset_ptr); - HeaderData.Version = data.getU16(offset_ptr); - HeaderData.CuOffset = data.getU32(offset_ptr); - HeaderData.AddrSize = data.getU8(offset_ptr); - HeaderData.SegSize = data.getU8(offset_ptr); - - // Perform basic validation of the header fields. - if (!data.isValidOffsetForDataOfSize(Offset, HeaderData.Length) || - (HeaderData.AddrSize != 4 && HeaderData.AddrSize != 8)) { - clear(); - return false; - } - - // The first tuple following the header in each set begins at an offset - // that is a multiple of the size of a single tuple (that is, twice the - // size of an address). The header is padded, if necessary, to the - // appropriate boundary. - const uint32_t header_size = *offset_ptr - Offset; - const uint32_t tuple_size = HeaderData.AddrSize * 2; - uint32_t first_tuple_offset = 0; - while (first_tuple_offset < header_size) - first_tuple_offset += tuple_size; - - *offset_ptr = Offset + first_tuple_offset; - - Descriptor arangeDescriptor; - - static_assert(sizeof(arangeDescriptor.Address) == - sizeof(arangeDescriptor.Length), - "Different datatypes for addresses and sizes!"); - assert(sizeof(arangeDescriptor.Address) >= HeaderData.AddrSize); - - while (data.isValidOffset(*offset_ptr)) { - arangeDescriptor.Address = data.getUnsigned(offset_ptr, HeaderData.AddrSize); - arangeDescriptor.Length = data.getUnsigned(offset_ptr, HeaderData.AddrSize); +Error DWARFDebugArangeSet::extract(DWARFDataExtractor data, + uint64_t *offset_ptr) { + assert(data.isValidOffset(*offset_ptr)); + ArangeDescriptors.clear(); + Offset = *offset_ptr; + + // 7.21 Address Range Table (extract) + // Each set of entries in the table of address ranges contained in + // the .debug_aranges section begins with a header containing: + // 1. 
unit_length (initial length) + // A 4-byte (32-bit DWARF) or 12-byte (64-bit DWARF) length containing + // the length of the set of entries for this compilation unit, + // not including the length field itself. + // 2. version (uhalf) + // The value in this field is 2. + // 3. debug_info_offset (section offset) + // A 4-byte (32-bit DWARF) or 8-byte (64-bit DWARF) offset into the + // .debug_info section of the compilation unit header. + // 4. address_size (ubyte) + // 5. segment_selector_size (ubyte) + // This header is followed by a series of tuples. Each tuple consists of + // a segment, an address and a length. The segment selector size is given by + // the segment_selector_size field of the header; the address and length + // size are each given by the address_size field of the header. Each set of + // tuples is terminated by a 0 for the segment, a 0 for the address and 0 + // for the length. If the segment_selector_size field in the header is zero, + // the segment selectors are omitted from all tuples, including + // the terminating tuple. + + Error Err = Error::success(); + std::tie(HeaderData.Length, HeaderData.Format) = + data.getInitialLength(offset_ptr, &Err); + HeaderData.Version = data.getU16(offset_ptr, &Err); + HeaderData.CuOffset = data.getUnsigned( + offset_ptr, dwarf::getDwarfOffsetByteSize(HeaderData.Format), &Err); + HeaderData.AddrSize = data.getU8(offset_ptr, &Err); + HeaderData.SegSize = data.getU8(offset_ptr, &Err); + if (Err) { + return createStringError(errc::invalid_argument, + "parsing address ranges table at offset 0x%" PRIx64 + ": %s", + Offset, toString(std::move(Err)).c_str()); + } + // Perform basic validation of the header fields. 
+ uint64_t full_length = + dwarf::getUnitLengthFieldByteSize(HeaderData.Format) + HeaderData.Length; + if (!data.isValidOffsetForDataOfSize(Offset, full_length)) + return createStringError(errc::invalid_argument, + "the length of address range table at offset " + "0x%" PRIx64 " exceeds section size", + Offset); + if (HeaderData.AddrSize != 4 && HeaderData.AddrSize != 8) + return createStringError(errc::invalid_argument, + "address range table at offset 0x%" PRIx64 + " has unsupported address size: %d " + "(4 and 8 supported)", + Offset, HeaderData.AddrSize); + if (HeaderData.SegSize != 0) + return createStringError(errc::not_supported, + "non-zero segment selector size in address range " + "table at offset 0x%" PRIx64 " is not supported", + Offset); + + // The first tuple following the header in each set begins at an offset that + // is a multiple of the size of a single tuple (that is, twice the size of + // an address because we do not support non-zero segment selector sizes). + // Therefore, the full length should also be a multiple of the tuple size. + const uint32_t tuple_size = HeaderData.AddrSize * 2; + if (full_length % tuple_size != 0) + return createStringError( + errc::invalid_argument, + "address range table at offset 0x%" PRIx64 + " has length that is not a multiple of the tuple size", + Offset); + + // The header is padded, if necessary, to the appropriate boundary. + const uint32_t header_size = *offset_ptr - Offset; + uint32_t first_tuple_offset = 0; + while (first_tuple_offset < header_size) + first_tuple_offset += tuple_size; + + // There should be space for at least one tuple. 
+ if (full_length <= first_tuple_offset) + return createStringError( + errc::invalid_argument, + "address range table at offset 0x%" PRIx64 + " has an insufficient length to contain any entries", + Offset); + + *offset_ptr = Offset + first_tuple_offset; + + Descriptor arangeDescriptor; + + static_assert(sizeof(arangeDescriptor.Address) == + sizeof(arangeDescriptor.Length), + "Different datatypes for addresses and sizes!"); + assert(sizeof(arangeDescriptor.Address) >= HeaderData.AddrSize); + + uint64_t end_offset = Offset + full_length; + while (*offset_ptr < end_offset) { + arangeDescriptor.Address = data.getUnsigned(offset_ptr, HeaderData.AddrSize); + arangeDescriptor.Length = data.getUnsigned(offset_ptr, HeaderData.AddrSize); + + if (arangeDescriptor.Length == 0) { // Each set of tuples is terminated by a 0 for the address and 0 // for the length. - if (arangeDescriptor.Address || arangeDescriptor.Length) - ArangeDescriptors.push_back(arangeDescriptor); - else - break; // We are done if we get a zero address and length + if (arangeDescriptor.Address == 0 && *offset_ptr == end_offset) + return ErrorSuccess(); + return createStringError( + errc::invalid_argument, + "address range table at offset 0x%" PRIx64 + " has an invalid tuple (length = 0) at offset 0x%" PRIx64, + Offset, *offset_ptr - tuple_size); } - return !ArangeDescriptors.empty(); + ArangeDescriptors.push_back(arangeDescriptor); } - return false; + + return createStringError(errc::invalid_argument, + "address range table at offset 0x%" PRIx64 + " is not terminated by null entry", + Offset); } void DWARFDebugArangeSet::dump(raw_ostream &OS) const { - OS << format("Address Range Header: length = 0x%8.8x, version = 0x%4.4x, ", - HeaderData.Length, HeaderData.Version) - << format("cu_offset = 0x%8.8x, addr_size = 0x%2.2x, seg_size = 0x%2.2x\n", - HeaderData.CuOffset, HeaderData.AddrSize, HeaderData.SegSize); + int OffsetDumpWidth = 2 * dwarf::getDwarfOffsetByteSize(HeaderData.Format); + OS << "Address Range 
Header: " + << format("length = 0x%0*" PRIx64 ", ", OffsetDumpWidth, HeaderData.Length) + << "format = " << dwarf::FormatString(HeaderData.Format) << ", " + << format("version = 0x%4.4x, ", HeaderData.Version) + << format("cu_offset = 0x%0*" PRIx64 ", ", OffsetDumpWidth, + HeaderData.CuOffset) + << format("addr_size = 0x%2.2x, ", HeaderData.AddrSize) + << format("seg_size = 0x%2.2x\n", HeaderData.SegSize); for (const auto &Desc : ArangeDescriptors) { Desc.dump(OS, HeaderData.AddrSize); diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp index fa157e868851..e8ed63075055 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugAranges.cpp @@ -11,7 +11,6 @@ #include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h" #include "llvm/Support/DataExtractor.h" -#include "llvm/Support/WithColor.h" #include <algorithm> #include <cassert> #include <cstdint> @@ -20,13 +19,19 @@ using namespace llvm; -void DWARFDebugAranges::extract(DataExtractor DebugArangesData) { +void DWARFDebugAranges::extract( + DWARFDataExtractor DebugArangesData, + function_ref<void(Error)> RecoverableErrorHandler) { if (!DebugArangesData.isValidOffset(0)) return; uint64_t Offset = 0; DWARFDebugArangeSet Set; - while (Set.extract(DebugArangesData, &Offset)) { + while (DebugArangesData.isValidOffset(Offset)) { + if (Error E = Set.extract(DebugArangesData, &Offset)) { + RecoverableErrorHandler(std::move(E)); + return; + } uint64_t CUOffset = Set.getCompileUnitDIEOffset(); for (const auto &Desc : Set.descriptors()) { uint64_t LowPC = Desc.Address; @@ -43,9 +48,9 @@ void DWARFDebugAranges::generate(DWARFContext *CTX) { return; // Extract aranges from .debug_aranges section. 
- DataExtractor ArangesData(CTX->getDWARFObj().getArangesSection(), - CTX->isLittleEndian(), 0); - extract(ArangesData); + DWARFDataExtractor ArangesData(CTX->getDWARFObj().getArangesSection(), + CTX->isLittleEndian(), 0); + extract(ArangesData, CTX->getRecoverableErrorHandler()); // Generate aranges from DIEs: even if .debug_aranges section is present, // it may describe only a small subset of compilation units, so we need to @@ -55,7 +60,7 @@ void DWARFDebugAranges::generate(DWARFContext *CTX) { if (ParsedCUOffsets.insert(CUOffset).second) { Expected<DWARFAddressRangesVector> CURanges = CU->collectAddressRanges(); if (!CURanges) - WithColor::error() << toString(CURanges.takeError()) << '\n'; + CTX->getRecoverableErrorHandler()(CURanges.takeError()); else for (const auto &R : *CURanges) appendRange(CUOffset, R.LowPC, R.HighPC); diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp index 81b00f65741b..0a1b75592290 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp @@ -36,123 +36,130 @@ const uint8_t DWARF_CFI_PRIMARY_OPERAND_MASK = 0x3f; Error CFIProgram::parse(DWARFDataExtractor Data, uint64_t *Offset, uint64_t EndOffset) { - while (*Offset < EndOffset) { - uint8_t Opcode = Data.getRelocatedValue(1, Offset); - // Some instructions have a primary opcode encoded in the top bits. - uint8_t Primary = Opcode & DWARF_CFI_PRIMARY_OPCODE_MASK; + DataExtractor::Cursor C(*Offset); + while (C && C.tell() < EndOffset) { + uint8_t Opcode = Data.getRelocatedValue(C, 1); + if (!C) + break; - if (Primary) { + // Some instructions have a primary opcode encoded in the top bits. + if (uint8_t Primary = Opcode & DWARF_CFI_PRIMARY_OPCODE_MASK) { // If it's a primary opcode, the first operand is encoded in the bottom // bits of the opcode itself. 
uint64_t Op1 = Opcode & DWARF_CFI_PRIMARY_OPERAND_MASK; switch (Primary) { - default: - return createStringError(errc::illegal_byte_sequence, - "Invalid primary CFI opcode 0x%" PRIx8, - Primary); case DW_CFA_advance_loc: case DW_CFA_restore: addInstruction(Primary, Op1); break; case DW_CFA_offset: - addInstruction(Primary, Op1, Data.getULEB128(Offset)); + addInstruction(Primary, Op1, Data.getULEB128(C)); break; - } - } else { - // Extended opcode - its value is Opcode itself. - switch (Opcode) { default: - return createStringError(errc::illegal_byte_sequence, - "Invalid extended CFI opcode 0x%" PRIx8, - Opcode); - case DW_CFA_nop: - case DW_CFA_remember_state: - case DW_CFA_restore_state: - case DW_CFA_GNU_window_save: - // No operands - addInstruction(Opcode); - break; - case DW_CFA_set_loc: - // Operands: Address - addInstruction(Opcode, Data.getRelocatedAddress(Offset)); - break; - case DW_CFA_advance_loc1: - // Operands: 1-byte delta - addInstruction(Opcode, Data.getRelocatedValue(1, Offset)); - break; - case DW_CFA_advance_loc2: - // Operands: 2-byte delta - addInstruction(Opcode, Data.getRelocatedValue(2, Offset)); - break; - case DW_CFA_advance_loc4: - // Operands: 4-byte delta - addInstruction(Opcode, Data.getRelocatedValue(4, Offset)); - break; - case DW_CFA_restore_extended: - case DW_CFA_undefined: - case DW_CFA_same_value: - case DW_CFA_def_cfa_register: - case DW_CFA_def_cfa_offset: - case DW_CFA_GNU_args_size: - // Operands: ULEB128 - addInstruction(Opcode, Data.getULEB128(Offset)); - break; - case DW_CFA_def_cfa_offset_sf: - // Operands: SLEB128 - addInstruction(Opcode, Data.getSLEB128(Offset)); - break; - case DW_CFA_offset_extended: - case DW_CFA_register: - case DW_CFA_def_cfa: - case DW_CFA_val_offset: { - // Operands: ULEB128, ULEB128 - // Note: We can not embed getULEB128 directly into function - // argument list. getULEB128 changes Offset and order of evaluation - // for arguments is unspecified. 
- auto op1 = Data.getULEB128(Offset); - auto op2 = Data.getULEB128(Offset); - addInstruction(Opcode, op1, op2); - break; - } - case DW_CFA_offset_extended_sf: - case DW_CFA_def_cfa_sf: - case DW_CFA_val_offset_sf: { - // Operands: ULEB128, SLEB128 - // Note: see comment for the previous case - auto op1 = Data.getULEB128(Offset); - auto op2 = (uint64_t)Data.getSLEB128(Offset); - addInstruction(Opcode, op1, op2); - break; - } - case DW_CFA_def_cfa_expression: { - uint32_t ExprLength = Data.getULEB128(Offset); - addInstruction(Opcode, 0); - DataExtractor Extractor( - Data.getData().slice(*Offset, *Offset + ExprLength), - Data.isLittleEndian(), Data.getAddressSize()); - Instructions.back().Expression = DWARFExpression( - Extractor, Data.getAddressSize(), dwarf::DWARF_VERSION); - *Offset += ExprLength; - break; - } - case DW_CFA_expression: - case DW_CFA_val_expression: { - auto RegNum = Data.getULEB128(Offset); - auto BlockLength = Data.getULEB128(Offset); - addInstruction(Opcode, RegNum, 0); - DataExtractor Extractor( - Data.getData().slice(*Offset, *Offset + BlockLength), - Data.isLittleEndian(), Data.getAddressSize()); - Instructions.back().Expression = DWARFExpression( - Extractor, Data.getAddressSize(), dwarf::DWARF_VERSION); - *Offset += BlockLength; - break; - } + llvm_unreachable("invalid primary CFI opcode"); } + continue; + } + + // Extended opcode - its value is Opcode itself. 
+ switch (Opcode) { + default: + return createStringError(errc::illegal_byte_sequence, + "invalid extended CFI opcode 0x%" PRIx8, Opcode); + case DW_CFA_nop: + case DW_CFA_remember_state: + case DW_CFA_restore_state: + case DW_CFA_GNU_window_save: + // No operands + addInstruction(Opcode); + break; + case DW_CFA_set_loc: + // Operands: Address + addInstruction(Opcode, Data.getRelocatedAddress(C)); + break; + case DW_CFA_advance_loc1: + // Operands: 1-byte delta + addInstruction(Opcode, Data.getRelocatedValue(C, 1)); + break; + case DW_CFA_advance_loc2: + // Operands: 2-byte delta + addInstruction(Opcode, Data.getRelocatedValue(C, 2)); + break; + case DW_CFA_advance_loc4: + // Operands: 4-byte delta + addInstruction(Opcode, Data.getRelocatedValue(C, 4)); + break; + case DW_CFA_restore_extended: + case DW_CFA_undefined: + case DW_CFA_same_value: + case DW_CFA_def_cfa_register: + case DW_CFA_def_cfa_offset: + case DW_CFA_GNU_args_size: + // Operands: ULEB128 + addInstruction(Opcode, Data.getULEB128(C)); + break; + case DW_CFA_def_cfa_offset_sf: + // Operands: SLEB128 + addInstruction(Opcode, Data.getSLEB128(C)); + break; + case DW_CFA_offset_extended: + case DW_CFA_register: + case DW_CFA_def_cfa: + case DW_CFA_val_offset: { + // Operands: ULEB128, ULEB128 + // Note: We can not embed getULEB128 directly into function + // argument list. getULEB128 changes Offset and order of evaluation + // for arguments is unspecified. 
+ uint64_t op1 = Data.getULEB128(C); + uint64_t op2 = Data.getULEB128(C); + addInstruction(Opcode, op1, op2); + break; + } + case DW_CFA_offset_extended_sf: + case DW_CFA_def_cfa_sf: + case DW_CFA_val_offset_sf: { + // Operands: ULEB128, SLEB128 + // Note: see comment for the previous case + uint64_t op1 = Data.getULEB128(C); + uint64_t op2 = (uint64_t)Data.getSLEB128(C); + addInstruction(Opcode, op1, op2); + break; + } + case DW_CFA_def_cfa_expression: { + uint64_t ExprLength = Data.getULEB128(C); + addInstruction(Opcode, 0); + StringRef Expression = Data.getBytes(C, ExprLength); + + DataExtractor Extractor(Expression, Data.isLittleEndian(), + Data.getAddressSize()); + // Note. We do not pass the DWARF format to DWARFExpression, because + // DW_OP_call_ref, the only operation which depends on the format, is + // prohibited in call frame instructions, see sec. 6.4.2 in DWARFv5. + Instructions.back().Expression = + DWARFExpression(Extractor, Data.getAddressSize()); + break; + } + case DW_CFA_expression: + case DW_CFA_val_expression: { + uint64_t RegNum = Data.getULEB128(C); + addInstruction(Opcode, RegNum, 0); + + uint64_t BlockLength = Data.getULEB128(C); + StringRef Expression = Data.getBytes(C, BlockLength); + DataExtractor Extractor(Expression, Data.isLittleEndian(), + Data.getAddressSize()); + // Note. We do not pass the DWARF format to DWARFExpression, because + // DW_OP_call_ref, the only operation which depends on the format, is + // prohibited in call frame instructions, see sec. 6.4.2 in DWARFv5. + Instructions.back().Expression = + DWARFExpression(Extractor, Data.getAddressSize()); + break; + } } } - return Error::success(); + *Offset = C.tell(); + return C.takeError(); } namespace { @@ -285,12 +292,33 @@ void CFIProgram::dump(raw_ostream &OS, const MCRegisterInfo *MRI, bool IsEH, } } +// Returns the CIE identifier to be used by the requested format. +// CIE ids for .debug_frame sections are defined in Section 7.24 of DWARFv5. 
+// For CIE ID in .eh_frame sections see +// https://refspecs.linuxfoundation.org/LSB_5.0.0/LSB-Core-generic/LSB-Core-generic/ehframechpt.html +constexpr uint64_t getCIEId(bool IsDWARF64, bool IsEH) { + if (IsEH) + return 0; + if (IsDWARF64) + return DW64_CIE_ID; + return DW_CIE_ID; +} + void CIE::dump(raw_ostream &OS, const MCRegisterInfo *MRI, bool IsEH) const { - OS << format("%08x %08x %08x CIE", (uint32_t)Offset, (uint32_t)Length, - DW_CIE_ID) - << "\n"; - OS << format(" Version: %d\n", Version); - OS << " Augmentation: \"" << Augmentation << "\"\n"; + // A CIE with a zero length is a terminator entry in the .eh_frame section. + if (IsEH && Length == 0) { + OS << format("%08" PRIx64, Offset) << " ZERO terminator\n"; + return; + } + + OS << format("%08" PRIx64, Offset) + << format(" %0*" PRIx64, IsDWARF64 ? 16 : 8, Length) + << format(" %0*" PRIx64, IsDWARF64 && !IsEH ? 16 : 8, + getCIEId(IsDWARF64, IsEH)) + << " CIE\n" + << " Format: " << FormatString(IsDWARF64) << "\n" + << format(" Version: %d\n", Version) + << " Augmentation: \"" << Augmentation << "\"\n"; if (Version >= 4) { OS << format(" Address size: %u\n", (uint32_t)AddressSize); OS << format(" Segment desc size: %u\n", @@ -313,11 +341,17 @@ void CIE::dump(raw_ostream &OS, const MCRegisterInfo *MRI, bool IsEH) const { } void FDE::dump(raw_ostream &OS, const MCRegisterInfo *MRI, bool IsEH) const { - OS << format("%08x %08x %08x FDE ", (uint32_t)Offset, (uint32_t)Length, - (int32_t)LinkedCIEOffset); - OS << format("cie=%08x pc=%08x...%08x\n", (int32_t)LinkedCIEOffset, - (uint32_t)InitialLocation, - (uint32_t)InitialLocation + (uint32_t)AddressRange); + OS << format("%08" PRIx64, Offset) + << format(" %0*" PRIx64, IsDWARF64 ? 16 : 8, Length) + << format(" %0*" PRIx64, IsDWARF64 && !IsEH ? 
16 : 8, CIEPointer) + << " FDE cie="; + if (LinkedCIE) + OS << format("%08" PRIx64, LinkedCIE->getOffset()); + else + OS << "<invalid offset>"; + OS << format(" pc=%08" PRIx64 "...%08" PRIx64 "\n", InitialLocation, + InitialLocation + AddressRange); + OS << " Format: " << FormatString(IsDWARF64) << "\n"; if (LSDAAddress) OS << format(" LSDA Address: %016" PRIx64 "\n", *LSDAAddress); CFIs.dump(OS, MRI, IsEH); @@ -340,36 +374,28 @@ static void LLVM_ATTRIBUTE_UNUSED dumpDataAux(DataExtractor Data, errs() << "\n"; } -// This is a workaround for old compilers which do not allow -// noreturn attribute usage in lambdas. Once the support for those -// compilers are phased out, we can remove this and return back to -// a ReportError lambda: [StartOffset](const char *ErrorMsg). -static void LLVM_ATTRIBUTE_NORETURN ReportError(uint64_t StartOffset, - const char *ErrorMsg) { - std::string Str; - raw_string_ostream OS(Str); - OS << format(ErrorMsg, StartOffset); - OS.flush(); - report_fatal_error(Str); -} - -void DWARFDebugFrame::parse(DWARFDataExtractor Data) { +Error DWARFDebugFrame::parse(DWARFDataExtractor Data) { uint64_t Offset = 0; DenseMap<uint64_t, CIE *> CIEs; while (Data.isValidOffset(Offset)) { uint64_t StartOffset = Offset; - bool IsDWARF64 = false; - uint64_t Length = Data.getRelocatedValue(4, &Offset); - uint64_t Id; + uint64_t Length; + DwarfFormat Format; + std::tie(Length, Format) = Data.getInitialLength(&Offset); + bool IsDWARF64 = Format == DWARF64; - if (Length == dwarf::DW_LENGTH_DWARF64) { - // DWARF-64 is distinguished by the first 32 bits of the initial length - // field being 0xffffffff. Then, the next 64 bits are the actual entry - // length. - IsDWARF64 = true; - Length = Data.getRelocatedValue(8, &Offset); + // If the Length is 0, then this CIE is a terminator. We add it because some + // dumper tools might need it to print something special for such entries + // (e.g. llvm-objdump --dwarf=frames prints "ZERO terminator"). 
+ if (Length == 0) { + auto Cie = std::make_unique<CIE>( + IsDWARF64, StartOffset, 0, 0, SmallString<8>(), 0, 0, 0, 0, 0, + SmallString<8>(), 0, 0, None, None, Arch); + CIEs[StartOffset] = Cie.get(); + Entries.push_back(std::move(Cie)); + break; } // At this point, Offset points to the next field after Length. @@ -380,14 +406,21 @@ void DWARFDebugFrame::parse(DWARFDataExtractor Data) { uint64_t EndStructureOffset = Offset + Length; // The Id field's size depends on the DWARF format - Id = Data.getUnsigned(&Offset, (IsDWARF64 && !IsEH) ? 8 : 4); - bool IsCIE = - ((IsDWARF64 && Id == DW64_CIE_ID) || Id == DW_CIE_ID || (IsEH && !Id)); + Error Err = Error::success(); + uint64_t Id = Data.getRelocatedValue((IsDWARF64 && !IsEH) ? 8 : 4, &Offset, + /*SectionIndex=*/nullptr, &Err); + if (Err) + return Err; - if (IsCIE) { + if (Id == getCIEId(IsDWARF64, IsEH)) { uint8_t Version = Data.getU8(&Offset); const char *Augmentation = Data.getCStr(&Offset); StringRef AugmentationString(Augmentation ? Augmentation : ""); + // TODO: we should provide a way to report a warning and continue dumping. + if (IsEH && Version != 1) + return createStringError(errc::not_supported, + "unsupported CIE version: %" PRIu8, Version); + uint8_t AddressSize = Version < 4 ? Data.getAddressSize() : Data.getU8(&Offset); Data.setAddressSize(AddressSize); @@ -411,61 +444,66 @@ void DWARFDebugFrame::parse(DWARFDataExtractor Data) { // Walk the augmentation string to get all the augmentation data. for (unsigned i = 0, e = AugmentationString.size(); i != e; ++i) { switch (AugmentationString[i]) { - default: - ReportError( - StartOffset, - "Unknown augmentation character in entry at %" PRIx64); - case 'L': - LSDAPointerEncoding = Data.getU8(&Offset); - break; - case 'P': { - if (Personality) - ReportError(StartOffset, - "Duplicate personality in entry at %" PRIx64); - PersonalityEncoding = Data.getU8(&Offset); - Personality = Data.getEncodedPointer( - &Offset, *PersonalityEncoding, - EHFrameAddress ? 
EHFrameAddress + Offset : 0); - break; - } - case 'R': - FDEPointerEncoding = Data.getU8(&Offset); - break; - case 'S': - // Current frame is a signal trampoline. - break; - case 'z': - if (i) - ReportError(StartOffset, - "'z' must be the first character at %" PRIx64); - // Parse the augmentation length first. We only parse it if - // the string contains a 'z'. - AugmentationLength = Data.getULEB128(&Offset); - StartAugmentationOffset = Offset; - EndAugmentationOffset = Offset + *AugmentationLength; - break; - case 'B': - // B-Key is used for signing functions associated with this - // augmentation string - break; + default: + return createStringError( + errc::invalid_argument, + "unknown augmentation character in entry at 0x%" PRIx64, + StartOffset); + case 'L': + LSDAPointerEncoding = Data.getU8(&Offset); + break; + case 'P': { + if (Personality) + return createStringError( + errc::invalid_argument, + "duplicate personality in entry at 0x%" PRIx64, StartOffset); + PersonalityEncoding = Data.getU8(&Offset); + Personality = Data.getEncodedPointer( + &Offset, *PersonalityEncoding, + EHFrameAddress ? EHFrameAddress + Offset : 0); + break; + } + case 'R': + FDEPointerEncoding = Data.getU8(&Offset); + break; + case 'S': + // Current frame is a signal trampoline. + break; + case 'z': + if (i) + return createStringError( + errc::invalid_argument, + "'z' must be the first character at 0x%" PRIx64, StartOffset); + // Parse the augmentation length first. We only parse it if + // the string contains a 'z'. 
+ AugmentationLength = Data.getULEB128(&Offset); + StartAugmentationOffset = Offset; + EndAugmentationOffset = Offset + *AugmentationLength; + break; + case 'B': + // B-Key is used for signing functions associated with this + // augmentation string + break; } } if (AugmentationLength.hasValue()) { if (Offset != EndAugmentationOffset) - ReportError(StartOffset, - "Parsing augmentation data at %" PRIx64 " failed"); - + return createStringError(errc::invalid_argument, + "parsing augmentation data at 0x%" PRIx64 + " failed", + StartOffset); AugmentationData = Data.getData().slice(StartAugmentationOffset, EndAugmentationOffset); } } auto Cie = std::make_unique<CIE>( - StartOffset, Length, Version, AugmentationString, AddressSize, - SegmentDescriptorSize, CodeAlignmentFactor, DataAlignmentFactor, - ReturnAddressRegister, AugmentationData, FDEPointerEncoding, - LSDAPointerEncoding, Personality, PersonalityEncoding, Arch); + IsDWARF64, StartOffset, Length, Version, AugmentationString, + AddressSize, SegmentDescriptorSize, CodeAlignmentFactor, + DataAlignmentFactor, ReturnAddressRegister, AugmentationData, + FDEPointerEncoding, LSDAPointerEncoding, Personality, + PersonalityEncoding, Arch); CIEs[StartOffset] = Cie.get(); Entries.emplace_back(std::move(Cie)); } else { @@ -479,9 +517,10 @@ void DWARFDebugFrame::parse(DWARFDataExtractor Data) { if (IsEH) { // The address size is encoded in the CIE we reference. if (!Cie) - ReportError(StartOffset, "Parsing FDE data at %" PRIx64 - " failed due to missing CIE"); - + return createStringError(errc::invalid_argument, + "parsing FDE data at 0x%" PRIx64 + " failed due to missing CIE", + StartOffset); if (auto Val = Data.getEncodedPointer( &Offset, Cie->getFDEPointerEncoding(), EHFrameAddress ? 
EHFrameAddress + Offset : 0)) { @@ -507,28 +546,32 @@ void DWARFDebugFrame::parse(DWARFDataExtractor Data) { } if (Offset != EndAugmentationOffset) - ReportError(StartOffset, - "Parsing augmentation data at %" PRIx64 " failed"); + return createStringError(errc::invalid_argument, + "parsing augmentation data at 0x%" PRIx64 + " failed", + StartOffset); } } else { InitialLocation = Data.getRelocatedAddress(&Offset); AddressRange = Data.getRelocatedAddress(&Offset); } - Entries.emplace_back(new FDE(StartOffset, Length, CIEPointer, - InitialLocation, AddressRange, - Cie, LSDAAddress, Arch)); + Entries.emplace_back(new FDE(IsDWARF64, StartOffset, Length, CIEPointer, + InitialLocation, AddressRange, Cie, + LSDAAddress, Arch)); } if (Error E = - Entries.back()->cfis().parse(Data, &Offset, EndStructureOffset)) { - report_fatal_error(toString(std::move(E))); - } + Entries.back()->cfis().parse(Data, &Offset, EndStructureOffset)) + return E; if (Offset != EndStructureOffset) - ReportError(StartOffset, - "Parsing entry instructions at %" PRIx64 " failed"); + return createStringError( + errc::invalid_argument, + "parsing entry instructions at 0x%" PRIx64 " failed", StartOffset); } + + return Error::success(); } FrameEntry *DWARFDebugFrame::getEntryAtOffset(uint64_t Offset) const { diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp index 11adb1e47640..3ca21e97888c 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp @@ -16,6 +16,7 @@ #include "llvm/DebugInfo/DWARF/DWARFRelocMap.h" #include "llvm/Support/Errc.h" #include "llvm/Support/Format.h" +#include "llvm/Support/FormatVariadic.h" #include "llvm/Support/WithColor.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> @@ -41,6 +42,10 @@ using ContentDescriptors = SmallVector<ContentDescriptor, 4>; } // end anonymous namespace +static bool versionIsSupported(uint16_t Version) { + return Version >= 2 && Version <= 5; 
+} + void DWARFDebugLine::ContentTypeTracker::trackContentType( dwarf::LineNumberEntryFormat ContentType) { switch (ContentType) { @@ -99,13 +104,21 @@ void DWARFDebugLine::Prologue::clear() { void DWARFDebugLine::Prologue::dump(raw_ostream &OS, DIDumpOptions DumpOptions) const { + if (!totalLengthIsValid()) + return; + int OffsetDumpWidth = 2 * dwarf::getDwarfOffsetByteSize(FormParams.Format); OS << "Line table prologue:\n" - << format(" total_length: 0x%8.8" PRIx64 "\n", TotalLength) + << format(" total_length: 0x%0*" PRIx64 "\n", OffsetDumpWidth, + TotalLength) + << " format: " << dwarf::FormatString(FormParams.Format) << "\n" << format(" version: %u\n", getVersion()); + if (!versionIsSupported(getVersion())) + return; if (getVersion() >= 5) OS << format(" address_size: %u\n", getAddressSize()) << format(" seg_select_size: %u\n", SegSelectorSize); - OS << format(" prologue_length: 0x%8.8" PRIx64 "\n", PrologueLength) + OS << format(" prologue_length: 0x%0*" PRIx64 "\n", OffsetDumpWidth, + PrologueLength) << format(" min_inst_length: %u\n", MinInstLength) << format(getVersion() >= 4 ? "max_ops_per_inst: %u\n" : "", MaxOpsPerInst) << format(" default_is_stmt: %u\n", DefaultIsStmt) @@ -114,8 +127,9 @@ void DWARFDebugLine::Prologue::dump(raw_ostream &OS, << format(" opcode_base: %u\n", OpcodeBase); for (uint32_t I = 0; I != StandardOpcodeLengths.size(); ++I) - OS << format("standard_opcode_lengths[%s] = %u\n", - LNStandardString(I + 1).data(), StandardOpcodeLengths[I]); + OS << formatv("standard_opcode_lengths[{0}] = {1}\n", + static_cast<dwarf::LineNumberOps>(I + 1), + StandardOpcodeLengths[I]); if (!IncludeDirectories.empty()) { // DWARF v5 starts directory indexes at 0. @@ -153,14 +167,21 @@ void DWARFDebugLine::Prologue::dump(raw_ostream &OS, } // Parse v2-v4 directory and file tables. 
-static void +static Error parseV2DirFileTables(const DWARFDataExtractor &DebugLineData, - uint64_t *OffsetPtr, uint64_t EndPrologueOffset, + uint64_t *OffsetPtr, DWARFDebugLine::ContentTypeTracker &ContentTypes, std::vector<DWARFFormValue> &IncludeDirectories, std::vector<DWARFDebugLine::FileNameEntry> &FileNames) { - while (*OffsetPtr < EndPrologueOffset) { - StringRef S = DebugLineData.getCStrRef(OffsetPtr); + while (true) { + Error Err = Error::success(); + StringRef S = DebugLineData.getCStrRef(OffsetPtr, &Err); + if (Err) { + consumeError(std::move(Err)); + return createStringError(errc::invalid_argument, + "include directories table was not null " + "terminated before the end of the prologue"); + } if (S.empty()) break; DWARFFormValue Dir = @@ -168,21 +189,33 @@ parseV2DirFileTables(const DWARFDataExtractor &DebugLineData, IncludeDirectories.push_back(Dir); } - while (*OffsetPtr < EndPrologueOffset) { - StringRef Name = DebugLineData.getCStrRef(OffsetPtr); - if (Name.empty()) + ContentTypes.HasModTime = true; + ContentTypes.HasLength = true; + + while (true) { + Error Err = Error::success(); + StringRef Name = DebugLineData.getCStrRef(OffsetPtr, &Err); + if (!Err && Name.empty()) break; + DWARFDebugLine::FileNameEntry FileEntry; FileEntry.Name = DWARFFormValue::createFromPValue(dwarf::DW_FORM_string, Name.data()); - FileEntry.DirIdx = DebugLineData.getULEB128(OffsetPtr); - FileEntry.ModTime = DebugLineData.getULEB128(OffsetPtr); - FileEntry.Length = DebugLineData.getULEB128(OffsetPtr); + FileEntry.DirIdx = DebugLineData.getULEB128(OffsetPtr, &Err); + FileEntry.ModTime = DebugLineData.getULEB128(OffsetPtr, &Err); + FileEntry.Length = DebugLineData.getULEB128(OffsetPtr, &Err); + + if (Err) { + consumeError(std::move(Err)); + return createStringError( + errc::invalid_argument, + "file names table was not null terminated before " + "the end of the prologue"); + } FileNames.push_back(FileEntry); } - ContentTypes.HasModTime = true; - ContentTypes.HasLength = true; 
+ return Error::success(); } // Parse v5 directory/file entry content descriptions. @@ -191,14 +224,15 @@ parseV2DirFileTables(const DWARFDataExtractor &DebugLineData, static llvm::Expected<ContentDescriptors> parseV5EntryFormat(const DWARFDataExtractor &DebugLineData, uint64_t *OffsetPtr, DWARFDebugLine::ContentTypeTracker *ContentTypes) { + Error Err = Error::success(); ContentDescriptors Descriptors; - int FormatCount = DebugLineData.getU8(OffsetPtr); + int FormatCount = DebugLineData.getU8(OffsetPtr, &Err); bool HasPath = false; - for (int I = 0; I != FormatCount; ++I) { + for (int I = 0; I != FormatCount && !Err; ++I) { ContentDescriptor Descriptor; Descriptor.Type = - dwarf::LineNumberEntryFormat(DebugLineData.getULEB128(OffsetPtr)); - Descriptor.Form = dwarf::Form(DebugLineData.getULEB128(OffsetPtr)); + dwarf::LineNumberEntryFormat(DebugLineData.getULEB128(OffsetPtr, &Err)); + Descriptor.Form = dwarf::Form(DebugLineData.getULEB128(OffsetPtr, &Err)); if (Descriptor.Type == dwarf::DW_LNCT_path) HasPath = true; if (ContentTypes) @@ -206,6 +240,11 @@ parseV5EntryFormat(const DWARFDataExtractor &DebugLineData, uint64_t *OffsetPtr, Descriptors.push_back(Descriptor); } + if (Err) + return createStringError(errc::invalid_argument, + "failed to parse entry content descriptors: %s", + toString(std::move(Err)).c_str()); + if (!HasPath) return createStringError(errc::invalid_argument, "failed to parse entry content descriptions" @@ -227,8 +266,8 @@ parseV5DirFileTables(const DWARFDataExtractor &DebugLineData, return DirDescriptors.takeError(); // Get the directory entries, according to the format described above. 
- int DirEntryCount = DebugLineData.getU8(OffsetPtr); - for (int I = 0; I != DirEntryCount; ++I) { + uint64_t DirEntryCount = DebugLineData.getULEB128(OffsetPtr); + for (uint64_t I = 0; I != DirEntryCount; ++I) { for (auto Descriptor : *DirDescriptors) { DWARFFormValue Value(Descriptor.Form); switch (Descriptor.Type) { @@ -236,14 +275,14 @@ parseV5DirFileTables(const DWARFDataExtractor &DebugLineData, if (!Value.extractValue(DebugLineData, OffsetPtr, FormParams, &Ctx, U)) return createStringError(errc::invalid_argument, "failed to parse directory entry because " - "extracting the form value failed."); + "extracting the form value failed"); IncludeDirectories.push_back(Value); break; default: if (!Value.skipValue(DebugLineData, OffsetPtr, FormParams)) return createStringError(errc::invalid_argument, "failed to parse directory entry because " - "skipping the form value failed."); + "skipping the form value failed"); } } } @@ -255,15 +294,15 @@ parseV5DirFileTables(const DWARFDataExtractor &DebugLineData, return FileDescriptors.takeError(); // Get the file entries, according to the format described above. 
- int FileEntryCount = DebugLineData.getU8(OffsetPtr); - for (int I = 0; I != FileEntryCount; ++I) { + uint64_t FileEntryCount = DebugLineData.getULEB128(OffsetPtr); + for (uint64_t I = 0; I != FileEntryCount; ++I) { DWARFDebugLine::FileNameEntry FileEntry; for (auto Descriptor : *FileDescriptors) { DWARFFormValue Value(Descriptor.Form); if (!Value.extractValue(DebugLineData, OffsetPtr, FormParams, &Ctx, U)) return createStringError(errc::invalid_argument, "failed to parse file entry because " - "extracting the form value failed."); + "extracting the form value failed"); switch (Descriptor.Type) { case DW_LNCT_path: FileEntry.Name = Value; @@ -297,78 +336,114 @@ parseV5DirFileTables(const DWARFDataExtractor &DebugLineData, return Error::success(); } -Error DWARFDebugLine::Prologue::parse(const DWARFDataExtractor &DebugLineData, - uint64_t *OffsetPtr, - const DWARFContext &Ctx, - const DWARFUnit *U) { +uint64_t DWARFDebugLine::Prologue::getLength() const { + uint64_t Length = PrologueLength + sizeofTotalLength() + + sizeof(getVersion()) + sizeofPrologueLength(); + if (getVersion() >= 5) + Length += 2; // Address + Segment selector sizes. 
+ return Length; +} + +Error DWARFDebugLine::Prologue::parse( + DWARFDataExtractor DebugLineData, uint64_t *OffsetPtr, + function_ref<void(Error)> RecoverableErrorHandler, const DWARFContext &Ctx, + const DWARFUnit *U) { const uint64_t PrologueOffset = *OffsetPtr; clear(); - TotalLength = DebugLineData.getRelocatedValue(4, OffsetPtr); - if (TotalLength == dwarf::DW_LENGTH_DWARF64) { - FormParams.Format = dwarf::DWARF64; - TotalLength = DebugLineData.getU64(OffsetPtr); - } else if (TotalLength >= dwarf::DW_LENGTH_lo_reserved) { - return createStringError(errc::invalid_argument, + DataExtractor::Cursor Cursor(*OffsetPtr); + std::tie(TotalLength, FormParams.Format) = + DebugLineData.getInitialLength(Cursor); + + DebugLineData = + DWARFDataExtractor(DebugLineData, Cursor.tell() + TotalLength); + FormParams.Version = DebugLineData.getU16(Cursor); + if (Cursor && !versionIsSupported(getVersion())) { + // Treat this error as unrecoverable - we cannot be sure what any of + // the data represents including the length field, so cannot skip it or make + // any reasonable assumptions. 
+ *OffsetPtr = Cursor.tell(); + return createStringError( + errc::not_supported, "parsing line table prologue at offset 0x%8.8" PRIx64 - " unsupported reserved unit length found of value 0x%8.8" PRIx64, - PrologueOffset, TotalLength); + ": unsupported version %" PRIu16, + PrologueOffset, getVersion()); } - FormParams.Version = DebugLineData.getU16(OffsetPtr); - if (getVersion() < 2) - return createStringError(errc::not_supported, - "parsing line table prologue at offset 0x%8.8" PRIx64 - " found unsupported version 0x%2.2" PRIx16, - PrologueOffset, getVersion()); if (getVersion() >= 5) { - FormParams.AddrSize = DebugLineData.getU8(OffsetPtr); - assert((DebugLineData.getAddressSize() == 0 || + FormParams.AddrSize = DebugLineData.getU8(Cursor); + assert((!Cursor || DebugLineData.getAddressSize() == 0 || DebugLineData.getAddressSize() == getAddressSize()) && "Line table header and data extractor disagree"); - SegSelectorSize = DebugLineData.getU8(OffsetPtr); + SegSelectorSize = DebugLineData.getU8(Cursor); } PrologueLength = - DebugLineData.getRelocatedValue(sizeofPrologueLength(), OffsetPtr); - const uint64_t EndPrologueOffset = PrologueLength + *OffsetPtr; - MinInstLength = DebugLineData.getU8(OffsetPtr); + DebugLineData.getRelocatedValue(Cursor, sizeofPrologueLength()); + const uint64_t EndPrologueOffset = PrologueLength + Cursor.tell(); + DebugLineData = DWARFDataExtractor(DebugLineData, EndPrologueOffset); + MinInstLength = DebugLineData.getU8(Cursor); if (getVersion() >= 4) - MaxOpsPerInst = DebugLineData.getU8(OffsetPtr); - DefaultIsStmt = DebugLineData.getU8(OffsetPtr); - LineBase = DebugLineData.getU8(OffsetPtr); - LineRange = DebugLineData.getU8(OffsetPtr); - OpcodeBase = DebugLineData.getU8(OffsetPtr); - - StandardOpcodeLengths.reserve(OpcodeBase - 1); - for (uint32_t I = 1; I < OpcodeBase; ++I) { - uint8_t OpLen = DebugLineData.getU8(OffsetPtr); - StandardOpcodeLengths.push_back(OpLen); + MaxOpsPerInst = DebugLineData.getU8(Cursor); + DefaultIsStmt = 
DebugLineData.getU8(Cursor); + LineBase = DebugLineData.getU8(Cursor); + LineRange = DebugLineData.getU8(Cursor); + OpcodeBase = DebugLineData.getU8(Cursor); + + if (Cursor && OpcodeBase == 0) { + // If the opcode base is 0, we cannot read the standard opcode lengths (of + // which there are supposed to be one fewer than the opcode base). Assume + // there are no standard opcodes and continue parsing. + RecoverableErrorHandler(createStringError( + errc::invalid_argument, + "parsing line table prologue at offset 0x%8.8" PRIx64 + " found opcode base of 0. Assuming no standard opcodes", + PrologueOffset)); + } else if (Cursor) { + StandardOpcodeLengths.reserve(OpcodeBase - 1); + for (uint32_t I = 1; I < OpcodeBase; ++I) { + uint8_t OpLen = DebugLineData.getU8(Cursor); + StandardOpcodeLengths.push_back(OpLen); + } } - if (getVersion() >= 5) { - if (Error E = - parseV5DirFileTables(DebugLineData, OffsetPtr, FormParams, Ctx, U, - ContentTypes, IncludeDirectories, FileNames)) { - return joinErrors( - createStringError( - errc::invalid_argument, - "parsing line table prologue at 0x%8.8" PRIx64 - " found an invalid directory or file table description at" - " 0x%8.8" PRIx64, - PrologueOffset, *OffsetPtr), - std::move(E)); - } - } else - parseV2DirFileTables(DebugLineData, OffsetPtr, EndPrologueOffset, - ContentTypes, IncludeDirectories, FileNames); + *OffsetPtr = Cursor.tell(); + // A corrupt file name or directory table does not prevent interpretation of + // the main line program, so check the cursor state now so that its errors can + // be handled separately. + if (!Cursor) + return createStringError( + errc::invalid_argument, + "parsing line table prologue at offset 0x%8.8" PRIx64 ": %s", + PrologueOffset, toString(Cursor.takeError()).c_str()); + + Error E = + getVersion() >= 5 + ? 
parseV5DirFileTables(DebugLineData, OffsetPtr, FormParams, Ctx, U, + ContentTypes, IncludeDirectories, FileNames) + : parseV2DirFileTables(DebugLineData, OffsetPtr, ContentTypes, + IncludeDirectories, FileNames); + if (E) { + RecoverableErrorHandler(joinErrors( + createStringError( + errc::invalid_argument, + "parsing line table prologue at 0x%8.8" PRIx64 + " found an invalid directory or file table description at" + " 0x%8.8" PRIx64, + PrologueOffset, *OffsetPtr), + std::move(E))); + return Error::success(); + } - if (*OffsetPtr != EndPrologueOffset) - return createStringError(errc::invalid_argument, - "parsing line table prologue at 0x%8.8" PRIx64 - " should have ended at 0x%8.8" PRIx64 - " but it ended at 0x%8.8" PRIx64, - PrologueOffset, EndPrologueOffset, *OffsetPtr); + assert(*OffsetPtr <= EndPrologueOffset); + if (*OffsetPtr != EndPrologueOffset) { + RecoverableErrorHandler(createStringError( + errc::invalid_argument, + "unknown data in line table prologue at offset 0x%8.8" PRIx64 + ": parsing ended (at offset 0x%8.8" PRIx64 + ") before reaching the prologue end at offset 0x%8.8" PRIx64, + PrologueOffset, *OffsetPtr, EndPrologueOffset)); + } return Error::success(); } @@ -396,10 +471,12 @@ void DWARFDebugLine::Row::reset(bool DefaultIsStmt) { EpilogueBegin = false; } -void DWARFDebugLine::Row::dumpTableHeader(raw_ostream &OS) { - OS << "Address Line Column File ISA Discriminator Flags\n" - << "------------------ ------ ------ ------ --- ------------- " - "-------------\n"; +void DWARFDebugLine::Row::dumpTableHeader(raw_ostream &OS, unsigned Indent) { + OS.indent(Indent) + << "Address Line Column File ISA Discriminator Flags\n"; + OS.indent(Indent) + << "------------------ ------ ------ ------ --- ------------- " + "-------------\n"; } void DWARFDebugLine::Row::dump(raw_ostream &OS) const { @@ -430,7 +507,7 @@ void DWARFDebugLine::LineTable::dump(raw_ostream &OS, if (!Rows.empty()) { OS << '\n'; - Row::dumpTableHeader(OS); + Row::dumpTableHeader(OS, 0); for 
(const Row &R : Rows) { R.dump(OS); } @@ -447,8 +524,10 @@ void DWARFDebugLine::LineTable::clear() { Sequences.clear(); } -DWARFDebugLine::ParsingState::ParsingState(struct LineTable *LT) - : LineTable(LT) { +DWARFDebugLine::ParsingState::ParsingState( + struct LineTable *LT, uint64_t TableOffset, + function_ref<void(Error)> ErrorHandler) + : LineTable(LT), LineTableOffset(TableOffset), ErrorHandler(ErrorHandler) { resetRowAndSequence(); } @@ -488,7 +567,7 @@ DWARFDebugLine::getLineTable(uint64_t Offset) const { Expected<const DWARFDebugLine::LineTable *> DWARFDebugLine::getOrParseLineTable( DWARFDataExtractor &DebugLineData, uint64_t Offset, const DWARFContext &Ctx, - const DWARFUnit *U, function_ref<void(Error)> RecoverableErrorCallback) { + const DWARFUnit *U, function_ref<void(Error)> RecoverableErrorHandler) { if (!DebugLineData.isValidOffset(Offset)) return createStringError(errc::invalid_argument, "offset 0x%8.8" PRIx64 " is not a valid debug line section offset", @@ -499,32 +578,163 @@ Expected<const DWARFDebugLine::LineTable *> DWARFDebugLine::getOrParseLineTable( LineTable *LT = &Pos.first->second; if (Pos.second) { if (Error Err = - LT->parse(DebugLineData, &Offset, Ctx, U, RecoverableErrorCallback)) + LT->parse(DebugLineData, &Offset, Ctx, U, RecoverableErrorHandler)) return std::move(Err); return LT; } return LT; } +static StringRef getOpcodeName(uint8_t Opcode, uint8_t OpcodeBase) { + assert(Opcode != 0); + if (Opcode < OpcodeBase) + return LNStandardString(Opcode); + return "special"; +} + +uint64_t DWARFDebugLine::ParsingState::advanceAddr(uint64_t OperationAdvance, + uint8_t Opcode, + uint64_t OpcodeOffset) { + StringRef OpcodeName = getOpcodeName(Opcode, LineTable->Prologue.OpcodeBase); + // For versions less than 4, the MaxOpsPerInst member is set to 0, as the + // maximum_operations_per_instruction field wasn't introduced until DWARFv4. + // Don't warn about bad values in this situation. 
+ if (ReportAdvanceAddrProblem && LineTable->Prologue.getVersion() >= 4 && + LineTable->Prologue.MaxOpsPerInst != 1) + ErrorHandler(createStringError( + errc::not_supported, + "line table program at offset 0x%8.8" PRIx64 + " contains a %s opcode at offset 0x%8.8" PRIx64 + ", but the prologue maximum_operations_per_instruction value is %" PRId8 + ", which is unsupported. Assuming a value of 1 instead", + LineTableOffset, OpcodeName.data(), OpcodeOffset, + LineTable->Prologue.MaxOpsPerInst)); + if (ReportAdvanceAddrProblem && LineTable->Prologue.MinInstLength == 0) + ErrorHandler( + createStringError(errc::invalid_argument, + "line table program at offset 0x%8.8" PRIx64 + " contains a %s opcode at offset 0x%8.8" PRIx64 + ", but the prologue minimum_instruction_length value " + "is 0, which prevents any address advancing", + LineTableOffset, OpcodeName.data(), OpcodeOffset)); + ReportAdvanceAddrProblem = false; + uint64_t AddrOffset = OperationAdvance * LineTable->Prologue.MinInstLength; + Row.Address.Address += AddrOffset; + return AddrOffset; +} + +DWARFDebugLine::ParsingState::AddrAndAdjustedOpcode +DWARFDebugLine::ParsingState::advanceAddrForOpcode(uint8_t Opcode, + uint64_t OpcodeOffset) { + assert(Opcode == DW_LNS_const_add_pc || + Opcode >= LineTable->Prologue.OpcodeBase); + if (ReportBadLineRange && LineTable->Prologue.LineRange == 0) { + StringRef OpcodeName = + getOpcodeName(Opcode, LineTable->Prologue.OpcodeBase); + ErrorHandler( + createStringError(errc::not_supported, + "line table program at offset 0x%8.8" PRIx64 + " contains a %s opcode at offset 0x%8.8" PRIx64 + ", but the prologue line_range value is 0. 
The " + "address and line will not be adjusted", + LineTableOffset, OpcodeName.data(), OpcodeOffset)); + ReportBadLineRange = false; + } + + uint8_t OpcodeValue = Opcode; + if (Opcode == DW_LNS_const_add_pc) + OpcodeValue = 255; + uint8_t AdjustedOpcode = OpcodeValue - LineTable->Prologue.OpcodeBase; + uint64_t OperationAdvance = + LineTable->Prologue.LineRange != 0 + ? AdjustedOpcode / LineTable->Prologue.LineRange + : 0; + uint64_t AddrOffset = advanceAddr(OperationAdvance, Opcode, OpcodeOffset); + return {AddrOffset, AdjustedOpcode}; +} + +DWARFDebugLine::ParsingState::AddrAndLineDelta +DWARFDebugLine::ParsingState::handleSpecialOpcode(uint8_t Opcode, + uint64_t OpcodeOffset) { + // A special opcode value is chosen based on the amount that needs + // to be added to the line and address registers. The maximum line + // increment for a special opcode is the value of the line_base + // field in the header, plus the value of the line_range field, + // minus 1 (line base + line range - 1). If the desired line + // increment is greater than the maximum line increment, a standard + // opcode must be used instead of a special opcode. The "address + // advance" is calculated by dividing the desired address increment + // by the minimum_instruction_length field from the header. The + // special opcode is then calculated using the following formula: + // + // opcode = (desired line increment - line_base) + + // (line_range * address advance) + opcode_base + // + // If the resulting opcode is greater than 255, a standard opcode + // must be used instead. + // + // To decode a special opcode, subtract the opcode_base from the + // opcode itself to give the adjusted opcode. The amount to + // increment the address register is the result of the adjusted + // opcode divided by the line_range multiplied by the + // minimum_instruction_length field from the header. 
That is: + // + // address increment = (adjusted opcode / line_range) * + // minimum_instruction_length + // + // The amount to increment the line register is the line_base plus + // the result of the adjusted opcode modulo the line_range. That is: + // + // line increment = line_base + (adjusted opcode % line_range) + + DWARFDebugLine::ParsingState::AddrAndAdjustedOpcode AddrAdvanceResult = + advanceAddrForOpcode(Opcode, OpcodeOffset); + int32_t LineOffset = 0; + if (LineTable->Prologue.LineRange != 0) + LineOffset = + LineTable->Prologue.LineBase + + (AddrAdvanceResult.AdjustedOpcode % LineTable->Prologue.LineRange); + Row.Line += LineOffset; + return {AddrAdvanceResult.AddrDelta, LineOffset}; +} + +/// Parse a ULEB128 using the specified \p Cursor. \returns the parsed value on +/// success, or None if \p Cursor is in a failing state. +template <typename T> +static Optional<T> parseULEB128(DWARFDataExtractor &Data, + DataExtractor::Cursor &Cursor) { + T Value = Data.getULEB128(Cursor); + if (Cursor) + return Value; + return None; +} + Error DWARFDebugLine::LineTable::parse( DWARFDataExtractor &DebugLineData, uint64_t *OffsetPtr, const DWARFContext &Ctx, const DWARFUnit *U, - function_ref<void(Error)> RecoverableErrorCallback, raw_ostream *OS) { + function_ref<void(Error)> RecoverableErrorHandler, raw_ostream *OS, + bool Verbose) { + assert((OS || !Verbose) && "cannot have verbose output without stream"); const uint64_t DebugLineOffset = *OffsetPtr; clear(); - Error PrologueErr = Prologue.parse(DebugLineData, OffsetPtr, Ctx, U); + Error PrologueErr = + Prologue.parse(DebugLineData, OffsetPtr, RecoverableErrorHandler, Ctx, U); if (OS) { - // The presence of OS signals verbose dumping. DIDumpOptions DumpOptions; - DumpOptions.Verbose = true; + DumpOptions.Verbose = Verbose; Prologue.dump(*OS, DumpOptions); } - if (PrologueErr) + if (PrologueErr) { + // Ensure there is a blank line after the prologue to clearly delineate it + // from later dumps. 
+ if (OS) + *OS << "\n"; return PrologueErr; + } uint64_t ProgramLength = Prologue.TotalLength + Prologue.sizeofTotalLength(); if (!DebugLineData.isValidOffsetForDataOfSize(DebugLineOffset, @@ -532,7 +742,7 @@ Error DWARFDebugLine::LineTable::parse( assert(DebugLineData.size() > DebugLineOffset && "prologue parsing should handle invalid offset"); uint64_t BytesRemaining = DebugLineData.size() - DebugLineOffset; - RecoverableErrorCallback( + RecoverableErrorHandler( createStringError(errc::invalid_argument, "line table program with offset 0x%8.8" PRIx64 " has length 0x%8.8" PRIx64 " but only 0x%8.8" PRIx64 @@ -542,41 +752,62 @@ Error DWARFDebugLine::LineTable::parse( ProgramLength = BytesRemaining; } + // Create a DataExtractor which can only see the data up to the end of the + // table, to prevent reading past the end. const uint64_t EndOffset = DebugLineOffset + ProgramLength; + DWARFDataExtractor TableData(DebugLineData, EndOffset); // See if we should tell the data extractor the address size. - if (DebugLineData.getAddressSize() == 0) - DebugLineData.setAddressSize(Prologue.getAddressSize()); + if (TableData.getAddressSize() == 0) + TableData.setAddressSize(Prologue.getAddressSize()); else assert(Prologue.getAddressSize() == 0 || - Prologue.getAddressSize() == DebugLineData.getAddressSize()); + Prologue.getAddressSize() == TableData.getAddressSize()); - ParsingState State(this); + ParsingState State(this, DebugLineOffset, RecoverableErrorHandler); + *OffsetPtr = DebugLineOffset + Prologue.getLength(); + if (OS && *OffsetPtr < EndOffset) { + *OS << '\n'; + Row::dumpTableHeader(*OS, /*Indent=*/Verbose ? 
12 : 0); + } while (*OffsetPtr < EndOffset) { - if (OS) + DataExtractor::Cursor Cursor(*OffsetPtr); + + if (Verbose) *OS << format("0x%08.08" PRIx64 ": ", *OffsetPtr); - uint8_t Opcode = DebugLineData.getU8(OffsetPtr); + uint64_t OpcodeOffset = *OffsetPtr; + uint8_t Opcode = TableData.getU8(Cursor); + size_t RowCount = Rows.size(); - if (OS) + if (Cursor && Verbose) *OS << format("%02.02" PRIx8 " ", Opcode); if (Opcode == 0) { // Extended Opcodes always start with a zero opcode followed by // a uleb128 length so you can skip ones you don't know about - uint64_t Len = DebugLineData.getULEB128(OffsetPtr); - uint64_t ExtOffset = *OffsetPtr; + uint64_t Len = TableData.getULEB128(Cursor); + uint64_t ExtOffset = Cursor.tell(); // Tolerate zero-length; assume length is correct and soldier on. if (Len == 0) { - if (OS) + if (Cursor && Verbose) *OS << "Badly formed extended line op (length 0)\n"; + if (!Cursor) { + if (Verbose) + *OS << "\n"; + RecoverableErrorHandler(Cursor.takeError()); + } + *OffsetPtr = Cursor.tell(); continue; } - uint8_t SubOpcode = DebugLineData.getU8(OffsetPtr); - if (OS) + uint8_t SubOpcode = TableData.getU8(Cursor); + // OperandOffset will be the same as ExtOffset, if it was not possible to + // read the SubOpcode. + uint64_t OperandOffset = Cursor.tell(); + if (Verbose) *OS << LNExtendedString(SubOpcode); switch (SubOpcode) { case DW_LNE_end_sequence: @@ -588,11 +819,15 @@ Error DWARFDebugLine::LineTable::parse( // address is that of the byte after the last target machine instruction // of the sequence. State.Row.EndSequence = true; - if (OS) { + // No need to test the Cursor is valid here, since it must be to get + // into this code path - if it were invalid, the default case would be + // followed. 
+ if (Verbose) { *OS << "\n"; OS->indent(12); - State.Row.dump(*OS); } + if (OS) + State.Row.dump(*OS); State.appendRowToMatrix(); State.resetRowAndSequence(); break; @@ -608,25 +843,39 @@ Error DWARFDebugLine::LineTable::parse( // Make sure the extractor knows the address size. If not, infer it // from the size of the operand. { - uint8_t ExtractorAddressSize = DebugLineData.getAddressSize(); - if (ExtractorAddressSize != Len - 1 && ExtractorAddressSize != 0) - RecoverableErrorCallback(createStringError( + uint8_t ExtractorAddressSize = TableData.getAddressSize(); + uint64_t OpcodeAddressSize = Len - 1; + if (ExtractorAddressSize != OpcodeAddressSize && + ExtractorAddressSize != 0) + RecoverableErrorHandler(createStringError( errc::invalid_argument, "mismatching address size at offset 0x%8.8" PRIx64 " expected 0x%2.2" PRIx8 " found 0x%2.2" PRIx64, ExtOffset, ExtractorAddressSize, Len - 1)); // Assume that the line table is correct and temporarily override the - // address size. - DebugLineData.setAddressSize(Len - 1); - State.Row.Address.Address = DebugLineData.getRelocatedAddress( - OffsetPtr, &State.Row.Address.SectionIndex); - - // Restore the address size if the extractor already had it. - if (ExtractorAddressSize != 0) - DebugLineData.setAddressSize(ExtractorAddressSize); + // address size. If the size is unsupported, give up trying to read + // the address and continue to the next opcode. 
+ if (OpcodeAddressSize != 1 && OpcodeAddressSize != 2 && + OpcodeAddressSize != 4 && OpcodeAddressSize != 8) { + RecoverableErrorHandler(createStringError( + errc::invalid_argument, + "address size 0x%2.2" PRIx64 + " of DW_LNE_set_address opcode at offset 0x%8.8" PRIx64 + " is unsupported", + OpcodeAddressSize, ExtOffset)); + TableData.skip(Cursor, OpcodeAddressSize); + } else { + TableData.setAddressSize(OpcodeAddressSize); + State.Row.Address.Address = TableData.getRelocatedAddress( + Cursor, &State.Row.Address.SectionIndex); + + // Restore the address size if the extractor already had it. + if (ExtractorAddressSize != 0) + TableData.setAddressSize(ExtractorAddressSize); + } - if (OS) + if (Cursor && Verbose) *OS << format(" (0x%16.16" PRIx64 ")", State.Row.Address.Address); } break; @@ -654,14 +903,14 @@ Error DWARFDebugLine::LineTable::parse( // the file register of the state machine. { FileNameEntry FileEntry; - const char *Name = DebugLineData.getCStr(OffsetPtr); + const char *Name = TableData.getCStr(Cursor); FileEntry.Name = DWARFFormValue::createFromPValue(dwarf::DW_FORM_string, Name); - FileEntry.DirIdx = DebugLineData.getULEB128(OffsetPtr); - FileEntry.ModTime = DebugLineData.getULEB128(OffsetPtr); - FileEntry.Length = DebugLineData.getULEB128(OffsetPtr); + FileEntry.DirIdx = TableData.getULEB128(Cursor); + FileEntry.ModTime = TableData.getULEB128(Cursor); + FileEntry.Length = TableData.getULEB128(Cursor); Prologue.FileNames.push_back(FileEntry); - if (OS) + if (Cursor && Verbose) *OS << " (" << Name << ", dir=" << FileEntry.DirIdx << ", mod_time=" << format("(0x%16.16" PRIx64 ")", FileEntry.ModTime) << ", length=" << FileEntry.Length << ")"; @@ -669,41 +918,63 @@ Error DWARFDebugLine::LineTable::parse( break; case DW_LNE_set_discriminator: - State.Row.Discriminator = DebugLineData.getULEB128(OffsetPtr); - if (OS) + State.Row.Discriminator = TableData.getULEB128(Cursor); + if (Cursor && Verbose) *OS << " (" << State.Row.Discriminator << ")"; break; 
default: - if (OS) + if (Cursor && Verbose) *OS << format("Unrecognized extended op 0x%02.02" PRIx8, SubOpcode) << format(" length %" PRIx64, Len); // Len doesn't include the zero opcode byte or the length itself, but // it does include the sub_opcode, so we have to adjust for that. - (*OffsetPtr) += Len - 1; + TableData.skip(Cursor, Len - 1); break; } - // Make sure the stated and parsed lengths are the same. - // Otherwise we have an unparseable line-number program. - if (*OffsetPtr - ExtOffset != Len) - return createStringError(errc::illegal_byte_sequence, - "unexpected line op length at offset 0x%8.8" PRIx64 - " expected 0x%2.2" PRIx64 " found 0x%2.2" PRIx64, - ExtOffset, Len, *OffsetPtr - ExtOffset); + // Make sure the length as recorded in the table and the standard length + // for the opcode match. If they don't, continue from the end as claimed + // by the table. Similarly, continue from the claimed end in the event of + // a parsing error. + uint64_t End = ExtOffset + Len; + if (Cursor && Cursor.tell() != End) + RecoverableErrorHandler(createStringError( + errc::illegal_byte_sequence, + "unexpected line op length at offset 0x%8.8" PRIx64 + " expected 0x%2.2" PRIx64 " found 0x%2.2" PRIx64, + ExtOffset, Len, Cursor.tell() - ExtOffset)); + if (!Cursor && Verbose) { + DWARFDataExtractor::Cursor ByteCursor(OperandOffset); + uint8_t Byte = TableData.getU8(ByteCursor); + if (ByteCursor) { + *OS << " (<parsing error>"; + do { + *OS << format(" %2.2" PRIx8, Byte); + Byte = TableData.getU8(ByteCursor); + } while (ByteCursor); + *OS << ")"; + } + + // The only parse failure in this case should be if the end was reached. + // In that case, throw away the error, as the main Cursor's error will + // be sufficient. + consumeError(ByteCursor.takeError()); + } + *OffsetPtr = End; } else if (Opcode < Prologue.OpcodeBase) { - if (OS) + if (Verbose) *OS << LNStandardString(Opcode); switch (Opcode) { // Standard Opcodes case DW_LNS_copy: // Takes no arguments. 
Append a row to the matrix using the // current values of the state-machine registers. - if (OS) { + if (Verbose) { *OS << "\n"; OS->indent(12); - State.Row.dump(*OS); - *OS << "\n"; } + if (OS) + State.Row.dump(*OS); State.appendRowToMatrix(); break; @@ -711,11 +982,11 @@ Error DWARFDebugLine::LineTable::parse( // Takes a single unsigned LEB128 operand, multiplies it by the // min_inst_length field of the prologue, and adds the // result to the address register of the state machine. - { + if (Optional<uint64_t> Operand = + parseULEB128<uint64_t>(TableData, Cursor)) { uint64_t AddrOffset = - DebugLineData.getULEB128(OffsetPtr) * Prologue.MinInstLength; - State.Row.Address.Address += AddrOffset; - if (OS) + State.advanceAddr(*Operand, Opcode, OpcodeOffset); + if (Verbose) *OS << " (" << AddrOffset << ")"; } break; @@ -723,25 +994,36 @@ Error DWARFDebugLine::LineTable::parse( case DW_LNS_advance_line: // Takes a single signed LEB128 operand and adds that value to // the line register of the state machine. - State.Row.Line += DebugLineData.getSLEB128(OffsetPtr); - if (OS) - *OS << " (" << State.Row.Line << ")"; + { + int64_t LineDelta = TableData.getSLEB128(Cursor); + if (Cursor) { + State.Row.Line += LineDelta; + if (Verbose) + *OS << " (" << State.Row.Line << ")"; + } + } break; case DW_LNS_set_file: // Takes a single unsigned LEB128 operand and stores it in the file // register of the state machine. - State.Row.File = DebugLineData.getULEB128(OffsetPtr); - if (OS) - *OS << " (" << State.Row.File << ")"; + if (Optional<uint16_t> File = + parseULEB128<uint16_t>(TableData, Cursor)) { + State.Row.File = *File; + if (Verbose) + *OS << " (" << State.Row.File << ")"; + } break; case DW_LNS_set_column: // Takes a single unsigned LEB128 operand and stores it in the // column register of the state machine. 
- State.Row.Column = DebugLineData.getULEB128(OffsetPtr); - if (OS) - *OS << " (" << State.Row.Column << ")"; + if (Optional<uint16_t> Column = + parseULEB128<uint16_t>(TableData, Cursor)) { + State.Row.Column = *Column; + if (Verbose) + *OS << " (" << State.Row.Column << ")"; + } break; case DW_LNS_negate_stmt: @@ -769,13 +1051,10 @@ Error DWARFDebugLine::LineTable::parse( // than twice that range will it need to use both DW_LNS_advance_pc // and a special opcode, requiring three or more bytes. { - uint8_t AdjustOpcode = 255 - Prologue.OpcodeBase; uint64_t AddrOffset = - (AdjustOpcode / Prologue.LineRange) * Prologue.MinInstLength; - State.Row.Address.Address += AddrOffset; - if (OS) - *OS - << format(" (0x%16.16" PRIx64 ")", AddrOffset); + State.advanceAddrForOpcode(Opcode, OpcodeOffset).AddrDelta; + if (Verbose) + *OS << format(" (0x%16.16" PRIx64 ")", AddrOffset); } break; @@ -790,11 +1069,13 @@ Error DWARFDebugLine::LineTable::parse( // requires the use of DW_LNS_advance_pc. Such assemblers, however, // can use DW_LNS_fixed_advance_pc instead, sacrificing compression. { - uint16_t PCOffset = DebugLineData.getRelocatedValue(2, OffsetPtr); - State.Row.Address.Address += PCOffset; - if (OS) - *OS - << format(" (0x%4.4" PRIx16 ")", PCOffset); + uint16_t PCOffset = + TableData.getRelocatedValue(Cursor, 2); + if (Cursor) { + State.Row.Address.Address += PCOffset; + if (Verbose) + *OS << format(" (0x%4.4" PRIx16 ")", PCOffset); + } } break; @@ -812,10 +1093,12 @@ Error DWARFDebugLine::LineTable::parse( case DW_LNS_set_isa: // Takes a single unsigned LEB128 operand and stores it in the - // column register of the state machine. - State.Row.Isa = DebugLineData.getULEB128(OffsetPtr); - if (OS) - *OS << " (" << (uint64_t)State.Row.Isa << ")"; + // ISA register of the state machine. 
+ if (Optional<uint8_t> Isa = parseULEB128<uint8_t>(TableData, Cursor)) { + State.Row.Isa = *Isa; + if (Verbose) + *OS << " (" << (uint64_t)State.Row.Isa << ")"; + } break; default: @@ -824,73 +1107,72 @@ Error DWARFDebugLine::LineTable::parse( // as a multiple of LEB128 operands for each opcode. { assert(Opcode - 1U < Prologue.StandardOpcodeLengths.size()); + if (Verbose) + *OS << "Unrecognized standard opcode"; uint8_t OpcodeLength = Prologue.StandardOpcodeLengths[Opcode - 1]; + std::vector<uint64_t> Operands; for (uint8_t I = 0; I < OpcodeLength; ++I) { - uint64_t Value = DebugLineData.getULEB128(OffsetPtr); - if (OS) - *OS << format("Skipping ULEB128 value: 0x%16.16" PRIx64 ")\n", - Value); + if (Optional<uint64_t> Value = + parseULEB128<uint64_t>(TableData, Cursor)) + Operands.push_back(*Value); + else + break; + } + if (Verbose && !Operands.empty()) { + *OS << " (operands: "; + bool First = true; + for (uint64_t Value : Operands) { + if (!First) + *OS << ", "; + First = false; + *OS << format("0x%16.16" PRIx64, Value); + } + if (Verbose) + *OS << ')'; } } break; } + + *OffsetPtr = Cursor.tell(); } else { - // Special Opcodes - - // A special opcode value is chosen based on the amount that needs - // to be added to the line and address registers. The maximum line - // increment for a special opcode is the value of the line_base - // field in the header, plus the value of the line_range field, - // minus 1 (line base + line range - 1). If the desired line - // increment is greater than the maximum line increment, a standard - // opcode must be used instead of a special opcode. The "address - // advance" is calculated by dividing the desired address increment - // by the minimum_instruction_length field from the header. 
The - // special opcode is then calculated using the following formula: - // - // opcode = (desired line increment - line_base) + - // (line_range * address advance) + opcode_base - // - // If the resulting opcode is greater than 255, a standard opcode - // must be used instead. - // - // To decode a special opcode, subtract the opcode_base from the - // opcode itself to give the adjusted opcode. The amount to - // increment the address register is the result of the adjusted - // opcode divided by the line_range multiplied by the - // minimum_instruction_length field from the header. That is: - // - // address increment = (adjusted opcode / line_range) * - // minimum_instruction_length - // - // The amount to increment the line register is the line_base plus - // the result of the adjusted opcode modulo the line_range. That is: - // - // line increment = line_base + (adjusted opcode % line_range) - - uint8_t AdjustOpcode = Opcode - Prologue.OpcodeBase; - uint64_t AddrOffset = - (AdjustOpcode / Prologue.LineRange) * Prologue.MinInstLength; - int32_t LineOffset = - Prologue.LineBase + (AdjustOpcode % Prologue.LineRange); - State.Row.Line += LineOffset; - State.Row.Address.Address += AddrOffset; - - if (OS) { - *OS << "address += " << AddrOffset << ", line += " << LineOffset + // Special Opcodes. + ParsingState::AddrAndLineDelta Delta = + State.handleSpecialOpcode(Opcode, OpcodeOffset); + + if (Verbose) { + *OS << "address += " << Delta.Address << ", line += " << Delta.Line << "\n"; OS->indent(12); - State.Row.dump(*OS); } + if (OS) + State.Row.dump(*OS); State.appendRowToMatrix(); + *OffsetPtr = Cursor.tell(); } - if(OS) + + // When a row is added to the matrix, it is also dumped, which includes a + // new line already, so don't add an extra one. + if (Verbose && Rows.size() == RowCount) *OS << "\n"; + + // Most parse failures other than when parsing extended opcodes are due to + // failures to read ULEBs. 
Bail out of parsing, since we don't know where to + // continue reading from as there is no stated length for such byte + // sequences. Print the final trailing new line if needed before doing so. + if (!Cursor && Opcode != 0) { + if (Verbose) + *OS << "\n"; + return Cursor.takeError(); + } + + if (!Cursor) + RecoverableErrorHandler(Cursor.takeError()); } if (!State.Sequence.Empty) - RecoverableErrorCallback(createStringError( + RecoverableErrorHandler(createStringError( errc::illegal_byte_sequence, "last sequence in debug line table at offset 0x%8.8" PRIx64 " is not terminated", @@ -907,6 +1189,11 @@ Error DWARFDebugLine::LineTable::parse( // rudimentary sequences for address ranges [0x0, 0xsomething). } + // Terminate the table with a final blank line to clearly delineate it from + // later dumps. + if (OS) + *OS << "\n"; + return Error::success(); } @@ -1054,9 +1341,13 @@ bool DWARFDebugLine::Prologue::getFileNameByIndex( if (!Name) return false; StringRef FileName = *Name; - if (Kind != FileLineInfoKind::AbsoluteFilePath || + if (Kind == FileLineInfoKind::RawValue || isPathAbsoluteOnWindowsOrPosix(FileName)) { - Result = FileName; + Result = std::string(FileName); + return true; + } + if (Kind == FileLineInfoKind::BaseNameOnly) { + Result = std::string(llvm::sys::path::filename(FileName)); return true; } @@ -1064,23 +1355,31 @@ bool DWARFDebugLine::Prologue::getFileNameByIndex( StringRef IncludeDir; // Be defensive about the contents of Entry. if (getVersion() >= 5) { - if (Entry.DirIdx < IncludeDirectories.size()) + // DirIdx 0 is the compilation directory, so don't include it for + // relative names. 
+ if ((Entry.DirIdx != 0 || Kind != FileLineInfoKind::RelativeFilePath) && + Entry.DirIdx < IncludeDirectories.size()) IncludeDir = IncludeDirectories[Entry.DirIdx].getAsCString().getValue(); } else { if (0 < Entry.DirIdx && Entry.DirIdx <= IncludeDirectories.size()) IncludeDir = IncludeDirectories[Entry.DirIdx - 1].getAsCString().getValue(); - - // We may still need to append compilation directory of compile unit. - // We know that FileName is not absolute, the only way to have an - // absolute path at this point would be if IncludeDir is absolute. - if (!CompDir.empty() && !isPathAbsoluteOnWindowsOrPosix(IncludeDir)) - sys::path::append(FilePath, Style, CompDir); } + // For absolute paths only, include the compilation directory of compile unit. + // We know that FileName is not absolute, the only way to have an absolute + // path at this point would be if IncludeDir is absolute. + if (Kind == FileLineInfoKind::AbsoluteFilePath && !CompDir.empty() && + !isPathAbsoluteOnWindowsOrPosix(IncludeDir)) + sys::path::append(FilePath, Style, CompDir); + + assert((Kind == FileLineInfoKind::AbsoluteFilePath || + Kind == FileLineInfoKind::RelativeFilePath) && + "invalid FileLineInfo Kind"); + // sys::path::append skips empty strings. 
sys::path::append(FilePath, Style, IncludeDir, FileName); - Result = FilePath.str(); + Result = std::string(FilePath.str()); return true; } @@ -1131,34 +1430,36 @@ DWARFDebugLine::SectionParser::SectionParser(DWARFDataExtractor &Data, } bool DWARFDebugLine::Prologue::totalLengthIsValid() const { - return TotalLength == dwarf::DW_LENGTH_DWARF64 || - TotalLength < dwarf::DW_LENGTH_lo_reserved; + return TotalLength != 0u; } DWARFDebugLine::LineTable DWARFDebugLine::SectionParser::parseNext( - function_ref<void(Error)> RecoverableErrorCallback, - function_ref<void(Error)> UnrecoverableErrorCallback, raw_ostream *OS) { + function_ref<void(Error)> RecoverableErrorHandler, + function_ref<void(Error)> UnrecoverableErrorHandler, raw_ostream *OS, + bool Verbose) { assert(DebugLineData.isValidOffset(Offset) && "parsing should have terminated"); DWARFUnit *U = prepareToParse(Offset); uint64_t OldOffset = Offset; LineTable LT; if (Error Err = LT.parse(DebugLineData, &Offset, Context, U, - RecoverableErrorCallback, OS)) - UnrecoverableErrorCallback(std::move(Err)); + RecoverableErrorHandler, OS, Verbose)) + UnrecoverableErrorHandler(std::move(Err)); moveToNextTable(OldOffset, LT.Prologue); return LT; } void DWARFDebugLine::SectionParser::skip( - function_ref<void(Error)> ErrorCallback) { + function_ref<void(Error)> RecoverableErrorHandler, + function_ref<void(Error)> UnrecoverableErrorHandler) { assert(DebugLineData.isValidOffset(Offset) && "parsing should have terminated"); DWARFUnit *U = prepareToParse(Offset); uint64_t OldOffset = Offset; LineTable LT; - if (Error Err = LT.Prologue.parse(DebugLineData, &Offset, Context, U)) - ErrorCallback(std::move(Err)); + if (Error Err = LT.Prologue.parse(DebugLineData, &Offset, + RecoverableErrorHandler, Context, U)) + UnrecoverableErrorHandler(std::move(Err)); moveToNextTable(OldOffset, LT.Prologue); } diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp index 0c5f9a9c54ec..f38126364401 
100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp @@ -106,16 +106,15 @@ DWARFLocationInterpreter::Interpret(const DWARFLocationEntry &E) { } } -// When directly dumping the .debug_loc without a compile unit, we have to guess -// at the DWARF version. This only affects DW_OP_call_ref, which is a rare -// expression that LLVM doesn't produce. Guessing the wrong version means we -// won't be able to pretty print expressions in DWARF2 binaries produced by -// non-LLVM tools. static void dumpExpression(raw_ostream &OS, ArrayRef<uint8_t> Data, bool IsLittleEndian, unsigned AddressSize, const MCRegisterInfo *MRI, DWARFUnit *U) { - DWARFDataExtractor Extractor(toStringRef(Data), IsLittleEndian, AddressSize); - DWARFExpression(Extractor, dwarf::DWARF_VERSION, AddressSize).print(OS, MRI, U); + DWARFDataExtractor Extractor(Data, IsLittleEndian, AddressSize); + // Note. We do not pass any format to DWARFExpression, even if the + // corresponding unit is known. For now, there is only one operation, + // DW_OP_call_ref, which depends on the format; it is rarely used, and + // is unexpected in location tables. 
+ DWARFExpression(Extractor, AddressSize).print(OS, MRI, U); } bool DWARFLocationTable::dumpLocationList(uint64_t *Offset, raw_ostream &OS, @@ -161,9 +160,7 @@ bool DWARFLocationTable::dumpLocationList(uint64_t *Offset, raw_ostream &OS, return true; }); if (E) { - OS << "\n"; - OS.indent(Indent); - OS << "error: " << toString(std::move(E)); + DumpOpts.RecoverableErrorHandler(std::move(E)); return false; } return true; diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp index 8cb259ebc622..f920d69cc43f 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp @@ -8,6 +8,8 @@ #include "llvm/DebugInfo/DWARF/DWARFDebugMacro.h" #include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h" #include "llvm/Support/WithColor.h" #include "llvm/Support/raw_ostream.h" #include <cstdint> @@ -15,10 +17,32 @@ using namespace llvm; using namespace dwarf; +DwarfFormat DWARFDebugMacro::MacroHeader::getDwarfFormat() const { + return Flags & MACRO_OFFSET_SIZE ? 
DWARF64 : DWARF32; +} + +uint8_t DWARFDebugMacro::MacroHeader::getOffsetByteSize() const { + return getDwarfOffsetByteSize(getDwarfFormat()); +} + +void DWARFDebugMacro::MacroHeader::dumpMacroHeader(raw_ostream &OS) const { + // FIXME: Add support for dumping opcode_operands_table + OS << format("macro header: version = 0x%04" PRIx16, Version) + << format(", flags = 0x%02" PRIx8, Flags) + << ", format = " << FormatString(getDwarfFormat()); + if (Flags & MACRO_DEBUG_LINE_OFFSET) + OS << format(", debug_line_offset = 0x%0*" PRIx64, 2 * getOffsetByteSize(), + DebugLineOffset); + OS << "\n"; +} + void DWARFDebugMacro::dump(raw_ostream &OS) const { unsigned IndLevel = 0; for (const auto &Macros : MacroLists) { - for (const Entry &E : Macros) { + OS << format("0x%08" PRIx64 ":\n", Macros.Offset); + if (Macros.Header.Version >= 5) + Macros.Header.dumpMacroHeader(OS); + for (const Entry &E : Macros.Macros) { // There should not be DW_MACINFO_end_file when IndLevel is Zero. However, // this check handles the case of corrupted ".debug_macinfo" section. if (IndLevel > 0) @@ -27,22 +51,40 @@ void DWARFDebugMacro::dump(raw_ostream &OS) const { for (unsigned I = 0; I < IndLevel; I++) OS << " "; IndLevel += (E.Type == DW_MACINFO_start_file); - - WithColor(OS, HighlightColor::Macro).get() << MacinfoString(E.Type); + // Based on which version we are handling choose appropriate macro forms. + if (Macros.Header.Version >= 5) + WithColor(OS, HighlightColor::Macro).get() << MacroString(E.Type); + else + WithColor(OS, HighlightColor::Macro).get() << MacinfoString(E.Type); switch (E.Type) { default: - // Got a corrupted ".debug_macinfo" section (invalid macinfo type). + // Got a corrupted ".debug_macinfo/.debug_macro" section (invalid + // macinfo type). break; - case DW_MACINFO_define: - case DW_MACINFO_undef: + // debug_macro and debug_macinfo share some common encodings. 
+ // DW_MACRO_define == DW_MACINFO_define + // DW_MACRO_undef == DW_MACINFO_undef + // DW_MACRO_start_file == DW_MACINFO_start_file + // DW_MACRO_end_file == DW_MACINFO_end_file + // For readability/uniformity we are using DW_MACRO_*. + case DW_MACRO_define: + case DW_MACRO_undef: + case DW_MACRO_define_strp: + case DW_MACRO_undef_strp: + case DW_MACRO_define_strx: + case DW_MACRO_undef_strx: OS << " - lineno: " << E.Line; OS << " macro: " << E.MacroStr; break; - case DW_MACINFO_start_file: + case DW_MACRO_start_file: OS << " - lineno: " << E.Line; OS << " filenum: " << E.File; break; - case DW_MACINFO_end_file: + case DW_MACRO_import: + OS << format(" - import offset: 0x%0*" PRIx64, + 2 * Macros.Header.getOffsetByteSize(), E.ImportOffset); + break; + case DW_MACRO_end_file: break; case DW_MACINFO_vendor_ext: OS << " - constant: " << E.ExtConstant; @@ -51,26 +93,46 @@ void DWARFDebugMacro::dump(raw_ostream &OS) const { } OS << "\n"; } - OS << "\n"; } } -void DWARFDebugMacro::parse(DataExtractor data) { +Error DWARFDebugMacro::parseImpl( + Optional<DWARFUnitVector::iterator_range> Units, + Optional<DataExtractor> StringExtractor, DWARFDataExtractor Data, + bool IsMacro) { uint64_t Offset = 0; MacroList *M = nullptr; - while (data.isValidOffset(Offset)) { + using MacroToUnitsMap = DenseMap<uint64_t, DWARFUnit *>; + MacroToUnitsMap MacroToUnits; + if (IsMacro && Data.isValidOffset(Offset)) { + // Keep a mapping from Macro contribution to CUs, this will + // be needed while retrieving macro from DW_MACRO_define_strx form. + for (const auto &U : Units.getValue()) + if (auto CUDIE = U->getUnitDIE()) + // Skip units which do not contribute to the macro section.
+ if (auto MacroOffset = toSectionOffset(CUDIE.find(DW_AT_macros))) + MacroToUnits.try_emplace(*MacroOffset, U.get()); + } + while (Data.isValidOffset(Offset)) { if (!M) { MacroLists.emplace_back(); M = &MacroLists.back(); + M->Offset = Offset; + if (IsMacro) { + auto Err = M->Header.parseMacroHeader(Data, &Offset); + if (Err) + return Err; + } } // A macro list entry consists of: - M->emplace_back(); - Entry &E = M->back(); + M->Macros.emplace_back(); + Entry &E = M->Macros.back(); // 1. Macinfo type - E.Type = data.getULEB128(&Offset); + E.Type = Data.getULEB128(&Offset); if (E.Type == 0) { - // Reached end of a ".debug_macinfo" section contribution. + // Reached end of a ".debug_macinfo/debug_macro" section contribution. + M = nullptr; continue; } @@ -79,28 +141,99 @@ void DWARFDebugMacro::parse(DataExtractor data) { // Got a corrupted ".debug_macinfo" section (invalid macinfo type). // Push the corrupted entry to the list and halt parsing. E.Type = DW_MACINFO_invalid; - return; - case DW_MACINFO_define: - case DW_MACINFO_undef: + return Error::success(); + // debug_macro and debug_macinfo share some common encodings. + // DW_MACRO_define == DW_MACINFO_define + // DW_MACRO_undef == DW_MACINFO_undef + // DW_MACRO_start_file == DW_MACINFO_start_file + // DW_MACRO_end_file == DW_MACINFO_end_file + // For readability/uniformity we are using DW_MACRO_*. + case DW_MACRO_define: + case DW_MACRO_undef: + // 2. Source line + E.Line = Data.getULEB128(&Offset); + // 3. Macro string + E.MacroStr = Data.getCStr(&Offset); + break; + case DW_MACRO_define_strp: + case DW_MACRO_undef_strp: { + if (!IsMacro) { + // DW_MACRO_define_strp is a new form introduced in DWARFv5, it is + // not supported in debug_macinfo[.dwo] sections. Assume it as an + // invalid entry, push it and halt parsing. + E.Type = DW_MACINFO_invalid; + return Error::success(); + } + uint64_t StrOffset = 0; // 2. Source line - E.Line = data.getULEB128(&Offset); + E.Line = Data.getULEB128(&Offset); // 3.
Macro string - E.MacroStr = data.getCStr(&Offset); + StrOffset = + Data.getRelocatedValue(M->Header.getOffsetByteSize(), &Offset); + assert(StringExtractor && "String Extractor not found"); + E.MacroStr = StringExtractor->getCStr(&StrOffset); + break; + } + case DW_MACRO_define_strx: + case DW_MACRO_undef_strx: { + if (!IsMacro) { + // DW_MACRO_define_strx is a new form introduced in DWARFv5, it is + // not supported in debug_macinfo[.dwo] sections. Assume it as an + // invalid entry, push it and halt parsing. + E.Type = DW_MACINFO_invalid; + return Error::success(); + } + E.Line = Data.getULEB128(&Offset); + auto MacroContributionOffset = MacroToUnits.find(M->Offset); + if (MacroContributionOffset == MacroToUnits.end()) + return createStringError(errc::invalid_argument, + "Macro contribution of the unit not found"); + Optional<uint64_t> StrOffset = + MacroContributionOffset->second->getStringOffsetSectionItem( + Data.getULEB128(&Offset)); + if (!StrOffset) + return createStringError( + errc::invalid_argument, + "String offsets contribution of the unit not found"); + E.MacroStr = + MacroContributionOffset->second->getStringExtractor().getCStr( + &*StrOffset); break; - case DW_MACINFO_start_file: + } + case DW_MACRO_start_file: // 2. Source line - E.Line = data.getULEB128(&Offset); + E.Line = Data.getULEB128(&Offset); // 3. Source file id - E.File = data.getULEB128(&Offset); + E.File = Data.getULEB128(&Offset); + break; + case DW_MACRO_end_file: break; - case DW_MACINFO_end_file: + case DW_MACRO_import: + E.ImportOffset = + Data.getRelocatedValue(M->Header.getOffsetByteSize(), &Offset); break; case DW_MACINFO_vendor_ext: // 2. Vendor extension constant - E.ExtConstant = data.getULEB128(&Offset); + E.ExtConstant = Data.getULEB128(&Offset); // 3. 
Vendor extension string - E.ExtStr = data.getCStr(&Offset); + E.ExtStr = Data.getCStr(&Offset); break; } } + return Error::success(); +} + +Error DWARFDebugMacro::MacroHeader::parseMacroHeader(DWARFDataExtractor Data, + uint64_t *Offset) { + Version = Data.getU16(Offset); + uint8_t FlagData = Data.getU8(Offset); + + // FIXME: Add support for parsing opcode_operands_table + if (FlagData & MACRO_OPCODE_OPERANDS_TABLE) + return createStringError(errc::not_supported, + "opcode_operands_table is not supported"); + Flags = FlagData; + if (Flags & MACRO_DEBUG_LINE_OFFSET) + DebugLineOffset = Data.getUnsigned(Offset, getOffsetByteSize()); + return Error::success(); } diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp index ab71b239cb67..5031acdb54ef 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp @@ -11,6 +11,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/Support/DataExtractor.h" +#include "llvm/Support/Errc.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include <cstdint> @@ -18,44 +19,92 @@ using namespace llvm; using namespace dwarf; -DWARFDebugPubTable::DWARFDebugPubTable(const DWARFObject &Obj, - const DWARFSection &Sec, - bool LittleEndian, bool GnuStyle) - : GnuStyle(GnuStyle) { - DWARFDataExtractor PubNames(Obj, Sec, LittleEndian, 0); +void DWARFDebugPubTable::extract( + DWARFDataExtractor Data, bool GnuStyle, + function_ref<void(Error)> RecoverableErrorHandler) { + this->GnuStyle = GnuStyle; + Sets.clear(); uint64_t Offset = 0; - while (PubNames.isValidOffset(Offset)) { + while (Data.isValidOffset(Offset)) { + uint64_t SetOffset = Offset; Sets.push_back({}); - Set &SetData = Sets.back(); + Set &NewSet = Sets.back(); - SetData.Length = PubNames.getU32(&Offset); - SetData.Version = PubNames.getU16(&Offset); - SetData.Offset = PubNames.getRelocatedValue(4, &Offset); - 
SetData.Size = PubNames.getU32(&Offset); + DataExtractor::Cursor C(Offset); + std::tie(NewSet.Length, NewSet.Format) = Data.getInitialLength(C); + if (!C) { + // Drop the newly added set because it does not contain anything useful + // to dump. + Sets.pop_back(); + RecoverableErrorHandler(createStringError( + errc::invalid_argument, + "name lookup table at offset 0x%" PRIx64 " parsing failed: %s", + SetOffset, toString(C.takeError()).c_str())); + return; + } + + Offset = C.tell() + NewSet.Length; + DWARFDataExtractor SetData(Data, Offset); + const unsigned OffsetSize = dwarf::getDwarfOffsetByteSize(NewSet.Format); - while (Offset < Sec.Data.size()) { - uint32_t DieRef = PubNames.getU32(&Offset); + NewSet.Version = SetData.getU16(C); + NewSet.Offset = SetData.getRelocatedValue(C, OffsetSize); + NewSet.Size = SetData.getUnsigned(C, OffsetSize); + + if (!C) { + // Preserve the newly added set because at least some fields of the header + // are read and can be dumped. + RecoverableErrorHandler( + createStringError(errc::invalid_argument, + "name lookup table at offset 0x%" PRIx64 + " does not have a complete header: %s", + SetOffset, toString(C.takeError()).c_str())); + continue; + } + + while (C) { + uint64_t DieRef = SetData.getUnsigned(C, OffsetSize); if (DieRef == 0) break; - uint8_t IndexEntryValue = GnuStyle ? PubNames.getU8(&Offset) : 0; - StringRef Name = PubNames.getCStrRef(&Offset); - SetData.Entries.push_back( - {DieRef, PubIndexEntryDescriptor(IndexEntryValue), Name}); + uint8_t IndexEntryValue = GnuStyle ? 
SetData.getU8(C) : 0; + StringRef Name = SetData.getCStrRef(C); + if (C) + NewSet.Entries.push_back( + {DieRef, PubIndexEntryDescriptor(IndexEntryValue), Name}); + } + + if (!C) { + RecoverableErrorHandler(createStringError( + errc::invalid_argument, + "name lookup table at offset 0x%" PRIx64 " parsing failed: %s", + SetOffset, toString(C.takeError()).c_str())); + continue; } + if (C.tell() != Offset) + RecoverableErrorHandler(createStringError( + errc::invalid_argument, + "name lookup table at offset 0x%" PRIx64 + " has a terminator at offset 0x%" PRIx64 + " before the expected end at 0x%" PRIx64, + SetOffset, C.tell() - OffsetSize, Offset - OffsetSize)); } } void DWARFDebugPubTable::dump(raw_ostream &OS) const { for (const Set &S : Sets) { - OS << "length = " << format("0x%08x", S.Length); - OS << " version = " << format("0x%04x", S.Version); - OS << " unit_offset = " << format("0x%08" PRIx64, S.Offset); - OS << " unit_size = " << format("0x%08x", S.Size) << '\n'; + int OffsetDumpWidth = 2 * dwarf::getDwarfOffsetByteSize(S.Format); + OS << "length = " << format("0x%0*" PRIx64, OffsetDumpWidth, S.Length); + OS << ", format = " << dwarf::FormatString(S.Format); + OS << ", version = " << format("0x%04x", S.Version); + OS << ", unit_offset = " + << format("0x%0*" PRIx64, OffsetDumpWidth, S.Offset); + OS << ", unit_size = " << format("0x%0*" PRIx64, OffsetDumpWidth, S.Size) + << '\n'; OS << (GnuStyle ? 
"Offset Linkage Kind Name\n" : "Offset Name\n"); for (const Entry &E : S.Entries) { - OS << format("0x%8.8" PRIx64 " ", E.SecOffset); + OS << format("0x%0*" PRIx64 " ", OffsetDumpWidth, E.SecOffset); if (GnuStyle) { StringRef EntryLinkage = GDBIndexEntryLinkageString(E.Descriptor.Linkage); diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp index c1dc3b68c6ab..81a6b5dcd5e7 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp @@ -79,7 +79,7 @@ static void dumpLocation(raw_ostream &OS, DWARFFormValue &FormValue, ArrayRef<uint8_t> Expr = *FormValue.getAsBlock(); DataExtractor Data(StringRef((const char *)Expr.data(), Expr.size()), Ctx.isLittleEndian(), 0); - DWARFExpression(Data, U->getVersion(), U->getAddressByteSize()) + DWARFExpression(Data, U->getAddressByteSize(), U->getFormParams().Format) .print(OS, MRI, U); return; } @@ -317,8 +317,9 @@ static void dumpAttribute(raw_ostream &OS, const DWARFDie &Die, dumpRanges(Obj, OS, RangesOrError.get(), U->getAddressByteSize(), sizeof(BaseIndent) + Indent + 4, DumpOpts); else - WithColor::error() << "decoding address ranges: " - << toString(RangesOrError.takeError()) << '\n'; + DumpOpts.RecoverableErrorHandler(createStringError( + errc::invalid_argument, "decoding address ranges: %s", + toString(RangesOrError.takeError()).c_str())); } OS << ")\n"; @@ -356,7 +357,7 @@ DWARFDie::find(ArrayRef<dwarf::Attribute> Attrs) const { Optional<DWARFFormValue> DWARFDie::findRecursively(ArrayRef<dwarf::Attribute> Attrs) const { - std::vector<DWARFDie> Worklist; + SmallVector<DWARFDie, 3> Worklist; Worklist.push_back(*this); // Keep track if DIEs already seen to prevent infinite recursion. @@ -531,14 +532,26 @@ const char *DWARFDie::getName(DINameKind Kind) const { return nullptr; // Try to get mangled name only if it was asked for. 
if (Kind == DINameKind::LinkageName) { - if (auto Name = dwarf::toString( - findRecursively({DW_AT_MIPS_linkage_name, DW_AT_linkage_name}), - nullptr)) + if (auto Name = getLinkageName()) return Name; } - if (auto Name = dwarf::toString(findRecursively(DW_AT_name), nullptr)) - return Name; - return nullptr; + return getShortName(); +} + +const char *DWARFDie::getShortName() const { + if (!isValid()) + return nullptr; + + return dwarf::toString(findRecursively(dwarf::DW_AT_name), nullptr); +} + +const char *DWARFDie::getLinkageName() const { + if (!isValid()) + return nullptr; + + return dwarf::toString(findRecursively({dwarf::DW_AT_MIPS_linkage_name, + dwarf::DW_AT_linkage_name}), + nullptr); } uint64_t DWARFDie::getDeclLine() const { diff --git a/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp b/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp index 7d817d8a9925..de5e11e084f4 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp @@ -8,7 +8,6 @@ #include "llvm/DebugInfo/DWARF/DWARFExpression.h" #include "llvm/DebugInfo/DWARF/DWARFUnit.h" -#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/Format.h" #include <cassert> @@ -94,7 +93,7 @@ static DescVector getDescriptions() { Desc(Op::Dwarf3, Op::SizeLEB, Op::SizeBlock); Descriptions[DW_OP_stack_value] = Desc(Op::Dwarf3); Descriptions[DW_OP_WASM_location] = - Desc(Op::Dwarf4, Op::SizeLEB, Op::SignedSizeLEB); + Desc(Op::Dwarf4, Op::SizeLEB, Op::WasmLocationArg); Descriptions[DW_OP_GNU_push_tls_address] = Desc(Op::Dwarf3); Descriptions[DW_OP_addrx] = Desc(Op::Dwarf4, Op::SizeLEB); Descriptions[DW_OP_GNU_addr_index] = Desc(Op::Dwarf4, Op::SizeLEB); @@ -103,6 +102,8 @@ static DescVector getDescriptions() { Descriptions[DW_OP_convert] = Desc(Op::Dwarf5, Op::BaseTypeRef); Descriptions[DW_OP_entry_value] = Desc(Op::Dwarf5, Op::SizeLEB); + Descriptions[DW_OP_regval_type] = + Desc(Op::Dwarf5, Op::SizeLEB, Op::BaseTypeRef); return 
Descriptions; } @@ -116,19 +117,15 @@ static DWARFExpression::Operation::Description getOpDesc(unsigned OpCode) { return Descriptions[OpCode]; } -static uint8_t getRefAddrSize(uint8_t AddrSize, uint16_t Version) { - return (Version == 2) ? AddrSize : 4; -} - -bool DWARFExpression::Operation::extract(DataExtractor Data, uint16_t Version, - uint8_t AddressSize, uint64_t Offset) { +bool DWARFExpression::Operation::extract(DataExtractor Data, + uint8_t AddressSize, uint64_t Offset, + Optional<DwarfFormat> Format) { + EndOffset = Offset; Opcode = Data.getU8(&Offset); Desc = getOpDesc(Opcode); - if (Desc.Version == Operation::DwarfNA) { - EndOffset = Offset; + if (Desc.Version == Operation::DwarfNA) return false; - } for (unsigned Operand = 0; Operand < 2; ++Operand) { unsigned Size = Desc.Op[Operand]; @@ -157,24 +154,13 @@ bool DWARFExpression::Operation::extract(DataExtractor Data, uint16_t Version, Operands[Operand] = Data.getU64(&Offset); break; case Operation::SizeAddr: - if (AddressSize == 8) { - Operands[Operand] = Data.getU64(&Offset); - } else if (AddressSize == 4) { - Operands[Operand] = Data.getU32(&Offset); - } else { - assert(AddressSize == 2); - Operands[Operand] = Data.getU16(&Offset); - } + Operands[Operand] = Data.getUnsigned(&Offset, AddressSize); break; case Operation::SizeRefAddr: - if (getRefAddrSize(AddressSize, Version) == 8) { - Operands[Operand] = Data.getU64(&Offset); - } else if (getRefAddrSize(AddressSize, Version) == 4) { - Operands[Operand] = Data.getU32(&Offset); - } else { - assert(getRefAddrSize(AddressSize, Version) == 2); - Operands[Operand] = Data.getU16(&Offset); - } + if (!Format) + return false; + Operands[Operand] = + Data.getUnsigned(&Offset, dwarf::getDwarfOffsetByteSize(*Format)); break; case Operation::SizeLEB: if (Signed) @@ -185,6 +171,19 @@ bool DWARFExpression::Operation::extract(DataExtractor Data, uint16_t Version, case Operation::BaseTypeRef: Operands[Operand] = Data.getULEB128(&Offset); break; + case 
Operation::WasmLocationArg: + assert(Operand == 1); + switch (Operands[0]) { + case 0: case 1: case 2: + Operands[Operand] = Data.getULEB128(&Offset); + break; + case 3: // global as uint32 + Operands[Operand] = Data.getU32(&Offset); + break; + default: + return false; // Unknown Wasm location + } + break; case Operation::SizeBlock: // We need a size, so this cannot be the first operand if (Operand == 0) @@ -204,7 +203,21 @@ bool DWARFExpression::Operation::extract(DataExtractor Data, uint16_t Version, return true; } -static bool prettyPrintRegisterOp(raw_ostream &OS, uint8_t Opcode, +static void prettyPrintBaseTypeRef(DWARFUnit *U, raw_ostream &OS, + uint64_t Operands[2], unsigned Operand) { + assert(Operand < 2 && "operand out of bounds"); + auto Die = U->getDIEForOffset(U->getOffset() + Operands[Operand]); + if (Die && Die.getTag() == dwarf::DW_TAG_base_type) { + OS << format(" (0x%08" PRIx64 ")", U->getOffset() + Operands[Operand]); + if (auto Name = Die.find(dwarf::DW_AT_name)) + OS << " \"" << Name->getAsCString() << "\""; + } else { + OS << format(" <invalid base_type ref: 0x%" PRIx64 ">", + Operands[Operand]); + } +} + +static bool prettyPrintRegisterOp(DWARFUnit *U, raw_ostream &OS, uint8_t Opcode, uint64_t Operands[2], const MCRegisterInfo *MRI, bool isEH) { if (!MRI) @@ -213,7 +226,8 @@ static bool prettyPrintRegisterOp(raw_ostream &OS, uint8_t Opcode, uint64_t DwarfRegNum; unsigned OpNum = 0; - if (Opcode == DW_OP_bregx || Opcode == DW_OP_regx) + if (Opcode == DW_OP_bregx || Opcode == DW_OP_regx || + Opcode == DW_OP_regval_type) DwarfRegNum = Operands[OpNum++]; else if (Opcode >= DW_OP_breg0 && Opcode < DW_OP_bregx) DwarfRegNum = Opcode - DW_OP_breg0; @@ -227,6 +241,9 @@ static bool prettyPrintRegisterOp(raw_ostream &OS, uint8_t Opcode, OS << format(" %s%+" PRId64, RegName, Operands[OpNum]); else OS << ' ' << RegName; + + if (Opcode == DW_OP_regval_type) + prettyPrintBaseTypeRef(U, OS, Operands, 1); return true; } } @@ -250,8 +267,9 @@ bool 
DWARFExpression::Operation::print(raw_ostream &OS, if ((Opcode >= DW_OP_breg0 && Opcode <= DW_OP_breg31) || (Opcode >= DW_OP_reg0 && Opcode <= DW_OP_reg31) || - Opcode == DW_OP_bregx || Opcode == DW_OP_regx) - if (prettyPrintRegisterOp(OS, Opcode, Operands, RegInfo, isEH)) + Opcode == DW_OP_bregx || Opcode == DW_OP_regx || + Opcode == DW_OP_regval_type) + if (prettyPrintRegisterOp(U, OS, Opcode, Operands, RegInfo, isEH)) return true; for (unsigned Operand = 0; Operand < 2; ++Operand) { @@ -262,14 +280,21 @@ bool DWARFExpression::Operation::print(raw_ostream &OS, break; if (Size == Operation::BaseTypeRef && U) { - auto Die = U->getDIEForOffset(U->getOffset() + Operands[Operand]); - if (Die && Die.getTag() == dwarf::DW_TAG_base_type) { - OS << format(" (0x%08" PRIx64 ")", U->getOffset() + Operands[Operand]); - if (auto Name = Die.find(dwarf::DW_AT_name)) - OS << " \"" << Name->getAsCString() << "\""; - } else { - OS << format(" <invalid base_type ref: 0x%" PRIx64 ">", - Operands[Operand]); + // For DW_OP_convert the operand may be 0 to indicate that conversion to + // the generic type should be done. The same holds for DW_OP_reinterpret, + // which is currently not supported. + if (Opcode == DW_OP_convert && Operands[Operand] == 0) + OS << " 0x0"; + else + prettyPrintBaseTypeRef(U, OS, Operands, Operand); + } else if (Size == Operation::WasmLocationArg) { + assert(Operand == 1); + switch (Operands[0]) { + case 0: case 1: case 2: + case 3: // global as uint32 + OS << format(" 0x%" PRIx64, Operands[Operand]); + break; + default: assert(false); } } else if (Size == Operation::SizeBlock) { uint64_t Offset = Operands[Operand]; @@ -324,6 +349,12 @@ bool DWARFExpression::Operation::verify(DWARFUnit *U) { break; if (Size == Operation::BaseTypeRef) { + // For DW_OP_convert the operand may be 0 to indicate that conversion to + // the generic type should be done, so don't look up a base type in that + // case. 
The same holds for DW_OP_reinterpret, which is currently not + // supported. + if (Opcode == DW_OP_convert && Operands[Operand] == 0) + continue; auto Die = U->getDIEForOffset(U->getOffset() + Operands[Operand]); if (!Die || Die.getTag() != dwarf::DW_TAG_base_type) { Error = true; @@ -343,4 +374,126 @@ bool DWARFExpression::verify(DWARFUnit *U) { return true; } +/// A user-facing string representation of a DWARF expression. This might be an +/// Address expression, in which case it will be implicitly dereferenced, or a +/// Value expression. +struct PrintedExpr { + enum ExprKind { + Address, + Value, + }; + ExprKind Kind; + SmallString<16> String; + + PrintedExpr(ExprKind K = Address) : Kind(K) {} +}; + +static bool printCompactDWARFExpr(raw_ostream &OS, DWARFExpression::iterator I, + const DWARFExpression::iterator E, + const MCRegisterInfo &MRI) { + SmallVector<PrintedExpr, 4> Stack; + + while (I != E) { + DWARFExpression::Operation &Op = *I; + uint8_t Opcode = Op.getCode(); + switch (Opcode) { + case dwarf::DW_OP_regx: { + // DW_OP_regx: A register, with the register num given as an operand. + // Printed as the plain register name. 
+ uint64_t DwarfRegNum = Op.getRawOperand(0); + Optional<unsigned> LLVMRegNum = MRI.getLLVMRegNum(DwarfRegNum, false); + if (!LLVMRegNum) { + OS << "<unknown register " << DwarfRegNum << ">"; + return false; + } + raw_svector_ostream S(Stack.emplace_back(PrintedExpr::Value).String); + S << MRI.getName(*LLVMRegNum); + break; + } + case dwarf::DW_OP_bregx: { + int DwarfRegNum = Op.getRawOperand(0); + int64_t Offset = Op.getRawOperand(1); + Optional<unsigned> LLVMRegNum = MRI.getLLVMRegNum(DwarfRegNum, false); + if (!LLVMRegNum) { + OS << "<unknown register " << DwarfRegNum << ">"; + return false; + } + raw_svector_ostream S(Stack.emplace_back().String); + S << MRI.getName(*LLVMRegNum); + if (Offset) + S << format("%+" PRId64, Offset); + break; + } + case dwarf::DW_OP_entry_value: + case dwarf::DW_OP_GNU_entry_value: { + // DW_OP_entry_value contains a sub-expression which must be rendered + // separately. + uint64_t SubExprLength = Op.getRawOperand(0); + DWARFExpression::iterator SubExprEnd = I.skipBytes(SubExprLength); + ++I; + raw_svector_ostream S(Stack.emplace_back().String); + S << "entry("; + printCompactDWARFExpr(S, I, SubExprEnd, MRI); + S << ")"; + I = SubExprEnd; + continue; + } + case dwarf::DW_OP_stack_value: { + // The top stack entry should be treated as the actual value of tne + // variable, rather than the address of the variable in memory. + assert(!Stack.empty()); + Stack.back().Kind = PrintedExpr::Value; + break; + } + default: + if (Opcode >= dwarf::DW_OP_reg0 && Opcode <= dwarf::DW_OP_reg31) { + // DW_OP_reg<N>: A register, with the register num implied by the + // opcode. Printed as the plain register name. 
+ uint64_t DwarfRegNum = Opcode - dwarf::DW_OP_reg0; + Optional<unsigned> LLVMRegNum = MRI.getLLVMRegNum(DwarfRegNum, false); + if (!LLVMRegNum) { + OS << "<unknown register " << DwarfRegNum << ">"; + return false; + } + raw_svector_ostream S(Stack.emplace_back(PrintedExpr::Value).String); + S << MRI.getName(*LLVMRegNum); + } else if (Opcode >= dwarf::DW_OP_breg0 && + Opcode <= dwarf::DW_OP_breg31) { + int DwarfRegNum = Opcode - dwarf::DW_OP_breg0; + int64_t Offset = Op.getRawOperand(0); + Optional<unsigned> LLVMRegNum = MRI.getLLVMRegNum(DwarfRegNum, false); + if (!LLVMRegNum) { + OS << "<unknown register " << DwarfRegNum << ">"; + return false; + } + raw_svector_ostream S(Stack.emplace_back().String); + S << MRI.getName(*LLVMRegNum); + if (Offset) + S << format("%+" PRId64, Offset); + } else { + // If we hit an unknown operand, we don't know its effect on the stack, + // so bail out on the whole expression. + OS << "<unknown op " << dwarf::OperationEncodingString(Opcode) << " (" + << (int)Opcode << ")>"; + return false; + } + break; + } + ++I; + } + + assert(Stack.size() == 1 && "expected one value on stack"); + + if (Stack.front().Kind == PrintedExpr::Address) + OS << "[" << Stack.front().String << "]"; + else + OS << Stack.front().String; + + return true; +} + +bool DWARFExpression::printCompact(raw_ostream &OS, const MCRegisterInfo &MRI) { + return printCompactDWARFExpr(OS, begin(), end(), MRI); +} + } // namespace llvm diff --git a/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp b/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp index e97ae81345b8..a7da5acc380b 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp @@ -241,11 +241,13 @@ bool DWARFFormValue::extractValue(const DWARFDataExtractor &Data, Ctx = &CU->getContext(); C = Ctx; U = CU; + Format = FP.Format; bool Indirect = false; bool IsBlock = false; Value.data = nullptr; // Read the value for the form into value and follow and DW_FORM_indirect // instances 
we run into + Error Err = Error::success(); do { Indirect = false; switch (Form) { @@ -253,24 +255,25 @@ bool DWARFFormValue::extractValue(const DWARFDataExtractor &Data, case DW_FORM_ref_addr: { uint16_t Size = (Form == DW_FORM_addr) ? FP.AddrSize : FP.getRefAddrByteSize(); - Value.uval = Data.getRelocatedValue(Size, OffsetPtr, &Value.SectionIndex); + Value.uval = + Data.getRelocatedValue(Size, OffsetPtr, &Value.SectionIndex, &Err); break; } case DW_FORM_exprloc: case DW_FORM_block: - Value.uval = Data.getULEB128(OffsetPtr); + Value.uval = Data.getULEB128(OffsetPtr, &Err); IsBlock = true; break; case DW_FORM_block1: - Value.uval = Data.getU8(OffsetPtr); + Value.uval = Data.getU8(OffsetPtr, &Err); IsBlock = true; break; case DW_FORM_block2: - Value.uval = Data.getU16(OffsetPtr); + Value.uval = Data.getU16(OffsetPtr, &Err); IsBlock = true; break; case DW_FORM_block4: - Value.uval = Data.getU32(OffsetPtr); + Value.uval = Data.getU32(OffsetPtr, &Err); IsBlock = true; break; case DW_FORM_data1: @@ -278,28 +281,28 @@ bool DWARFFormValue::extractValue(const DWARFDataExtractor &Data, case DW_FORM_flag: case DW_FORM_strx1: case DW_FORM_addrx1: - Value.uval = Data.getU8(OffsetPtr); + Value.uval = Data.getU8(OffsetPtr, &Err); break; case DW_FORM_data2: case DW_FORM_ref2: case DW_FORM_strx2: case DW_FORM_addrx2: - Value.uval = Data.getU16(OffsetPtr); + Value.uval = Data.getU16(OffsetPtr, &Err); break; case DW_FORM_strx3: - Value.uval = Data.getU24(OffsetPtr); + Value.uval = Data.getU24(OffsetPtr, &Err); break; case DW_FORM_data4: case DW_FORM_ref4: case DW_FORM_ref_sup4: case DW_FORM_strx4: case DW_FORM_addrx4: - Value.uval = Data.getRelocatedValue(4, OffsetPtr); + Value.uval = Data.getRelocatedValue(4, OffsetPtr, nullptr, &Err); break; case DW_FORM_data8: case DW_FORM_ref8: case DW_FORM_ref_sup8: - Value.uval = Data.getRelocatedValue(8, OffsetPtr); + Value.uval = Data.getRelocatedValue(8, OffsetPtr, nullptr, &Err); break; case DW_FORM_data16: // Treat this like a 16-byte 
block. @@ -307,19 +310,23 @@ bool DWARFFormValue::extractValue(const DWARFDataExtractor &Data, IsBlock = true; break; case DW_FORM_sdata: - Value.sval = Data.getSLEB128(OffsetPtr); + Value.sval = Data.getSLEB128(OffsetPtr, &Err); break; case DW_FORM_udata: case DW_FORM_ref_udata: case DW_FORM_rnglistx: case DW_FORM_loclistx: - Value.uval = Data.getULEB128(OffsetPtr); + case DW_FORM_GNU_addr_index: + case DW_FORM_GNU_str_index: + case DW_FORM_addrx: + case DW_FORM_strx: + Value.uval = Data.getULEB128(OffsetPtr, &Err); break; case DW_FORM_string: - Value.cstr = Data.getCStr(OffsetPtr); + Value.cstr = Data.getCStr(OffsetPtr, &Err); break; case DW_FORM_indirect: - Form = static_cast<dwarf::Form>(Data.getULEB128(OffsetPtr)); + Form = static_cast<dwarf::Form>(Data.getULEB128(OffsetPtr, &Err)); Indirect = true; break; case DW_FORM_strp: @@ -328,39 +335,27 @@ bool DWARFFormValue::extractValue(const DWARFDataExtractor &Data, case DW_FORM_GNU_strp_alt: case DW_FORM_line_strp: case DW_FORM_strp_sup: { - Value.uval = - Data.getRelocatedValue(FP.getDwarfOffsetByteSize(), OffsetPtr); + Value.uval = Data.getRelocatedValue(FP.getDwarfOffsetByteSize(), + OffsetPtr, nullptr, &Err); break; } case DW_FORM_flag_present: Value.uval = 1; break; case DW_FORM_ref_sig8: - Value.uval = Data.getU64(OffsetPtr); - break; - case DW_FORM_GNU_addr_index: - case DW_FORM_GNU_str_index: - case DW_FORM_addrx: - case DW_FORM_strx: - Value.uval = Data.getULEB128(OffsetPtr); + Value.uval = Data.getU64(OffsetPtr, &Err); break; default: // DWARFFormValue::skipValue() will have caught this and caused all // DWARF DIEs to fail to be parsed, so this code is not be reachable. 
llvm_unreachable("unsupported form"); } - } while (Indirect); + } while (Indirect && !Err); - if (IsBlock) { - StringRef Str = Data.getData().substr(*OffsetPtr, Value.uval); - Value.data = nullptr; - if (!Str.empty()) { - Value.data = Str.bytes_begin(); - *OffsetPtr += Value.uval; - } - } + if (IsBlock) + Value.data = Data.getBytes(OffsetPtr, Value.uval, &Err).bytes_begin(); - return true; + return !errorToBool(std::move(Err)); } void DWARFFormValue::dumpSectionedAddress(raw_ostream &OS, @@ -392,6 +387,7 @@ void DWARFFormValue::dump(raw_ostream &OS, DIDumpOptions DumpOpts) const { raw_ostream &AddrOS = DumpOpts.ShowAddresses ? WithColor(OS, HighlightColor::Address).get() : nulls(); + int OffsetDumpWidth = 2 * dwarf::getDwarfOffsetByteSize(Format); switch (Form) { case DW_FORM_addr: dumpSectionedAddress(AddrOS, DumpOpts, {Value.uval, Value.SectionIndex}); @@ -487,12 +483,13 @@ void DWARFFormValue::dump(raw_ostream &OS, DIDumpOptions DumpOpts) const { break; case DW_FORM_strp: if (DumpOpts.Verbose) - OS << format(" .debug_str[0x%8.8x] = ", (uint32_t)UValue); + OS << format(" .debug_str[0x%0*" PRIx64 "] = ", OffsetDumpWidth, UValue); dumpString(OS); break; case DW_FORM_line_strp: if (DumpOpts.Verbose) - OS << format(" .debug_line_str[0x%8.8x] = ", (uint32_t)UValue); + OS << format(" .debug_line_str[0x%0*" PRIx64 "] = ", OffsetDumpWidth, + UValue); dumpString(OS); break; case DW_FORM_strx: @@ -556,9 +553,8 @@ void DWARFFormValue::dump(raw_ostream &OS, DIDumpOptions DumpOpts) const { OS << format("indexed (0x%x) loclist = ", (uint32_t)UValue); break; - // Should be formatted to 64-bit for DWARF64. 
case DW_FORM_sec_offset: - AddrOS << format("0x%08x", (uint32_t)UValue); + AddrOS << format("0x%0*" PRIx64, OffsetDumpWidth, UValue); break; default: diff --git a/llvm/lib/DebugInfo/DWARF/DWARFListTable.cpp b/llvm/lib/DebugInfo/DWARF/DWARFListTable.cpp index 269ea9f79a6e..2124a49bef60 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFListTable.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFListTable.cpp @@ -18,34 +18,24 @@ using namespace llvm; Error DWARFListTableHeader::extract(DWARFDataExtractor Data, uint64_t *OffsetPtr) { HeaderOffset = *OffsetPtr; - // Read and verify the length field. - if (!Data.isValidOffsetForDataOfSize(*OffsetPtr, sizeof(uint32_t))) - return createStringError(errc::invalid_argument, - "section is not large enough to contain a " - "%s table length at offset 0x%" PRIx64, - SectionName.data(), *OffsetPtr); - Format = dwarf::DwarfFormat::DWARF32; - uint8_t OffsetByteSize = 4; - HeaderData.Length = Data.getRelocatedValue(4, OffsetPtr); - if (HeaderData.Length == dwarf::DW_LENGTH_DWARF64) { - Format = dwarf::DwarfFormat::DWARF64; - OffsetByteSize = 8; - HeaderData.Length = Data.getU64(OffsetPtr); - } else if (HeaderData.Length >= dwarf::DW_LENGTH_lo_reserved) { - return createStringError(errc::invalid_argument, - "%s table at offset 0x%" PRIx64 - " has unsupported reserved unit length of value 0x%8.8" PRIx64, - SectionName.data(), HeaderOffset, HeaderData.Length); - } + Error Err = Error::success(); + + std::tie(HeaderData.Length, Format) = Data.getInitialLength(OffsetPtr, &Err); + if (Err) + return createStringError( + errc::invalid_argument, "parsing %s table at offset 0x%" PRIx64 ": %s", + SectionName.data(), HeaderOffset, toString(std::move(Err)).c_str()); + + uint8_t OffsetByteSize = Format == dwarf::DWARF64 ? 
8 : 4; uint64_t FullLength = HeaderData.Length + dwarf::getUnitLengthFieldByteSize(Format); - assert(FullLength == length()); if (FullLength < getHeaderSize(Format)) return createStringError(errc::invalid_argument, "%s table at offset 0x%" PRIx64 " has too small length (0x%" PRIx64 ") to contain a complete header", SectionName.data(), HeaderOffset, FullLength); + assert(FullLength == length() && "Inconsistent calculation of length."); uint64_t End = HeaderOffset + FullLength; if (!Data.isValidOffsetForDataOfSize(HeaderOffset, FullLength)) return createStringError(errc::invalid_argument, @@ -89,20 +79,22 @@ Error DWARFListTableHeader::extract(DWARFDataExtractor Data, void DWARFListTableHeader::dump(raw_ostream &OS, DIDumpOptions DumpOpts) const { if (DumpOpts.Verbose) OS << format("0x%8.8" PRIx64 ": ", HeaderOffset); - OS << format( - "%s list header: length = 0x%8.8" PRIx64 ", version = 0x%4.4" PRIx16 ", " - "addr_size = 0x%2.2" PRIx8 ", seg_size = 0x%2.2" PRIx8 - ", offset_entry_count = " - "0x%8.8" PRIx32 "\n", - ListTypeString.data(), HeaderData.Length, HeaderData.Version, - HeaderData.AddrSize, HeaderData.SegSize, HeaderData.OffsetEntryCount); + int OffsetDumpWidth = 2 * dwarf::getDwarfOffsetByteSize(Format); + OS << format("%s list header: length = 0x%0*" PRIx64, ListTypeString.data(), + OffsetDumpWidth, HeaderData.Length) + << ", format = " << dwarf::FormatString(Format) + << format(", version = 0x%4.4" PRIx16 ", addr_size = 0x%2.2" PRIx8 + ", seg_size = 0x%2.2" PRIx8 + ", offset_entry_count = 0x%8.8" PRIx32 "\n", + HeaderData.Version, HeaderData.AddrSize, HeaderData.SegSize, + HeaderData.OffsetEntryCount); if (HeaderData.OffsetEntryCount > 0) { OS << "offsets: ["; for (const auto &Off : Offsets) { - OS << format("\n0x%8.8" PRIx64, Off); + OS << format("\n0x%0*" PRIx64, OffsetDumpWidth, Off); if (DumpOpts.Verbose) - OS << format(" => 0x%8.8" PRIx64, + OS << format(" => 0x%08" PRIx64, Off + HeaderOffset + getHeaderSize(Format)); } OS << "\n]\n"; diff --git 
a/llvm/lib/DebugInfo/DWARF/DWARFTypeUnit.cpp b/llvm/lib/DebugInfo/DWARF/DWARFTypeUnit.cpp index bb81090ba25c..c219f34bbc31 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFTypeUnit.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFTypeUnit.cpp @@ -20,25 +20,28 @@ using namespace llvm; void DWARFTypeUnit::dump(raw_ostream &OS, DIDumpOptions DumpOpts) { DWARFDie TD = getDIEForOffset(getTypeOffset() + getOffset()); const char *Name = TD.getName(DINameKind::ShortName); + int OffsetDumpWidth = 2 * dwarf::getDwarfOffsetByteSize(getFormat()); if (DumpOpts.SummarizeTypes) { OS << "name = '" << Name << "'" - << " type_signature = " << format("0x%016" PRIx64, getTypeHash()) - << " length = " << format("0x%08" PRIx64, getLength()) << '\n'; + << ", type_signature = " << format("0x%016" PRIx64, getTypeHash()) + << ", length = " << format("0x%0*" PRIx64, OffsetDumpWidth, getLength()) + << '\n'; return; } OS << format("0x%08" PRIx64, getOffset()) << ": Type Unit:" - << " length = " << format("0x%08" PRIx64, getLength()) - << " version = " << format("0x%04x", getVersion()); + << " length = " << format("0x%0*" PRIx64, OffsetDumpWidth, getLength()) + << ", format = " << dwarf::FormatString(getFormat()) + << ", version = " << format("0x%04x", getVersion()); if (getVersion() >= 5) - OS << " unit_type = " << dwarf::UnitTypeString(getUnitType()); - OS << " abbr_offset = " + OS << ", unit_type = " << dwarf::UnitTypeString(getUnitType()); + OS << ", abbr_offset = " << format("0x%04" PRIx64, getAbbreviations()->getOffset()) - << " addr_size = " << format("0x%02x", getAddressByteSize()) - << " name = '" << Name << "'" - << " type_signature = " << format("0x%016" PRIx64, getTypeHash()) - << " type_offset = " << format("0x%04" PRIx64, getTypeOffset()) + << ", addr_size = " << format("0x%02x", getAddressByteSize()) + << ", name = '" << Name << "'" + << ", type_signature = " << format("0x%016" PRIx64, getTypeHash()) + << ", type_offset = " << format("0x%04" PRIx64, getTypeOffset()) << " (next unit at " << 
format("0x%08" PRIx64, getNextUnitOffset()) << ")\n"; diff --git a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp index 7bb019466161..a6d44f04e468 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp @@ -21,7 +21,6 @@ #include "llvm/Support/DataExtractor.h" #include "llvm/Support/Errc.h" #include "llvm/Support/Path.h" -#include "llvm/Support/WithColor.h" #include <algorithm> #include <cassert> #include <cstddef> @@ -74,12 +73,15 @@ void DWARFUnitVector::addUnitsImpl( DWARFDataExtractor Data(Obj, InfoSection, LE, 0); if (!Data.isValidOffset(Offset)) return nullptr; - const DWARFUnitIndex *Index = nullptr; - if (IsDWO) - Index = &getDWARFUnitIndex(Context, SectionKind); DWARFUnitHeader Header; - if (!Header.extract(Context, Data, &Offset, SectionKind, Index, - IndexEntry)) + if (!Header.extract(Context, Data, &Offset, SectionKind)) + return nullptr; + if (!IndexEntry && IsDWO) { + const DWARFUnitIndex &Index = getDWARFUnitIndex( + Context, Header.isTypeUnit() ? DW_SECT_EXT_TYPES : DW_SECT_INFO); + IndexEntry = Index.getFromOffset(Header.getOffset()); + } + if (IndexEntry && !Header.applyIndexEntry(IndexEntry)) return nullptr; std::unique_ptr<DWARFUnit> U; if (Header.isTypeUnit()) @@ -140,7 +142,7 @@ DWARFUnit *DWARFUnitVector::getUnitForOffset(uint64_t Offset) const { DWARFUnit * DWARFUnitVector::getUnitForIndexEntry(const DWARFUnitIndex::Entry &E) { - const auto *CUOff = E.getOffset(DW_SECT_INFO); + const auto *CUOff = E.getContribution(DW_SECT_INFO); if (!CUOff) return nullptr; @@ -182,20 +184,17 @@ DWARFUnit::DWARFUnit(DWARFContext &DC, const DWARFSection &Section, if (IsDWO) { // If we are reading a package file, we need to adjust the location list // data based on the index entries. - StringRef Data = LocSection->Data; + StringRef Data = Header.getVersion() >= 5 + ? 
Context.getDWARFObj().getLoclistsDWOSection().Data + : LocSection->Data; if (auto *IndexEntry = Header.getIndexEntry()) - if (const auto *C = IndexEntry->getOffset(DW_SECT_LOC)) + if (const auto *C = IndexEntry->getContribution( + Header.getVersion() >= 5 ? DW_SECT_LOCLISTS : DW_SECT_EXT_LOC)) Data = Data.substr(C->Offset, C->Length); - DWARFDataExtractor DWARFData = - Header.getVersion() >= 5 - ? DWARFDataExtractor(Context.getDWARFObj(), - Context.getDWARFObj().getLoclistsDWOSection(), - isLittleEndian, getAddressByteSize()) - : DWARFDataExtractor(Data, isLittleEndian, getAddressByteSize()); + DWARFDataExtractor DWARFData(Data, isLittleEndian, getAddressByteSize()); LocTable = std::make_unique<DWARFDebugLoclists>(DWARFData, Header.getVersion()); - } else if (Header.getVersion() >= 5) { LocTable = std::make_unique<DWARFDebugLoclists>( DWARFDataExtractor(Context.getDWARFObj(), @@ -255,20 +254,12 @@ Optional<uint64_t> DWARFUnit::getStringOffsetSectionItem(uint32_t Index) const { bool DWARFUnitHeader::extract(DWARFContext &Context, const DWARFDataExtractor &debug_info, uint64_t *offset_ptr, - DWARFSectionKind SectionKind, - const DWARFUnitIndex *Index, - const DWARFUnitIndex::Entry *Entry) { + DWARFSectionKind SectionKind) { Offset = *offset_ptr; Error Err = Error::success(); - IndexEntry = Entry; - if (!IndexEntry && Index) - IndexEntry = Index->getFromOffset(*offset_ptr); - Length = debug_info.getRelocatedValue(4, offset_ptr, nullptr, &Err); - FormParams.Format = DWARF32; - if (Length == dwarf::DW_LENGTH_DWARF64) { - Length = debug_info.getU64(offset_ptr, &Err); - FormParams.Format = DWARF64; - } + IndexEntry = nullptr; + std::tie(Length, FormParams.Format) = + debug_info.getInitialLength(offset_ptr, &Err); FormParams.Version = debug_info.getU16(offset_ptr, &Err); if (FormParams.Version >= 5) { UnitType = debug_info.getU8(offset_ptr, &Err); @@ -281,22 +272,11 @@ bool DWARFUnitHeader::extract(DWARFContext &Context, FormParams.AddrSize = debug_info.getU8(offset_ptr, 
&Err); // Fake a unit type based on the section type. This isn't perfect, // but distinguishing compile and type units is generally enough. - if (SectionKind == DW_SECT_TYPES) + if (SectionKind == DW_SECT_EXT_TYPES) UnitType = DW_UT_type; else UnitType = DW_UT_compile; } - if (IndexEntry) { - if (AbbrOffset) - return false; - auto *UnitContrib = IndexEntry->getOffset(); - if (!UnitContrib || UnitContrib->Length != (Length + 4)) - return false; - auto *AbbrEntry = IndexEntry->getOffset(DW_SECT_ABBREV); - if (!AbbrEntry) - return false; - AbbrOffset = AbbrEntry->Offset; - } if (isTypeUnit()) { TypeHash = debug_info.getU64(offset_ptr, &Err); TypeOffset = debug_info.getUnsigned( @@ -320,7 +300,7 @@ bool DWARFUnitHeader::extract(DWARFContext &Context, TypeOffset < getLength() + getUnitLengthFieldByteSize(); bool LengthOK = debug_info.isValidOffset(getNextUnitOffset() - 1); bool VersionOK = DWARFContext::isSupportedVersion(getVersion()); - bool AddrSizeOK = getAddressByteSize() == 4 || getAddressByteSize() == 8; + bool AddrSizeOK = DWARFContext::isAddressSizeSupported(getAddressByteSize()); if (!LengthOK || !VersionOK || !AddrSizeOK || !TypeOffsetOK) return false; @@ -330,6 +310,23 @@ bool DWARFUnitHeader::extract(DWARFContext &Context, return true; } +bool DWARFUnitHeader::applyIndexEntry(const DWARFUnitIndex::Entry *Entry) { + assert(Entry); + assert(!IndexEntry); + IndexEntry = Entry; + if (AbbrOffset) + return false; + auto *UnitContrib = IndexEntry->getContribution(); + if (!UnitContrib || + UnitContrib->Length != (getLength() + getUnitLengthFieldByteSize())) + return false; + auto *AbbrEntry = IndexEntry->getContribution(DW_SECT_ABBREV); + if (!AbbrEntry) + return false; + AbbrOffset = AbbrEntry->Offset; + return true; +} + // Parse the rangelist table header, including the optional array of offsets // following it (DWARF v5 and later). 
template<typename ListTableType> @@ -426,15 +423,17 @@ void DWARFUnit::extractDIEsToVector( // should always terminate at or before the start of the next compilation // unit header). if (DIEOffset > NextCUOffset) - WithColor::warning() << format("DWARF compile unit extends beyond its " - "bounds cu 0x%8.8" PRIx64 " " - "at 0x%8.8" PRIx64 "\n", - getOffset(), DIEOffset); + Context.getWarningHandler()( + createStringError(errc::invalid_argument, + "DWARF compile unit extends beyond its " + "bounds cu 0x%8.8" PRIx64 " " + "at 0x%8.8" PRIx64 "\n", + getOffset(), DIEOffset)); } void DWARFUnit::extractDIEsIfNeeded(bool CUDieOnly) { if (Error e = tryExtractDIEsIfNeeded(CUDieOnly)) - WithColor::error() << toString(std::move(e)); + Context.getRecoverableErrorHandler()(std::move(e)); } Error DWARFUnit::tryExtractDIEsIfNeeded(bool CUDieOnly) { @@ -492,9 +491,17 @@ Error DWARFUnit::tryExtractDIEsIfNeeded(bool CUDieOnly) { // DWARF v5 uses the .debug_rnglists and .debug_rnglists.dwo sections to // describe address ranges. if (getVersion() >= 5) { - if (IsDWO) - setRangesSection(&Context.getDWARFObj().getRnglistsDWOSection(), 0); - else + // In case of DWP, the base offset from the index has to be added. + uint64_t ContributionBaseOffset = 0; + if (IsDWO) { + if (auto *IndexEntry = Header.getIndexEntry()) + if (auto *Contrib = IndexEntry->getContribution(DW_SECT_RNGLISTS)) + ContributionBaseOffset = Contrib->Offset; + setRangesSection( + &Context.getDWARFObj().getRnglistsDWOSection(), + ContributionBaseOffset + + DWARFListTableHeader::getHeaderSize(Header.getFormat())); + } else setRangesSection(&Context.getDWARFObj().getRnglistsSection(), toSectionOffset(UnitDie.find(DW_AT_rnglists_base), 0)); if (RangeSection->Data.size()) { @@ -514,19 +521,26 @@ Error DWARFUnit::tryExtractDIEsIfNeeded(bool CUDieOnly) { // In a split dwarf unit, there is no DW_AT_rnglists_base attribute. // Adjust RangeSectionBase to point past the table header. 
if (IsDWO && RngListTable) - RangeSectionBase = RngListTable->getHeaderSize(); + RangeSectionBase = + ContributionBaseOffset + RngListTable->getHeaderSize(); } // In a split dwarf unit, there is no DW_AT_loclists_base attribute. // Setting LocSectionBase to point past the table header. - if (IsDWO) - setLocSection(&Context.getDWARFObj().getLoclistsDWOSection(), + if (IsDWO) { + auto &DWOSection = Context.getDWARFObj().getLoclistsDWOSection(); + if (DWOSection.Data.empty()) + return Error::success(); + setLocSection(&DWOSection, DWARFListTableHeader::getHeaderSize(Header.getFormat())); - else + } else if (auto X = UnitDie.find(DW_AT_loclists_base)) { setLocSection(&Context.getDWARFObj().getLoclistsSection(), - toSectionOffset(UnitDie.find(DW_AT_loclists_base), 0)); + toSectionOffset(X, 0)); + } else { + return Error::success(); + } - if (LocSection->Data.size()) { + if (LocSection) { if (IsDWO) LoclistTableHeader.emplace(".debug_loclists.dwo", "locations"); else @@ -542,6 +556,9 @@ Error DWARFUnit::tryExtractDIEsIfNeeded(bool CUDieOnly) { " list table with base = 0x%" PRIx64 "\n", Offset); Offset -= HeaderSize; + if (auto *IndexEntry = Header.getIndexEntry()) + if (const auto *Contrib = IndexEntry->getContribution(DW_SECT_LOCLISTS)) + Offset += Contrib->Offset; if (Error E = LoclistTableHeader->extract(Data, &Offset)) return createStringError(errc::invalid_argument, "parsing a loclist table: " + @@ -596,9 +613,10 @@ bool DWARFUnit::parseDWO() { RangesDA, RangeSectionBase, Header.getFormat())) DWO->RngListTable = TableOrError.get(); else - WithColor::error() << "parsing a range list table: " - << toString(TableOrError.takeError()) - << '\n'; + Context.getRecoverableErrorHandler()(createStringError( + errc::invalid_argument, "parsing a range list table: %s", + toString(TableOrError.takeError()).c_str())); + if (DWO->RngListTable) DWO->RangeSectionBase = DWO->RngListTable->getHeaderSize(); } else { @@ -759,7 +777,7 @@ const DWARFUnitIndex 
&llvm::getDWARFUnitIndex(DWARFContext &Context, DWARFSectionKind Kind) { if (Kind == DW_SECT_INFO) return Context.getCUIndex(); - assert(Kind == DW_SECT_TYPES); + assert(Kind == DW_SECT_EXT_TYPES); return Context.getTUIndex(); } @@ -944,18 +962,12 @@ parseDWARFStringOffsetsTableHeader(DWARFDataExtractor &DA, Expected<Optional<StrOffsetsContributionDescriptor>> DWARFUnit::determineStringOffsetsTableContribution(DWARFDataExtractor &DA) { - uint64_t Offset; - if (IsDWO) { - Offset = 0; - if (DA.getData().data() == nullptr) - return None; - } else { - auto OptOffset = toSectionOffset(getUnitDIE().find(DW_AT_str_offsets_base)); - if (!OptOffset) - return None; - Offset = *OptOffset; - } - auto DescOrError = parseDWARFStringOffsetsTableHeader(DA, Header.getFormat(), Offset); + assert(!IsDWO); + auto OptOffset = toSectionOffset(getUnitDIE().find(DW_AT_str_offsets_base)); + if (!OptOffset) + return None; + auto DescOrError = + parseDWARFStringOffsetsTableHeader(DA, Header.getFormat(), *OptOffset); if (!DescOrError) return DescOrError.takeError(); return *DescOrError; @@ -963,10 +975,11 @@ DWARFUnit::determineStringOffsetsTableContribution(DWARFDataExtractor &DA) { Expected<Optional<StrOffsetsContributionDescriptor>> DWARFUnit::determineStringOffsetsTableContributionDWO(DWARFDataExtractor & DA) { + assert(IsDWO); uint64_t Offset = 0; auto IndexEntry = Header.getIndexEntry(); const auto *C = - IndexEntry ? IndexEntry->getOffset(DW_SECT_STR_OFFSETS) : nullptr; + IndexEntry ? IndexEntry->getContribution(DW_SECT_STR_OFFSETS) : nullptr; if (C) Offset = C->Offset; if (getVersion() >= 5) { @@ -983,11 +996,10 @@ DWARFUnit::determineStringOffsetsTableContributionDWO(DWARFDataExtractor & DA) { // index table (in a package file). In a .dwo file it is simply // the length of the string offsets section. 
if (!IndexEntry) - return { - Optional<StrOffsetsContributionDescriptor>( - {0, StringOffsetSection.Data.size(), 4, DWARF32})}; + return {Optional<StrOffsetsContributionDescriptor>( + {0, StringOffsetSection.Data.size(), 4, Header.getFormat()})}; if (C) return {Optional<StrOffsetsContributionDescriptor>( - {C->Offset, C->Length, 4, DWARF32})}; + {C->Offset, C->Length, 4, Header.getFormat()})}; return None; } diff --git a/llvm/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp b/llvm/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp index f29c1e6cc5c7..3d4cecce27db 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp @@ -17,19 +17,102 @@ using namespace llvm; +namespace { + +enum class DWARFSectionKindV2 { + DW_SECT_INFO = 1, + DW_SECT_TYPES = 2, + DW_SECT_ABBREV = 3, + DW_SECT_LINE = 4, + DW_SECT_LOC = 5, + DW_SECT_STR_OFFSETS = 6, + DW_SECT_MACINFO = 7, + DW_SECT_MACRO = 8, +}; + +} // namespace + +// Return true if the section identifier is defined in the DWARFv5 standard. +constexpr bool isKnownV5SectionID(uint32_t ID) { + return ID >= DW_SECT_INFO && ID <= DW_SECT_RNGLISTS && + ID != DW_SECT_EXT_TYPES; +} + +uint32_t llvm::serializeSectionKind(DWARFSectionKind Kind, + unsigned IndexVersion) { + if (IndexVersion == 5) { + assert(isKnownV5SectionID(Kind)); + return static_cast<uint32_t>(Kind); + } + assert(IndexVersion == 2); + switch (Kind) { +#define CASE(S,T) \ + case DW_SECT_##S: \ + return static_cast<uint32_t>(DWARFSectionKindV2::DW_SECT_##T) + CASE(INFO, INFO); + CASE(EXT_TYPES, TYPES); + CASE(ABBREV, ABBREV); + CASE(LINE, LINE); + CASE(EXT_LOC, LOC); + CASE(STR_OFFSETS, STR_OFFSETS); + CASE(EXT_MACINFO, MACINFO); + CASE(MACRO, MACRO); +#undef CASE + default: + // All other section kinds have no corresponding values in v2 indexes. 
+ llvm_unreachable("Invalid DWARFSectionKind"); + } +} + +DWARFSectionKind llvm::deserializeSectionKind(uint32_t Value, + unsigned IndexVersion) { + if (IndexVersion == 5) + return isKnownV5SectionID(Value) + ? static_cast<DWARFSectionKind>(Value) + : DW_SECT_EXT_unknown; + assert(IndexVersion == 2); + switch (static_cast<DWARFSectionKindV2>(Value)) { +#define CASE(S,T) \ + case DWARFSectionKindV2::DW_SECT_##S: \ + return DW_SECT_##T + CASE(INFO, INFO); + CASE(TYPES, EXT_TYPES); + CASE(ABBREV, ABBREV); + CASE(LINE, LINE); + CASE(LOC, EXT_LOC); + CASE(STR_OFFSETS, STR_OFFSETS); + CASE(MACINFO, EXT_MACINFO); + CASE(MACRO, MACRO); +#undef CASE + } + return DW_SECT_EXT_unknown; +} + bool DWARFUnitIndex::Header::parse(DataExtractor IndexData, uint64_t *OffsetPtr) { + const uint64_t BeginOffset = *OffsetPtr; if (!IndexData.isValidOffsetForDataOfSize(*OffsetPtr, 16)) return false; + // GCC Debug Fission defines the version as an unsigned 32-bit field + // with value of 2, https://gcc.gnu.org/wiki/DebugFissionDWP. + // DWARFv5 defines the same space as an uhalf version field with value of 5 + // and a 2 bytes long padding, see Section 7.3.5.3. Version = IndexData.getU32(OffsetPtr); + if (Version != 2) { + *OffsetPtr = BeginOffset; + Version = IndexData.getU16(OffsetPtr); + if (Version != 5) + return false; + *OffsetPtr += 2; // Skip padding. 
+ } NumColumns = IndexData.getU32(OffsetPtr); NumUnits = IndexData.getU32(OffsetPtr); NumBuckets = IndexData.getU32(OffsetPtr); - return Version <= 2; + return true; } void DWARFUnitIndex::Header::dump(raw_ostream &OS) const { - OS << format("version = %u slots = %u\n\n", Version, NumBuckets); + OS << format("version = %u, units = %u, slots = %u\n\n", Version, NumUnits, NumBuckets); } bool DWARFUnitIndex::parse(DataExtractor IndexData) { @@ -49,6 +132,10 @@ bool DWARFUnitIndex::parseImpl(DataExtractor IndexData) { if (!Header.parse(IndexData, &Offset)) return false; + // Fix InfoColumnKind: in DWARFv5, type units are in .debug_info.dwo. + if (Header.Version == 5) + InfoColumnKind = DW_SECT_INFO; + if (!IndexData.isValidOffsetForDataOfSize( Offset, Header.NumBuckets * (8 + 4) + (2 * Header.NumUnits + 1) * 4 * Header.NumColumns)) @@ -58,6 +145,7 @@ bool DWARFUnitIndex::parseImpl(DataExtractor IndexData) { auto Contribs = std::make_unique<Entry::SectionContribution *[]>(Header.NumUnits); ColumnKinds = std::make_unique<DWARFSectionKind[]>(Header.NumColumns); + RawSectionIds = std::make_unique<uint32_t[]>(Header.NumColumns); // Read Hash Table of Signatures for (unsigned i = 0; i != Header.NumBuckets; ++i) @@ -76,7 +164,8 @@ bool DWARFUnitIndex::parseImpl(DataExtractor IndexData) { // Read the Column Headers for (unsigned i = 0; i != Header.NumColumns; ++i) { - ColumnKinds[i] = static_cast<DWARFSectionKind>(IndexData.getU32(&Offset)); + RawSectionIds[i] = IndexData.getU32(&Offset); + ColumnKinds[i] = deserializeSectionKind(RawSectionIds[i], Header.Version); if (ColumnKinds[i] == InfoColumnKind) { if (InfoColumn != -1) return false; @@ -105,20 +194,21 @@ bool DWARFUnitIndex::parseImpl(DataExtractor IndexData) { } StringRef DWARFUnitIndex::getColumnHeader(DWARFSectionKind DS) { -#define CASE(DS) \ - case DW_SECT_##DS: \ - return #DS; switch (DS) { - CASE(INFO); - CASE(TYPES); - CASE(ABBREV); - CASE(LINE); - CASE(LOC); - CASE(STR_OFFSETS); - CASE(MACINFO); - CASE(MACRO); 
+#define HANDLE_DW_SECT(ID, NAME) \ + case DW_SECT_##NAME: \ + return #NAME; +#include "llvm/BinaryFormat/Dwarf.def" + case DW_SECT_EXT_TYPES: + return "TYPES"; + case DW_SECT_EXT_LOC: + return "LOC"; + case DW_SECT_EXT_MACINFO: + return "MACINFO"; + case DW_SECT_EXT_unknown: + return StringRef(); } - llvm_unreachable("unknown DWARFSectionKind"); + llvm_unreachable("Unknown DWARFSectionKind"); } void DWARFUnitIndex::dump(raw_ostream &OS) const { @@ -127,8 +217,14 @@ void DWARFUnitIndex::dump(raw_ostream &OS) const { Header.dump(OS); OS << "Index Signature "; - for (unsigned i = 0; i != Header.NumColumns; ++i) - OS << ' ' << left_justify(getColumnHeader(ColumnKinds[i]), 24); + for (unsigned i = 0; i != Header.NumColumns; ++i) { + DWARFSectionKind Kind = ColumnKinds[i]; + StringRef Name = getColumnHeader(Kind); + if (!Name.empty()) + OS << ' ' << left_justify(Name, 24); + else + OS << format(" Unknown: %-15" PRIu32, RawSectionIds[i]); + } OS << "\n----- ------------------"; for (unsigned i = 0; i != Header.NumColumns; ++i) OS << " ------------------------"; @@ -148,7 +244,7 @@ void DWARFUnitIndex::dump(raw_ostream &OS) const { } const DWARFUnitIndex::Entry::SectionContribution * -DWARFUnitIndex::Entry::getOffset(DWARFSectionKind Sec) const { +DWARFUnitIndex::Entry::getContribution(DWARFSectionKind Sec) const { uint32_t i = 0; for (; i != Index->Header.NumColumns; ++i) if (Index->ColumnKinds[i] == Sec) @@ -157,7 +253,7 @@ DWARFUnitIndex::Entry::getOffset(DWARFSectionKind Sec) const { } const DWARFUnitIndex::Entry::SectionContribution * -DWARFUnitIndex::Entry::getOffset() const { +DWARFUnitIndex::Entry::getContribution() const { return &Contributions[Index->InfoColumn]; } diff --git a/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp b/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp index 1fd6c1d7d282..3a83317a73a3 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp @@ -26,24 +26,26 @@ using namespace llvm; using namespace 
dwarf; using namespace object; -DWARFVerifier::DieRangeInfo::address_range_iterator +Optional<DWARFAddressRange> DWARFVerifier::DieRangeInfo::insert(const DWARFAddressRange &R) { auto Begin = Ranges.begin(); auto End = Ranges.end(); auto Pos = std::lower_bound(Begin, End, R); if (Pos != End) { - if (Pos->intersects(R)) - return std::move(Pos); - if (Pos != Begin) { - auto Iter = Pos - 1; - if (Iter->intersects(R)) - return std::move(Iter); - } + DWARFAddressRange Range(*Pos); + if (Pos->merge(R)) + return Range; + } + if (Pos != Begin) { + auto Iter = Pos - 1; + DWARFAddressRange Range(*Iter); + if (Iter->merge(R)) + return Range; } Ranges.insert(Pos, R); - return Ranges.end(); + return None; } DWARFVerifier::DieRangeInfo::die_range_info_iterator @@ -112,11 +114,9 @@ bool DWARFVerifier::verifyUnitHeader(const DWARFDataExtractor DebugInfoData, bool ValidAbbrevOffset = true; uint64_t OffsetStart = *Offset; - Length = DebugInfoData.getU32(Offset); - if (Length == dwarf::DW_LENGTH_DWARF64) { - Length = DebugInfoData.getU64(Offset); - isUnitDWARF64 = true; - } + DwarfFormat Format; + std::tie(Length, Format) = DebugInfoData.getInitialLength(Offset); + isUnitDWARF64 = Format == DWARF64; Version = DebugInfoData.getU16(Offset); if (Version >= 5) { @@ -135,7 +135,7 @@ bool DWARFVerifier::verifyUnitHeader(const DWARFDataExtractor DebugInfoData, ValidLength = DebugInfoData.isValidOffset(OffsetStart + Length + 3); ValidVersion = DWARFContext::isSupportedVersion(Version); - ValidAddrSize = AddrSize == 4 || AddrSize == 8; + ValidAddrSize = DWARFContext::isAddressSizeSupported(AddrSize); if (!ValidLength || !ValidVersion || !ValidAddrSize || !ValidAbbrevOffset || !ValidType) { Success = false; @@ -307,7 +307,7 @@ unsigned DWARFVerifier::verifyUnitSection(const DWARFSection &S, Unit = TypeUnitVector.addUnit(std::make_unique<DWARFTypeUnit>( DCtx, S, Header, DCtx.getDebugAbbrev(), &DObj.getRangesSection(), &DObj.getLocSection(), DObj.getStrSection(), - DObj.getStrOffsetsSection(), 
&DObj.getAppleObjCSection(), + DObj.getStrOffsetsSection(), &DObj.getAddrSection(), DObj.getLineSection(), DCtx.isLittleEndian(), false, TypeUnitVector)); break; @@ -321,7 +321,7 @@ unsigned DWARFVerifier::verifyUnitSection(const DWARFSection &S, Unit = CompileUnitVector.addUnit(std::make_unique<DWARFCompileUnit>( DCtx, S, Header, DCtx.getDebugAbbrev(), &DObj.getRangesSection(), &DObj.getLocSection(), DObj.getStrSection(), - DObj.getStrOffsetsSection(), &DObj.getAppleObjCSection(), + DObj.getStrOffsetsSection(), &DObj.getAddrSection(), DObj.getLineSection(), DCtx.isLittleEndian(), false, CompileUnitVector)); break; @@ -354,7 +354,7 @@ bool DWARFVerifier::handleDebugInfo() { OS << "Verifying .debug_types Unit Header Chain...\n"; DObj.forEachTypesSections([&](const DWARFSection &S) { - NumErrors += verifyUnitSection(S, DW_SECT_TYPES); + NumErrors += verifyUnitSection(S, DW_SECT_EXT_TYPES); }); return NumErrors == 0; } @@ -399,22 +399,30 @@ unsigned DWARFVerifier::verifyDieRanges(const DWARFDie &Die, // processing an object file. if (!IsObjectFile || IsMachOObject || Die.getTag() != DW_TAG_compile_unit) { + bool DumpDieAfterError = false; for (auto Range : Ranges) { if (!Range.valid()) { ++NumErrors; error() << "Invalid address range " << Range << "\n"; + DumpDieAfterError = true; continue; } - // Verify that ranges don't intersect. - const auto IntersectingRange = RI.insert(Range); - if (IntersectingRange != RI.Ranges.end()) { + // Verify that ranges don't intersect and also build up the DieRangeInfo + // address ranges. Don't break out of the loop below early, or we will + // think this DIE doesn't have all of the address ranges it is supposed + // to have. Compile units often have DW_AT_ranges that can contain one or + // more dead stripped address ranges which tend to all be at the same + // address: 0 or -1. 
+ if (auto PrevRange = RI.insert(Range)) { ++NumErrors; - error() << "DIE has overlapping address ranges: " << Range << " and " - << *IntersectingRange << "\n"; - break; + error() << "DIE has overlapping ranges in DW_AT_ranges attribute: " + << *PrevRange << " and " << Range << '\n'; + DumpDieAfterError = true; } } + if (DumpDieAfterError) + dump(Die, 2) << '\n'; } // Verify that children don't intersect. @@ -459,8 +467,15 @@ unsigned DWARFVerifier::verifyDebugInfoAttribute(const DWARFDie &Die, case DW_AT_ranges: // Make sure the offset in the DW_AT_ranges attribute is valid. if (auto SectionOffset = AttrValue.Value.getAsSectionOffset()) { - if (*SectionOffset >= DObj.getRangesSection().Data.size()) - ReportError("DW_AT_ranges offset is beyond .debug_ranges bounds:"); + unsigned DwarfVersion = Die.getDwarfUnit()->getVersion(); + const DWARFSection &RangeSection = DwarfVersion < 5 + ? DObj.getRangesSection() + : DObj.getRnglistsSection(); + if (*SectionOffset >= RangeSection.Data.size()) + ReportError( + "DW_AT_ranges offset is beyond " + + StringRef(DwarfVersion < 5 ? 
".debug_ranges" : ".debug_rnglists") + + " bounds: " + llvm::formatv("{0:x8}", *SectionOffset)); break; } ReportError("DIE has invalid DW_AT_ranges encoding:"); @@ -481,8 +496,8 @@ unsigned DWARFVerifier::verifyDebugInfoAttribute(const DWARFDie &Die, DWARFUnit *U = Die.getDwarfUnit(); for (const auto &Entry : *Loc) { DataExtractor Data(toStringRef(Entry.Expr), DCtx.isLittleEndian(), 0); - DWARFExpression Expression(Data, U->getVersion(), - U->getAddressByteSize()); + DWARFExpression Expression(Data, U->getAddressByteSize(), + U->getFormParams().Format); bool Error = any_of(Expression, [](DWARFExpression::Operation &Op) { return Op.isError(); }); @@ -758,7 +773,7 @@ void DWARFVerifier::verifyDebugLineRows() { << "] row[" << RowIndex << "] decreases in address from previous row:\n"; - DWARFDebugLine::Row::dumpTableHeader(OS); + DWARFDebugLine::Row::dumpTableHeader(OS, 0); if (RowIndex > 0) LineTable->Rows[RowIndex - 1].dump(OS); Row.dump(OS); @@ -776,7 +791,7 @@ void DWARFVerifier::verifyDebugLineRows() { << " (valid values are [" << (isDWARF5 ? "0," : "1,") << LineTable->Prologue.FileNames.size() << (isDWARF5 ? 
")" : "]") << "):\n"; - DWARFDebugLine::Row::dumpTableHeader(OS); + DWARFDebugLine::Row::dumpTableHeader(OS, 0); Row.dump(OS); OS << '\n'; } @@ -1290,7 +1305,8 @@ static bool isVariableIndexable(const DWARFDie &Die, DWARFContext &DCtx) { for (const auto &Entry : *Loc) { DataExtractor Data(toStringRef(Entry.Expr), DCtx.isLittleEndian(), U->getAddressByteSize()); - DWARFExpression Expression(Data, U->getVersion(), U->getAddressByteSize()); + DWARFExpression Expression(Data, U->getAddressByteSize(), + U->getFormParams().Format); bool IsInteresting = any_of(Expression, [](DWARFExpression::Operation &Op) { return !Op.isError() && (Op.getCode() == DW_OP_addr || Op.getCode() == DW_OP_form_tls_address || @@ -1330,9 +1346,7 @@ unsigned DWARFVerifier::verifyNameIndexCompleteness( // "The name index must contain an entry for each debugging information entry // that defines a named subprogram, label, variable, type, or namespace, // subject to ..." - // Instead whitelisting all TAGs representing a "type" or a "subprogram", to - // make sure we catch any missing items, we instead blacklist all TAGs that we - // know shouldn't be indexed. + // Explicitly exclude all TAGs that we know shouldn't be indexed. switch (Die.getTag()) { // Compile units and modules have names but shouldn't be indexed. case DW_TAG_compile_unit: diff --git a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp new file mode 100644 index 000000000000..1e527ab3916e --- /dev/null +++ b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp @@ -0,0 +1,572 @@ +//===- DwarfTransformer.cpp -----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include <thread> +#include <unordered_set> + +#include "llvm/DebugInfo/DIContext.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ThreadPool.h" +#include "llvm/Support/raw_ostream.h" + +#include "llvm/DebugInfo/GSYM/DwarfTransformer.h" +#include "llvm/DebugInfo/GSYM/FunctionInfo.h" +#include "llvm/DebugInfo/GSYM/GsymCreator.h" +#include "llvm/DebugInfo/GSYM/GsymReader.h" +#include "llvm/DebugInfo/GSYM/InlineInfo.h" + +using namespace llvm; +using namespace gsym; + +struct llvm::gsym::CUInfo { + const DWARFDebugLine::LineTable *LineTable; + const char *CompDir; + std::vector<uint32_t> FileCache; + uint64_t Language = 0; + uint8_t AddrSize = 0; + + CUInfo(DWARFContext &DICtx, DWARFCompileUnit *CU) { + LineTable = DICtx.getLineTableForUnit(CU); + CompDir = CU->getCompilationDir(); + FileCache.clear(); + if (LineTable) + FileCache.assign(LineTable->Prologue.FileNames.size() + 1, UINT32_MAX); + DWARFDie Die = CU->getUnitDIE(); + Language = dwarf::toUnsigned(Die.find(dwarf::DW_AT_language), 0); + AddrSize = CU->getAddressByteSize(); + } + + /// Return true if Addr is the highest address for a given compile unit. The + /// highest address is encoded as -1, of all ones in the address. These high + /// addresses are used by some linkers to indicate that a function has been + /// dead stripped or didn't end up in the linked executable. + bool isHighestAddress(uint64_t Addr) const { + if (AddrSize == 4) + return Addr == UINT32_MAX; + else if (AddrSize == 8) + return Addr == UINT64_MAX; + return false; + } + + /// Convert a DWARF compile unit file index into a GSYM global file index. + /// + /// Each compile unit in DWARF has its own file table in the line table + /// prologue. 
GSYM has a single large file table that applies to all files + /// from all of the info in a GSYM file. This function converts between the + /// two and caches and DWARF CU file index that has already been converted so + /// the first client that asks for a compile unit file index will end up + /// doing the conversion, and subsequent clients will get the cached GSYM + /// index. + uint32_t DWARFToGSYMFileIndex(GsymCreator &Gsym, uint32_t DwarfFileIdx) { + if (!LineTable) + return 0; + assert(DwarfFileIdx < FileCache.size()); + uint32_t &GsymFileIdx = FileCache[DwarfFileIdx]; + if (GsymFileIdx != UINT32_MAX) + return GsymFileIdx; + std::string File; + if (LineTable->getFileNameByIndex( + DwarfFileIdx, CompDir, + DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, File)) + GsymFileIdx = Gsym.insertFile(File); + else + GsymFileIdx = 0; + return GsymFileIdx; + } +}; + + +static DWARFDie GetParentDeclContextDIE(DWARFDie &Die) { + if (DWARFDie SpecDie = + Die.getAttributeValueAsReferencedDie(dwarf::DW_AT_specification)) { + if (DWARFDie SpecParent = GetParentDeclContextDIE(SpecDie)) + return SpecParent; + } + if (DWARFDie AbstDie = + Die.getAttributeValueAsReferencedDie(dwarf::DW_AT_abstract_origin)) { + if (DWARFDie AbstParent = GetParentDeclContextDIE(AbstDie)) + return AbstParent; + } + + // We never want to follow parent for inlined subroutine - that would + // give us information about where the function is inlined, not what + // function is inlined + if (Die.getTag() == dwarf::DW_TAG_inlined_subroutine) + return DWARFDie(); + + DWARFDie ParentDie = Die.getParent(); + if (!ParentDie) + return DWARFDie(); + + switch (ParentDie.getTag()) { + case dwarf::DW_TAG_namespace: + case dwarf::DW_TAG_structure_type: + case dwarf::DW_TAG_union_type: + case dwarf::DW_TAG_class_type: + case dwarf::DW_TAG_subprogram: + return ParentDie; // Found parent decl context DIE + case dwarf::DW_TAG_lexical_block: + return GetParentDeclContextDIE(ParentDie); + default: + break; + } + 
+ return DWARFDie(); +} + +/// Get the GsymCreator string table offset for the qualified name for the +/// DIE passed in. This function will avoid making copies of any strings in +/// the GsymCreator when possible. We don't need to copy a string when the +/// string comes from our .debug_str section or is an inlined string in the +/// .debug_info. If we create a qualified name string in this function by +/// combining multiple strings in the DWARF string table or info, we will make +/// a copy of the string when we add it to the string table. +static Optional<uint32_t> getQualifiedNameIndex(DWARFDie &Die, + uint64_t Language, + GsymCreator &Gsym) { + // If the dwarf has mangled name, use mangled name + if (auto LinkageName = + dwarf::toString(Die.findRecursively({dwarf::DW_AT_MIPS_linkage_name, + dwarf::DW_AT_linkage_name}), + nullptr)) + return Gsym.insertString(LinkageName, /* Copy */ false); + + StringRef ShortName(Die.getName(DINameKind::ShortName)); + if (ShortName.empty()) + return llvm::None; + + // For C++ and ObjC, prepend names of all parent declaration contexts + if (!(Language == dwarf::DW_LANG_C_plus_plus || + Language == dwarf::DW_LANG_C_plus_plus_03 || + Language == dwarf::DW_LANG_C_plus_plus_11 || + Language == dwarf::DW_LANG_C_plus_plus_14 || + Language == dwarf::DW_LANG_ObjC_plus_plus || + // This should not be needed for C, but we see C++ code marked as C + // in some binaries. 
This should hurt, so let's do it for C as well + Language == dwarf::DW_LANG_C)) + return Gsym.insertString(ShortName, /* Copy */ false); + + // Some GCC optimizations create functions with names ending with .isra.<num> + // or .part.<num> and those names are just DW_AT_name, not DW_AT_linkage_name + // If it looks like it could be the case, don't add any prefix + if (ShortName.startswith("_Z") && + (ShortName.contains(".isra.") || ShortName.contains(".part."))) + return Gsym.insertString(ShortName, /* Copy */ false); + + DWARFDie ParentDeclCtxDie = GetParentDeclContextDIE(Die); + if (ParentDeclCtxDie) { + std::string Name = ShortName.str(); + while (ParentDeclCtxDie) { + StringRef ParentName(ParentDeclCtxDie.getName(DINameKind::ShortName)); + if (!ParentName.empty()) { + // "lambda" names are wrapped in < >. Replace with { } + // to be consistent with demangled names and not to confuse with + // templates + if (ParentName.front() == '<' && ParentName.back() == '>') + Name = "{" + ParentName.substr(1, ParentName.size() - 2).str() + "}" + + "::" + Name; + else + Name = ParentName.str() + "::" + Name; + } + ParentDeclCtxDie = GetParentDeclContextDIE(ParentDeclCtxDie); + } + // Copy the name since we created a new name in a std::string. + return Gsym.insertString(Name, /* Copy */ true); + } + // Don't copy the name since it exists in the DWARF object file. + return Gsym.insertString(ShortName, /* Copy */ false); +} + +static bool hasInlineInfo(DWARFDie Die, uint32_t Depth) { + bool CheckChildren = true; + switch (Die.getTag()) { + case dwarf::DW_TAG_subprogram: + // Don't look into functions within functions. 
+ CheckChildren = Depth == 0; + break; + case dwarf::DW_TAG_inlined_subroutine: + return true; + default: + break; + } + if (!CheckChildren) + return false; + for (DWARFDie ChildDie : Die.children()) { + if (hasInlineInfo(ChildDie, Depth + 1)) + return true; + } + return false; +} + +static void parseInlineInfo(GsymCreator &Gsym, CUInfo &CUI, DWARFDie Die, + uint32_t Depth, FunctionInfo &FI, + InlineInfo &parent) { + if (!hasInlineInfo(Die, Depth)) + return; + + dwarf::Tag Tag = Die.getTag(); + if (Tag == dwarf::DW_TAG_inlined_subroutine) { + // create new InlineInfo and append to parent.children + InlineInfo II; + DWARFAddressRange FuncRange = + DWARFAddressRange(FI.startAddress(), FI.endAddress()); + Expected<DWARFAddressRangesVector> RangesOrError = Die.getAddressRanges(); + if (RangesOrError) { + for (const DWARFAddressRange &Range : RangesOrError.get()) { + // Check that the inlined function is within the range of the function + // info, it might not be in case of split functions + if (FuncRange.LowPC <= Range.LowPC && Range.HighPC <= FuncRange.HighPC) + II.Ranges.insert(AddressRange(Range.LowPC, Range.HighPC)); + } + } + if (II.Ranges.empty()) + return; + + if (auto NameIndex = getQualifiedNameIndex(Die, CUI.Language, Gsym)) + II.Name = *NameIndex; + II.CallFile = CUI.DWARFToGSYMFileIndex( + Gsym, dwarf::toUnsigned(Die.find(dwarf::DW_AT_call_file), 0)); + II.CallLine = dwarf::toUnsigned(Die.find(dwarf::DW_AT_call_line), 0); + // parse all children and append to parent + for (DWARFDie ChildDie : Die.children()) + parseInlineInfo(Gsym, CUI, ChildDie, Depth + 1, FI, II); + parent.Children.emplace_back(std::move(II)); + return; + } + if (Tag == dwarf::DW_TAG_subprogram || Tag == dwarf::DW_TAG_lexical_block) { + // skip this Die and just recurse down + for (DWARFDie ChildDie : Die.children()) + parseInlineInfo(Gsym, CUI, ChildDie, Depth + 1, FI, parent); + } +} + +static void convertFunctionLineTable(raw_ostream &Log, CUInfo &CUI, + DWARFDie Die, GsymCreator 
&Gsym, + FunctionInfo &FI) { + std::vector<uint32_t> RowVector; + const uint64_t StartAddress = FI.startAddress(); + const uint64_t EndAddress = FI.endAddress(); + const uint64_t RangeSize = EndAddress - StartAddress; + const object::SectionedAddress SecAddress{ + StartAddress, object::SectionedAddress::UndefSection}; + + + if (!CUI.LineTable->lookupAddressRange(SecAddress, RangeSize, RowVector)) { + // If we have a DW_TAG_subprogram but no line entries, fall back to using + // the DW_AT_decl_file an d DW_AT_decl_line if we have both attributes. + if (auto FileIdx = + dwarf::toUnsigned(Die.findRecursively({dwarf::DW_AT_decl_file}))) { + if (auto Line = + dwarf::toUnsigned(Die.findRecursively({dwarf::DW_AT_decl_line}))) { + LineEntry LE(StartAddress, CUI.DWARFToGSYMFileIndex(Gsym, *FileIdx), + *Line); + FI.OptLineTable = LineTable(); + FI.OptLineTable->push(LE); + // LE.Addr = EndAddress; + // FI.OptLineTable->push(LE); + } + } + return; + } + + FI.OptLineTable = LineTable(); + DWARFDebugLine::Row PrevRow; + for (uint32_t RowIndex : RowVector) { + // Take file number and line/column from the row. + const DWARFDebugLine::Row &Row = CUI.LineTable->Rows[RowIndex]; + const uint32_t FileIdx = CUI.DWARFToGSYMFileIndex(Gsym, Row.File); + uint64_t RowAddress = Row.Address.Address; + // Watch out for a RowAddress that is in the middle of a line table entry + // in the DWARF. If we pass an address in between two line table entries + // we will get a RowIndex for the previous valid line table row which won't + // be contained in our function. This is usually a bug in the DWARF due to + // linker problems or LTO or other DWARF re-linking so it is worth emitting + // an error, but not worth stopping the creation of the GSYM. 
+ if (!FI.Range.contains(RowAddress)) { + if (RowAddress < FI.Range.Start) { + Log << "error: DIE has a start address whose LowPC is between the " + "line table Row[" << RowIndex << "] with address " + << HEX64(RowAddress) << " and the next one.\n"; + Die.dump(Log, 0, DIDumpOptions::getForSingleDIE()); + RowAddress = FI.Range.Start; + } else { + continue; + } + } + + LineEntry LE(RowAddress, FileIdx, Row.Line); + if (RowIndex != RowVector[0] && Row.Address < PrevRow.Address) { + // We have seen full duplicate line tables for functions in some + // DWARF files. Watch for those here by checking the the last + // row was the function's end address (HighPC) and that the + // current line table entry's address is the same as the first + // line entry we already have in our "function_info.Lines". If + // so break out after printing a warning. + auto FirstLE = FI.OptLineTable->first(); + if (FirstLE && *FirstLE == LE) { + Log << "warning: duplicate line table detected for DIE:\n"; + Die.dump(Log, 0, DIDumpOptions::getForSingleDIE()); + } else { + // Print out (ignore if os == nulls as this is expensive) + Log << "error: line table has addresses that do not " + << "monotonically increase:\n"; + for (uint32_t RowIndex2 : RowVector) { + CUI.LineTable->Rows[RowIndex2].dump(Log); + } + Die.dump(Log, 0, DIDumpOptions::getForSingleDIE()); + } + break; + } + + // Skip multiple line entries for the same file and line. + auto LastLE = FI.OptLineTable->last(); + if (LastLE && LastLE->File == FileIdx && LastLE->Line == Row.Line) + continue; + // Only push a row if it isn't an end sequence. End sequence markers are + // included for the last address in a function or the last contiguous + // address in a sequence. + if (Row.EndSequence) { + // End sequence means that the next line entry could have a lower address + // that the previous entries. So we clear the previous row so we don't + // trigger the line table error about address that do not monotonically + // increase. 
+ PrevRow = DWARFDebugLine::Row(); + } else { + FI.OptLineTable->push(LE); + PrevRow = Row; + } + } + // If no line table rows were added, clear the line table so we don't encode + // one in the GSYM file. + if (FI.OptLineTable->empty()) + FI.OptLineTable = llvm::None; +} + +void DwarfTransformer::handleDie(raw_ostream &OS, CUInfo &CUI, DWARFDie Die) { + switch (Die.getTag()) { + case dwarf::DW_TAG_subprogram: { + Expected<DWARFAddressRangesVector> RangesOrError = Die.getAddressRanges(); + if (!RangesOrError) { + consumeError(RangesOrError.takeError()); + break; + } + const DWARFAddressRangesVector &Ranges = RangesOrError.get(); + if (Ranges.empty()) + break; + auto NameIndex = getQualifiedNameIndex(Die, CUI.Language, Gsym); + if (!NameIndex) { + OS << "error: function at " << HEX64(Die.getOffset()) + << " has no name\n "; + Die.dump(OS, 0, DIDumpOptions::getForSingleDIE()); + break; + } + + // Create a function_info for each range + for (const DWARFAddressRange &Range : Ranges) { + // The low PC must be less than the high PC. Many linkers don't remove + // DWARF for functions that don't get linked into the final executable. + // If both the high and low pc have relocations, linkers will often set + // the address values for both to the same value to indicate the function + // has been removed. Other linkers have been known to set one or both + // PC values to a UINT32_MAX for 4 byte addresses and UINT64_MAX for 8 + // byte addresses to indicate the function isn't valid. The check below + // tries to watch for these cases and abort if it runs into them. + if (Range.LowPC >= Range.HighPC || CUI.isHighestAddress(Range.LowPC)) + break; + + // Many linkers can't remove DWARF and might set the LowPC to zero. Since + // high PC can be an offset from the low PC in more recent DWARF versions + // we need to watch for a zero'ed low pc which we do using + // ValidTextRanges below. 
+ if (!Gsym.IsValidTextAddress(Range.LowPC)) { + // We expect zero and -1 to be invalid addresses in DWARF depending + // on the linker of the DWARF. This indicates a function was stripped + // and the debug info wasn't able to be stripped from the DWARF. If + // the LowPC isn't zero or -1, then we should emit an error. + if (Range.LowPC != 0) { + // Unexpected invalid address, emit an error + Log << "warning: DIE has an address range whose start address is " + "not in any executable sections (" << + *Gsym.GetValidTextRanges() << ") and will not be processed:\n"; + Die.dump(Log, 0, DIDumpOptions::getForSingleDIE()); + } + break; + } + + FunctionInfo FI; + FI.setStartAddress(Range.LowPC); + FI.setEndAddress(Range.HighPC); + FI.Name = *NameIndex; + if (CUI.LineTable) { + convertFunctionLineTable(OS, CUI, Die, Gsym, FI); + } + if (hasInlineInfo(Die, 0)) { + FI.Inline = InlineInfo(); + FI.Inline->Name = *NameIndex; + FI.Inline->Ranges.insert(FI.Range); + parseInlineInfo(Gsym, CUI, Die, 0, FI, *FI.Inline); + } + Gsym.addFunctionInfo(std::move(FI)); + } + } break; + default: + break; + } + for (DWARFDie ChildDie : Die.children()) + handleDie(OS, CUI, ChildDie); +} + +Error DwarfTransformer::convert(uint32_t NumThreads) { + size_t NumBefore = Gsym.getNumFunctionInfos(); + if (NumThreads == 1) { + // Parse all DWARF data from this thread, use the same string/file table + // for everything + for (const auto &CU : DICtx.compile_units()) { + DWARFDie Die = CU->getUnitDIE(false); + CUInfo CUI(DICtx, dyn_cast<DWARFCompileUnit>(CU.get())); + handleDie(Log, CUI, Die); + } + } else { + // LLVM Dwarf parser is not thread-safe and we need to parse all DWARF up + // front before we start accessing any DIEs since there might be + // cross compile unit references in the DWARF. If we don't do this we can + // end up crashing. + + // We need to call getAbbreviations sequentially first so that getUnitDIE() + // only works with its local data. 
+ for (const auto &CU : DICtx.compile_units()) + CU->getAbbreviations(); + + // Now parse all DIEs in case we have cross compile unit references in a + // thread pool. + ThreadPool pool(hardware_concurrency(NumThreads)); + for (const auto &CU : DICtx.compile_units()) + pool.async([&CU]() { CU->getUnitDIE(false /*CUDieOnly*/); }); + pool.wait(); + + // Now convert all DWARF to GSYM in a thread pool. + std::mutex LogMutex; + for (const auto &CU : DICtx.compile_units()) { + DWARFDie Die = CU->getUnitDIE(false /*CUDieOnly*/); + if (Die) { + CUInfo CUI(DICtx, dyn_cast<DWARFCompileUnit>(CU.get())); + pool.async([this, CUI, &LogMutex, Die]() mutable { + std::string ThreadLogStorage; + raw_string_ostream ThreadOS(ThreadLogStorage); + handleDie(ThreadOS, CUI, Die); + ThreadOS.flush(); + if (!ThreadLogStorage.empty()) { + // Print ThreadLogStorage lines into an actual stream under a lock + std::lock_guard<std::mutex> guard(LogMutex); + Log << ThreadLogStorage; + } + }); + } + } + pool.wait(); + } + size_t FunctionsAddedCount = Gsym.getNumFunctionInfos() - NumBefore; + Log << "Loaded " << FunctionsAddedCount << " functions from DWARF.\n"; + return Error::success(); +} + +llvm::Error DwarfTransformer::verify(StringRef GsymPath) { + Log << "Verifying GSYM file \"" << GsymPath << "\":\n"; + + auto Gsym = GsymReader::openFile(GsymPath); + if (!Gsym) + return Gsym.takeError(); + + auto NumAddrs = Gsym->getNumAddresses(); + DILineInfoSpecifier DLIS( + DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, + DILineInfoSpecifier::FunctionNameKind::LinkageName); + std::string gsymFilename; + for (uint32_t I = 0; I < NumAddrs; ++I) { + auto FuncAddr = Gsym->getAddress(I); + if (!FuncAddr) + return createStringError(std::errc::invalid_argument, + "failed to extract address[%i]", I); + + auto FI = Gsym->getFunctionInfo(*FuncAddr); + if (!FI) + return createStringError(std::errc::invalid_argument, + "failed to extract function info for address 0x%" + PRIu64, *FuncAddr); + + for (auto 
Addr = *FuncAddr; Addr < *FuncAddr + FI->size(); ++Addr) { + const object::SectionedAddress SectAddr{ + Addr, object::SectionedAddress::UndefSection}; + auto LR = Gsym->lookup(Addr); + if (!LR) + return LR.takeError(); + + auto DwarfInlineInfos = + DICtx.getInliningInfoForAddress(SectAddr, DLIS); + uint32_t NumDwarfInlineInfos = DwarfInlineInfos.getNumberOfFrames(); + if (NumDwarfInlineInfos == 0) { + DwarfInlineInfos.addFrame( + DICtx.getLineInfoForAddress(SectAddr, DLIS)); + } + + // Check for 1 entry that has no file and line info + if (NumDwarfInlineInfos == 1 && + DwarfInlineInfos.getFrame(0).FileName == "<invalid>") { + DwarfInlineInfos = DIInliningInfo(); + NumDwarfInlineInfos = 0; + } + if (NumDwarfInlineInfos > 0 && + NumDwarfInlineInfos != LR->Locations.size()) { + Log << "error: address " << HEX64(Addr) << " has " + << NumDwarfInlineInfos << " DWARF inline frames and GSYM has " + << LR->Locations.size() << "\n"; + Log << " " << NumDwarfInlineInfos << " DWARF frames:\n"; + for (size_t Idx = 0; Idx < NumDwarfInlineInfos; ++Idx) { + const auto dii = DwarfInlineInfos.getFrame(Idx); + Log << " [" << Idx << "]: " << dii.FunctionName << " @ " + << dii.FileName << ':' << dii.Line << '\n'; + } + Log << " " << LR->Locations.size() << " GSYM frames:\n"; + for (size_t Idx = 0, count = LR->Locations.size(); + Idx < count; ++Idx) { + const auto &gii = LR->Locations[Idx]; + Log << " [" << Idx << "]: " << gii.Name << " @ " << gii.Dir + << '/' << gii.Base << ':' << gii.Line << '\n'; + } + DwarfInlineInfos = DICtx.getInliningInfoForAddress(SectAddr, DLIS); + Gsym->dump(Log, *FI); + continue; + } + + for (size_t Idx = 0, count = LR->Locations.size(); Idx < count; + ++Idx) { + const auto &gii = LR->Locations[Idx]; + if (Idx < NumDwarfInlineInfos) { + const auto dii = DwarfInlineInfos.getFrame(Idx); + gsymFilename = LR->getSourceFile(Idx); + // Verify function name + if (dii.FunctionName.find(gii.Name.str()) != 0) + Log << "error: address " << HEX64(Addr) << " DWARF function 
\"" + << dii.FunctionName.c_str() + << "\" doesn't match GSYM function \"" << gii.Name << "\"\n"; + // Verify source file path + if (dii.FileName != gsymFilename) + Log << "error: address " << HEX64(Addr) << " DWARF path \"" + << dii.FileName.c_str() << "\" doesn't match GSYM path \"" + << gsymFilename.c_str() << "\"\n"; + // Verify source file line + if (dii.Line != gii.Line) + Log << "error: address " << HEX64(Addr) << " DWARF line " + << dii.Line << " != GSYM line " << gii.Line << "\n"; + } + } + } + } + return Error::success(); +} diff --git a/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp b/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp index 6731a8b27443..cef1b9498c5c 100644 --- a/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp +++ b/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp @@ -25,8 +25,11 @@ enum InfoType : uint32_t { }; raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const FunctionInfo &FI) { - OS << '[' << HEX64(FI.Range.Start) << '-' << HEX64(FI.Range.End) << "): " - << "Name=" << HEX32(FI.Name) << '\n' << FI.OptLineTable << FI.Inline; + OS << FI.Range << ": " << "Name=" << HEX32(FI.Name) << '\n'; + if (FI.OptLineTable) + OS << FI.OptLineTable << '\n'; + if (FI.Inline) + OS << FI.Inline << '\n'; return OS; } @@ -167,7 +170,7 @@ llvm::Expected<LookupResult> FunctionInfo::lookup(DataExtractor &Data, // This function will be called with the result of a binary search of the // address table, we must still make sure the address does not fall into a // gap between functions are after the last function. - if (Addr >= LR.FuncRange.End) + if (LR.FuncRange.size() > 0 && !LR.FuncRange.contains(Addr)) return createStringError(std::errc::io_error, "address 0x%" PRIx64 " is not in GSYM", Addr); @@ -220,6 +223,7 @@ llvm::Expected<LookupResult> FunctionInfo::lookup(DataExtractor &Data, // location as best we can and return. 
SourceLocation SrcLoc; SrcLoc.Name = LR.FuncName; + SrcLoc.Offset = Addr - FuncAddr; LR.Locations.push_back(SrcLoc); return LR; } @@ -232,6 +236,7 @@ llvm::Expected<LookupResult> FunctionInfo::lookup(DataExtractor &Data, SourceLocation SrcLoc; SrcLoc.Name = LR.FuncName; + SrcLoc.Offset = Addr - FuncAddr; SrcLoc.Dir = GR.getString(LineEntryFile->Dir); SrcLoc.Base = GR.getString(LineEntryFile->Base); SrcLoc.Line = LineEntry->Line; diff --git a/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp b/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp index f371426f2010..7d9b72c6283d 100644 --- a/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp +++ b/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp @@ -29,7 +29,13 @@ uint32_t GsymCreator::insertFile(StringRef Path, llvm::sys::path::Style Style) { llvm::StringRef directory = llvm::sys::path::parent_path(Path, Style); llvm::StringRef filename = llvm::sys::path::filename(Path, Style); - FileEntry FE(insertString(directory), insertString(filename)); + // We must insert the strings first, then call the FileEntry constructor. + // If we inline the insertString() function call into the constructor, the + // call order is undefined due to parameter lists not having any ordering + // requirements. + const uint32_t Dir = insertString(directory); + const uint32_t Base = insertString(filename); + FileEntry FE(Dir, Base); std::lock_guard<std::recursive_mutex> Guard(Mutex); const auto NextIndex = Files.size(); @@ -62,7 +68,8 @@ llvm::Error GsymCreator::encode(FileWriter &O) const { if (Funcs.size() > UINT32_MAX) return createStringError(std::errc::invalid_argument, "too many FunctionInfos"); - const uint64_t MinAddr = Funcs.front().startAddress(); + + const uint64_t MinAddr = BaseAddress ? 
*BaseAddress : Funcs.front().startAddress(); const uint64_t MaxAddr = Funcs.back().startAddress(); const uint64_t AddrDelta = MaxAddr - MinAddr; Header Hdr; @@ -73,7 +80,7 @@ llvm::Error GsymCreator::encode(FileWriter &O) const { Hdr.BaseAddress = MinAddr; Hdr.NumAddresses = static_cast<uint32_t>(Funcs.size()); Hdr.StrtabOffset = 0; // We will fix this up later. - Hdr.StrtabOffset = 0; // We will fix this up later. + Hdr.StrtabSize = 0; // We will fix this up later. memset(Hdr.UUID, 0, sizeof(Hdr.UUID)); if (UUID.size() > sizeof(Hdr.UUID)) return createStringError(std::errc::invalid_argument, @@ -203,9 +210,8 @@ llvm::Error GsymCreator::finalize(llvm::raw_ostream &OS) { // that have debug info are last in the sort. if (*Prev == *Curr) { // FunctionInfo entries match exactly (range, lines, inlines) - OS << "warning: duplicate function info entries, removing " - "duplicate:\n" - << *Curr << '\n'; + OS << "warning: duplicate function info entries for range: " + << Curr->Range << '\n'; Curr = Funcs.erase(Prev); } else { if (!Prev->hasRichInfo() && Curr->hasRichInfo()) { @@ -239,20 +245,43 @@ llvm::Error GsymCreator::finalize(llvm::raw_ostream &OS) { Prev = Curr++; } + // If our last function info entry doesn't have a size and if we have valid + // text ranges, we should set the size of the last entry since any search for + // a high address might match our last entry. By fixing up this size, we can + // help ensure we don't cause lookups to always return the last symbol that + // has no size when doing lookups. 
+ if (!Funcs.empty() && Funcs.back().Range.size() == 0 && ValidTextRanges) { + if (auto Range = ValidTextRanges->getRangeThatContains( + Funcs.back().Range.Start)) { + Funcs.back().Range.End = Range->End; + } + } OS << "Pruned " << NumBefore - Funcs.size() << " functions, ended with " << Funcs.size() << " total\n"; return Error::success(); } -uint32_t GsymCreator::insertString(StringRef S) { - std::lock_guard<std::recursive_mutex> Guard(Mutex); +uint32_t GsymCreator::insertString(StringRef S, bool Copy) { if (S.empty()) return 0; + std::lock_guard<std::recursive_mutex> Guard(Mutex); + if (Copy) { + // We need to provide backing storage for the string if requested + // since StringTableBuilder stores references to strings. Any string + // that comes from a section in an object file doesn't need to be + // copied, but any string created by code will need to be copied. + // This allows GsymCreator to be really fast when parsing DWARF and + // other object files as most strings don't need to be copied. + CachedHashStringRef CHStr(S); + if (!StrTab.contains(CHStr)) + S = StringStorage.insert(S).first->getKey(); + } return StrTab.add(S); } void GsymCreator::addFunctionInfo(FunctionInfo &&FI) { std::lock_guard<std::recursive_mutex> Guard(Mutex); + Ranges.insert(FI.Range); Funcs.emplace_back(FI); } @@ -273,3 +302,19 @@ void GsymCreator::forEachFunctionInfo( break; } } + +size_t GsymCreator::getNumFunctionInfos() const{ + std::lock_guard<std::recursive_mutex> Guard(Mutex); + return Funcs.size(); +} + +bool GsymCreator::IsValidTextAddress(uint64_t Addr) const { + if (ValidTextRanges) + return ValidTextRanges->contains(Addr); + return true; // No valid text ranges has been set, so accept all ranges. 
+} + +bool GsymCreator::hasFunctionInfoForAddress(uint64_t Addr) const { + std::lock_guard<std::recursive_mutex> Guard(Mutex); + return Ranges.contains(Addr); +} diff --git a/llvm/lib/DebugInfo/GSYM/GsymReader.cpp b/llvm/lib/DebugInfo/GSYM/GsymReader.cpp index b4f3f2052ae7..2ad18bf63d5d 100644 --- a/llvm/lib/DebugInfo/GSYM/GsymReader.cpp +++ b/llvm/lib/DebugInfo/GSYM/GsymReader.cpp @@ -225,20 +225,33 @@ Optional<uint64_t> GsymReader::getAddressInfoOffset(size_t Index) const { Expected<uint64_t> GsymReader::getAddressIndex(const uint64_t Addr) const { - if (Addr < Hdr->BaseAddress) - return createStringError(std::errc::invalid_argument, - "address 0x%" PRIx64 " not in GSYM", Addr); - const uint64_t AddrOffset = Addr - Hdr->BaseAddress; - switch (Hdr->AddrOffSize) { - case 1: return getAddressOffsetIndex<uint8_t>(AddrOffset); - case 2: return getAddressOffsetIndex<uint16_t>(AddrOffset); - case 4: return getAddressOffsetIndex<uint32_t>(AddrOffset); - case 8: return getAddressOffsetIndex<uint64_t>(AddrOffset); - default: break; + if (Addr >= Hdr->BaseAddress) { + const uint64_t AddrOffset = Addr - Hdr->BaseAddress; + Optional<uint64_t> AddrOffsetIndex; + switch (Hdr->AddrOffSize) { + case 1: + AddrOffsetIndex = getAddressOffsetIndex<uint8_t>(AddrOffset); + break; + case 2: + AddrOffsetIndex = getAddressOffsetIndex<uint16_t>(AddrOffset); + break; + case 4: + AddrOffsetIndex = getAddressOffsetIndex<uint32_t>(AddrOffset); + break; + case 8: + AddrOffsetIndex = getAddressOffsetIndex<uint64_t>(AddrOffset); + break; + default: + return createStringError(std::errc::invalid_argument, + "unsupported address offset size %u", + Hdr->AddrOffSize); + } + if (AddrOffsetIndex) + return *AddrOffsetIndex; } return createStringError(std::errc::invalid_argument, - "unsupported address offset size %u", - Hdr->AddrOffSize); + "address 0x%" PRIx64 " is not in GSYM", Addr); + } llvm::Expected<FunctionInfo> GsymReader::getFunctionInfo(uint64_t Addr) const { @@ -255,7 +268,7 @@ 
llvm::Expected<FunctionInfo> GsymReader::getFunctionInfo(uint64_t Addr) const { if (ExpectedFI->Range.contains(Addr) || ExpectedFI->Range.size() == 0) return ExpectedFI; return createStringError(std::errc::invalid_argument, - "address 0x%" PRIx64 " not in GSYM", Addr); + "address 0x%" PRIx64 " is not in GSYM", Addr); } } return createStringError(std::errc::invalid_argument, @@ -277,3 +290,117 @@ llvm::Expected<LookupResult> GsymReader::lookup(uint64_t Addr) const { "failed to extract address[%" PRIu64 "]", *AddressIndex); } + +void GsymReader::dump(raw_ostream &OS) { + const auto &Header = getHeader(); + // Dump the GSYM header. + OS << Header << "\n"; + // Dump the address table. + OS << "Address Table:\n"; + OS << "INDEX OFFSET"; + + switch (Hdr->AddrOffSize) { + case 1: OS << "8 "; break; + case 2: OS << "16"; break; + case 4: OS << "32"; break; + case 8: OS << "64"; break; + default: OS << "??"; break; + } + OS << " (ADDRESS)\n"; + OS << "====== =============================== \n"; + for (uint32_t I = 0; I < Header.NumAddresses; ++I) { + OS << format("[%4u] ", I); + switch (Hdr->AddrOffSize) { + case 1: OS << HEX8(getAddrOffsets<uint8_t>()[I]); break; + case 2: OS << HEX16(getAddrOffsets<uint16_t>()[I]); break; + case 4: OS << HEX32(getAddrOffsets<uint32_t>()[I]); break; + case 8: OS << HEX32(getAddrOffsets<uint64_t>()[I]); break; + default: break; + } + OS << " (" << HEX64(*getAddress(I)) << ")\n"; + } + // Dump the address info offsets table. + OS << "\nAddress Info Offsets:\n"; + OS << "INDEX Offset\n"; + OS << "====== ==========\n"; + for (uint32_t I = 0; I < Header.NumAddresses; ++I) + OS << format("[%4u] ", I) << HEX32(AddrInfoOffsets[I]) << "\n"; + // Dump the file table. 
+ OS << "\nFiles:\n"; + OS << "INDEX DIRECTORY BASENAME PATH\n"; + OS << "====== ========== ========== ==============================\n"; + for (uint32_t I = 0; I < Files.size(); ++I) { + OS << format("[%4u] ", I) << HEX32(Files[I].Dir) << ' ' + << HEX32(Files[I].Base) << ' '; + dump(OS, getFile(I)); + OS << "\n"; + } + OS << "\n" << StrTab << "\n"; + + for (uint32_t I = 0; I < Header.NumAddresses; ++I) { + OS << "FunctionInfo @ " << HEX32(AddrInfoOffsets[I]) << ": "; + if (auto FI = getFunctionInfo(*getAddress(I))) + dump(OS, *FI); + else + logAllUnhandledErrors(FI.takeError(), OS, "FunctionInfo:"); + } +} + +void GsymReader::dump(raw_ostream &OS, const FunctionInfo &FI) { + OS << FI.Range << " \"" << getString(FI.Name) << "\"\n"; + if (FI.OptLineTable) + dump(OS, *FI.OptLineTable); + if (FI.Inline) + dump(OS, *FI.Inline); +} + +void GsymReader::dump(raw_ostream &OS, const LineTable &LT) { + OS << "LineTable:\n"; + for (auto &LE: LT) { + OS << " " << HEX64(LE.Addr) << ' '; + if (LE.File) + dump(OS, getFile(LE.File)); + OS << ':' << LE.Line << '\n'; + } +} + +void GsymReader::dump(raw_ostream &OS, const InlineInfo &II, uint32_t Indent) { + if (Indent == 0) + OS << "InlineInfo:\n"; + else + OS.indent(Indent); + OS << II.Ranges << ' ' << getString(II.Name); + if (II.CallFile != 0) { + if (auto File = getFile(II.CallFile)) { + OS << " called from "; + dump(OS, File); + OS << ':' << II.CallLine; + } + } + OS << '\n'; + for (const auto &ChildII: II.Children) + dump(OS, ChildII, Indent + 2); +} + +void GsymReader::dump(raw_ostream &OS, Optional<FileEntry> FE) { + if (FE) { + // If we have the file from index 0, then don't print anything + if (FE->Dir == 0 && FE->Base == 0) + return; + StringRef Dir = getString(FE->Dir); + StringRef Base = getString(FE->Base); + if (!Dir.empty()) { + OS << Dir; + if (Dir.contains('\\') && !Dir.contains('/')) + OS << '\\'; + else + OS << '/'; + } + if (!Base.empty()) { + OS << Base; + } + if (!Dir.empty() || !Base.empty()) + return; + } + OS 
<< "<invalid-file>"; +} diff --git a/llvm/lib/DebugInfo/GSYM/InlineInfo.cpp b/llvm/lib/DebugInfo/GSYM/InlineInfo.cpp index 1b8c974fdcd2..21679b1b78aa 100644 --- a/llvm/lib/DebugInfo/GSYM/InlineInfo.cpp +++ b/llvm/lib/DebugInfo/GSYM/InlineInfo.cpp @@ -142,13 +142,17 @@ static bool lookup(const GsymReader &GR, DataExtractor &Data, uint64_t &Offset, return false; } - SourceLocation SrcLoc; - SrcLoc.Name = SrcLocs.back().Name; - SrcLoc.Dir = GR.getString(CallFile->Dir); - SrcLoc.Base = GR.getString(CallFile->Base); - SrcLoc.Line = Inline.CallLine; - SrcLocs.back().Name = GR.getString(Inline.Name); - SrcLocs.push_back(SrcLoc); + if (CallFile->Dir || CallFile->Base) { + SourceLocation SrcLoc; + SrcLoc.Name = SrcLocs.back().Name; + SrcLoc.Offset = SrcLocs.back().Offset; + SrcLoc.Dir = GR.getString(CallFile->Dir); + SrcLoc.Base = GR.getString(CallFile->Base); + SrcLoc.Line = Inline.CallLine; + SrcLocs.back().Name = GR.getString(Inline.Name); + SrcLocs.back().Offset = Addr - Inline.Ranges[0].Start; + SrcLocs.push_back(SrcLoc); + } return true; } diff --git a/llvm/lib/DebugInfo/GSYM/LookupResult.cpp b/llvm/lib/DebugInfo/GSYM/LookupResult.cpp index c54b166b2887..8a624226b1d3 100644 --- a/llvm/lib/DebugInfo/GSYM/LookupResult.cpp +++ b/llvm/lib/DebugInfo/GSYM/LookupResult.cpp @@ -21,7 +21,7 @@ std::string LookupResult::getSourceFile(uint32_t Index) const { if (Index < Locations.size()) { if (!Locations[Index].Dir.empty()) { if (Locations[Index].Base.empty()) { - Fullpath = Locations[Index].Dir; + Fullpath = std::string(Locations[Index].Dir); } else { llvm::SmallString<64> Storage; llvm::sys::path::append(Storage, Locations[Index].Dir, @@ -29,25 +29,30 @@ std::string LookupResult::getSourceFile(uint32_t Index) const { Fullpath.assign(Storage.begin(), Storage.end()); } } else if (!Locations[Index].Base.empty()) - Fullpath = Locations[Index].Base; + Fullpath = std::string(Locations[Index].Base); } return Fullpath; } raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const 
SourceLocation &SL) { - OS << SL.Name << " @ "; - if (!SL.Dir.empty()) { - OS << SL.Dir; - if (SL.Dir.contains('\\') and not SL.Dir.contains('/')) - OS << '\\'; + OS << SL.Name; + if (SL.Offset > 0) + OS << " + " << SL.Offset; + if (SL.Dir.size() || SL.Base.size()) { + OS << " @ "; + if (!SL.Dir.empty()) { + OS << SL.Dir; + if (SL.Dir.contains('\\') and not SL.Dir.contains('/')) + OS << '\\'; + else + OS << '/'; + } + if (SL.Base.empty()) + OS << "<invalid-file>"; else - OS << '/'; + OS << SL.Base; + OS << ':' << SL.Line; } - if (SL.Base.empty()) - OS << "<invalid-file>"; - else - OS << SL.Base; - OS << ':' << SL.Line; return OS; } diff --git a/llvm/lib/DebugInfo/GSYM/ObjectFileTransformer.cpp b/llvm/lib/DebugInfo/GSYM/ObjectFileTransformer.cpp new file mode 100644 index 000000000000..ad35aefe7774 --- /dev/null +++ b/llvm/lib/DebugInfo/GSYM/ObjectFileTransformer.cpp @@ -0,0 +1,116 @@ +//===- ObjectFileTransformer.cpp --------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include <unordered_set> + +#include "llvm/Object/ELFObjectFile.h" +#include "llvm/Object/MachOUniversal.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/DataExtractor.h" +#include "llvm/Support/raw_ostream.h" + +#include "llvm/DebugInfo/GSYM/ObjectFileTransformer.h" +#include "llvm/DebugInfo/GSYM/GsymCreator.h" + +using namespace llvm; +using namespace gsym; + +constexpr uint32_t NT_GNU_BUILD_ID_TAG = 0x03; + +static std::vector<uint8_t> getUUID(const object::ObjectFile &Obj) { + // Extract the UUID from the object file + std::vector<uint8_t> UUID; + if (auto *MachO = dyn_cast<object::MachOObjectFile>(&Obj)) { + const ArrayRef<uint8_t> MachUUID = MachO->getUuid(); + if (!MachUUID.empty()) + UUID.assign(MachUUID.data(), MachUUID.data() + MachUUID.size()); + } else if (isa<object::ELFObjectFileBase>(&Obj)) { + const StringRef GNUBuildID(".note.gnu.build-id"); + for (const object::SectionRef &Sect : Obj.sections()) { + Expected<StringRef> SectNameOrErr = Sect.getName(); + if (!SectNameOrErr) { + consumeError(SectNameOrErr.takeError()); + continue; + } + StringRef SectName(*SectNameOrErr); + if (SectName != GNUBuildID) + continue; + StringRef BuildIDData; + Expected<StringRef> E = Sect.getContents(); + if (E) + BuildIDData = *E; + else { + consumeError(E.takeError()); + continue; + } + DataExtractor Decoder(BuildIDData, Obj.makeTriple().isLittleEndian(), 8); + uint64_t Offset = 0; + const uint32_t NameSize = Decoder.getU32(&Offset); + const uint32_t PayloadSize = Decoder.getU32(&Offset); + const uint32_t PayloadType = Decoder.getU32(&Offset); + StringRef Name(Decoder.getFixedLengthString(&Offset, NameSize)); + if (Name == "GNU" && PayloadType == NT_GNU_BUILD_ID_TAG) { + Offset = alignTo(Offset, 4); + StringRef UUIDBytes(Decoder.getBytes(&Offset, PayloadSize)); + if (!UUIDBytes.empty()) { + auto Ptr = 
reinterpret_cast<const uint8_t *>(UUIDBytes.data()); + UUID.assign(Ptr, Ptr + UUIDBytes.size()); + } + } + } + } + return UUID; +} + +llvm::Error ObjectFileTransformer::convert(const object::ObjectFile &Obj, + raw_ostream &Log, + GsymCreator &Gsym) { + using namespace llvm::object; + + const bool IsMachO = isa<MachOObjectFile>(&Obj); + const bool IsELF = isa<ELFObjectFileBase>(&Obj); + + // Read build ID. + Gsym.setUUID(getUUID(Obj)); + + // Parse the symbol table. + size_t NumBefore = Gsym.getNumFunctionInfos(); + for (const object::SymbolRef &Sym : Obj.symbols()) { + Expected<SymbolRef::Type> SymType = Sym.getType(); + if (!SymType) { + consumeError(SymType.takeError()); + continue; + } + Expected<uint64_t> AddrOrErr = Sym.getValue(); + if (!AddrOrErr) + // TODO: Test this error. + return AddrOrErr.takeError(); + + if (SymType.get() != SymbolRef::Type::ST_Function || + !Gsym.IsValidTextAddress(*AddrOrErr) || + Gsym.hasFunctionInfoForAddress(*AddrOrErr)) + continue; + // Function size for MachO files will be 0 + constexpr bool NoCopy = false; + const uint64_t size = IsELF ? ELFSymbolRef(Sym).getSize() : 0; + Expected<StringRef> Name = Sym.getName(); + if (!Name) { + logAllUnhandledErrors(Name.takeError(), Log, "ObjectFileTransformer: "); + continue; + } + // Remove the leading '_' character in any symbol names if there is one + // for mach-o files. 
+ if (IsMachO) + Name->consume_front("_"); + Gsym.addFunctionInfo( + FunctionInfo(*AddrOrErr, size, Gsym.insertString(*Name, NoCopy))); + } + size_t FunctionsAddedCount = Gsym.getNumFunctionInfos() - NumBefore; + Log << "Loaded " << FunctionsAddedCount << " functions from symbol table.\n"; + return Error::success(); +} diff --git a/llvm/lib/DebugInfo/GSYM/Range.cpp b/llvm/lib/DebugInfo/GSYM/Range.cpp index f78101e49bf8..044ddb8ba1ba 100644 --- a/llvm/lib/DebugInfo/GSYM/Range.cpp +++ b/llvm/lib/DebugInfo/GSYM/Range.cpp @@ -53,6 +53,16 @@ bool AddressRanges::contains(AddressRange Range) const { return Range.End <= It[-1].End; } +Optional<AddressRange> +AddressRanges::getRangeThatContains(uint64_t Addr) const { + auto It = std::partition_point( + Ranges.begin(), Ranges.end(), + [=](const AddressRange &R) { return R.Start <= Addr; }); + if (It != Ranges.begin() && Addr < It[-1].End) + return It[-1]; + return llvm::None; +} + raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const AddressRange &R) { return OS << '[' << HEX64(R.Start) << " - " << HEX64(R.End) << ")"; } diff --git a/llvm/lib/DebugInfo/PDB/DIA/DIASession.cpp b/llvm/lib/DebugInfo/PDB/DIA/DIASession.cpp index 64ffa776bbd6..2729e3236965 100644 --- a/llvm/lib/DebugInfo/PDB/DIA/DIASession.cpp +++ b/llvm/lib/DebugInfo/PDB/DIA/DIASession.cpp @@ -189,8 +189,8 @@ DIASession::getSymbolById(SymIndexId SymbolId) const { return PDBSymbol::create(*this, std::move(RawSymbol)); } -std::unique_ptr<PDBSymbol> -DIASession::findSymbolByAddress(uint64_t Address, PDB_SymType Type) const { +std::unique_ptr<PDBSymbol> DIASession::findSymbolByAddress(uint64_t Address, + PDB_SymType Type) { enum SymTagEnum EnumVal = static_cast<enum SymTagEnum>(Type); CComPtr<IDiaSymbol> Symbol; @@ -207,7 +207,7 @@ DIASession::findSymbolByAddress(uint64_t Address, PDB_SymType Type) const { } std::unique_ptr<PDBSymbol> DIASession::findSymbolByRVA(uint32_t RVA, - PDB_SymType Type) const { + PDB_SymType Type) { enum SymTagEnum EnumVal = 
static_cast<enum SymTagEnum>(Type); CComPtr<IDiaSymbol> Symbol; @@ -220,7 +220,7 @@ std::unique_ptr<PDBSymbol> DIASession::findSymbolByRVA(uint32_t RVA, std::unique_ptr<PDBSymbol> DIASession::findSymbolBySectOffset(uint32_t Sect, uint32_t Offset, - PDB_SymType Type) const { + PDB_SymType Type) { enum SymTagEnum EnumVal = static_cast<enum SymTagEnum>(Type); CComPtr<IDiaSymbol> Symbol; diff --git a/llvm/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp index 419734771ccd..73801ea1dd1b 100644 --- a/llvm/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp @@ -39,7 +39,7 @@ static uint32_t calculateDiSymbolStreamSize(uint32_t SymbolByteSize, DbiModuleDescriptorBuilder::DbiModuleDescriptorBuilder(StringRef ModuleName, uint32_t ModIndex, msf::MSFBuilder &Msf) - : MSF(Msf), ModuleName(ModuleName) { + : MSF(Msf), ModuleName(std::string(ModuleName)) { ::memset(&Layout, 0, sizeof(Layout)); Layout.Mod = ModIndex; } @@ -51,7 +51,7 @@ uint16_t DbiModuleDescriptorBuilder::getStreamIndex() const { } void DbiModuleDescriptorBuilder::setObjFileName(StringRef Name) { - ObjFileName = Name; + ObjFileName = std::string(Name); } void DbiModuleDescriptorBuilder::setPdbFilePathNI(uint32_t NI) { @@ -83,14 +83,13 @@ void DbiModuleDescriptorBuilder::addSymbolsInBulk( } void DbiModuleDescriptorBuilder::addSourceFile(StringRef Path) { - SourceFiles.push_back(Path); + SourceFiles.push_back(std::string(Path)); } uint32_t DbiModuleDescriptorBuilder::calculateC13DebugInfoSize() const { uint32_t Result = 0; for (const auto &Builder : C13Builders) { - assert(Builder && "Empty C13 Fragment Builder!"); - Result += Builder->calculateSerializedLength(); + Result += Builder.calculateSerializedLength(); } return Result; } @@ -163,8 +162,7 @@ Error DbiModuleDescriptorBuilder::commit(BinaryStreamWriter &ModiWriter, "Invalid debug section alignment!"); // TODO: Write C11 
Line data for (const auto &Builder : C13Builders) { - assert(Builder && "Empty C13 Fragment Builder!"); - if (auto EC = Builder->commit(SymbolWriter)) + if (auto EC = Builder.commit(SymbolWriter, CodeViewContainer::Pdb)) return EC; } @@ -180,12 +178,10 @@ Error DbiModuleDescriptorBuilder::commit(BinaryStreamWriter &ModiWriter, void DbiModuleDescriptorBuilder::addDebugSubsection( std::shared_ptr<DebugSubsection> Subsection) { assert(Subsection); - C13Builders.push_back(std::make_unique<DebugSubsectionRecordBuilder>( - std::move(Subsection), CodeViewContainer::Pdb)); + C13Builders.push_back(DebugSubsectionRecordBuilder(std::move(Subsection))); } void DbiModuleDescriptorBuilder::addDebugSubsection( const DebugSubsectionRecord &SubsectionContents) { - C13Builders.push_back(std::make_unique<DebugSubsectionRecordBuilder>( - SubsectionContents, CodeViewContainer::Pdb)); + C13Builders.push_back(DebugSubsectionRecordBuilder(SubsectionContents)); } diff --git a/llvm/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp index 0e00c2f7ff98..627aef7506fd 100644 --- a/llvm/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp @@ -58,10 +58,6 @@ void DbiStreamBuilder::setMachineType(COFF::MachineTypes M) { MachineType = static_cast<pdb::PDB_Machine>(static_cast<unsigned>(M)); } -void DbiStreamBuilder::setSectionMap(ArrayRef<SecMapEntry> SecMap) { - SectionMap = SecMap; -} - void DbiStreamBuilder::setGlobalsStreamIndex(uint32_t Index) { GlobalsStreamIndex = Index; } @@ -348,19 +344,18 @@ static uint16_t toSecMapFlags(uint32_t Flags) { return Ret; } -// A utility function to create a Section Map for a given list of COFF sections. +// Populate the Section Map from COFF section headers. // // A Section Map seem to be a copy of a COFF section list in other format. // I don't know why a PDB file contains both a COFF section header and // a Section Map, but it seems it must be present in a PDB. 
-std::vector<SecMapEntry> DbiStreamBuilder::createSectionMap( +void DbiStreamBuilder::createSectionMap( ArrayRef<llvm::object::coff_section> SecHdrs) { - std::vector<SecMapEntry> Ret; int Idx = 0; auto Add = [&]() -> SecMapEntry & { - Ret.emplace_back(); - auto &Entry = Ret.back(); + SectionMap.emplace_back(); + auto &Entry = SectionMap.back(); memset(&Entry, 0, sizeof(Entry)); Entry.Frame = Idx + 1; @@ -384,8 +379,6 @@ std::vector<SecMapEntry> DbiStreamBuilder::createSectionMap( Entry.Flags = static_cast<uint16_t>(OMFSegDescFlags::AddressIs32Bit) | static_cast<uint16_t>(OMFSegDescFlags::IsAbsoluteAddress); Entry.SecByteLength = UINT32_MAX; - - return Ret; } Error DbiStreamBuilder::commit(const msf::MSFLayout &Layout, @@ -417,7 +410,7 @@ Error DbiStreamBuilder::commit(const msf::MSFLayout &Layout, SecMapHeader SMHeader = {Size, Size}; if (auto EC = Writer.writeObject(SMHeader)) return EC; - if (auto EC = Writer.writeArray(SectionMap)) + if (auto EC = Writer.writeArray(makeArrayRef(SectionMap))) return EC; } diff --git a/llvm/lib/DebugInfo/PDB/Native/EnumTables.cpp b/llvm/lib/DebugInfo/PDB/Native/EnumTables.cpp index f5125393695b..37192ba36a04 100644 --- a/llvm/lib/DebugInfo/PDB/Native/EnumTables.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/EnumTables.cpp @@ -34,4 +34,4 @@ ArrayRef<EnumEntry<uint16_t>> getOMFSegMapDescFlagNames() { return makeArrayRef(OMFSegMapDescFlagNames); } } -}
\ No newline at end of file +} diff --git a/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp index 432f1e9b24d3..4e58489f1401 100644 --- a/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp @@ -5,10 +5,14 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// +// +// The data structures defined in this file are based on the reference +// implementation which is available at +// https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/dbi/gsi.cpp +// +//===----------------------------------------------------------------------===// #include "llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h" - -#include "llvm/ADT/DenseSet.h" #include "llvm/DebugInfo/CodeView/RecordName.h" #include "llvm/DebugInfo/CodeView/SymbolDeserializer.h" #include "llvm/DebugInfo/CodeView/SymbolRecord.h" @@ -20,6 +24,7 @@ #include "llvm/DebugInfo/PDB/Native/Hash.h" #include "llvm/Support/BinaryItemStream.h" #include "llvm/Support/BinaryStreamWriter.h" +#include "llvm/Support/Parallel.h" #include "llvm/Support/xxhash.h" #include <algorithm> #include <vector> @@ -29,53 +34,91 @@ using namespace llvm::msf; using namespace llvm::pdb; using namespace llvm::codeview; +// Helper class for building the public and global PDB hash table buckets. struct llvm::pdb::GSIHashStreamBuilder { - struct SymbolDenseMapInfo { - static inline CVSymbol getEmptyKey() { - static CVSymbol Empty; - return Empty; - } - static inline CVSymbol getTombstoneKey() { - static CVSymbol Tombstone( - DenseMapInfo<ArrayRef<uint8_t>>::getTombstoneKey()); - return Tombstone; - } - static unsigned getHashValue(const CVSymbol &Val) { - return xxHash64(Val.RecordData); - } - static bool isEqual(const CVSymbol &LHS, const CVSymbol &RHS) { - return LHS.RecordData == RHS.RecordData; - } - }; + // Sum of the size of all public or global records. 
+ uint32_t RecordByteSize = 0; - std::vector<CVSymbol> Records; - uint32_t StreamIndex; - llvm::DenseSet<CVSymbol, SymbolDenseMapInfo> SymbolHashes; std::vector<PSHashRecord> HashRecords; + + // The hash bitmap has `ceil((IPHR_HASH + 1) / 32)` words in it. The + // reference implementation builds a hash table with IPHR_HASH buckets in it. + // The last bucket is used to link together free hash table cells in a linked + // list, but it is always empty in the compressed, on-disk format. However, + // the bitmap must have a bit for it. std::array<support::ulittle32_t, (IPHR_HASH + 32) / 32> HashBitmap; + std::vector<support::ulittle32_t> HashBuckets; uint32_t calculateSerializedLength() const; - uint32_t calculateRecordByteSize() const; Error commit(BinaryStreamWriter &Writer); - void finalizeBuckets(uint32_t RecordZeroOffset); - template <typename T> void addSymbol(const T &Symbol, MSFBuilder &Msf) { - T Copy(Symbol); - addSymbol(SymbolSerializer::writeOneSymbol(Copy, Msf.getAllocator(), - CodeViewContainer::Pdb)); - } - void addSymbol(const CVSymbol &Symbol) { - if (Symbol.kind() == S_UDT || Symbol.kind() == S_CONSTANT) { - auto Iter = SymbolHashes.insert(Symbol); - if (!Iter.second) - return; - } + void finalizePublicBuckets(); + void finalizeGlobalBuckets(uint32_t RecordZeroOffset); + + // Assign public and global symbol records into hash table buckets. + // Modifies the list of records to store the bucket index, but does not + // change the order. + void finalizeBuckets(uint32_t RecordZeroOffset, + MutableArrayRef<BulkPublic> Globals); +}; - Records.push_back(Symbol); +// DenseMapInfo implementation for deduplicating symbol records. 
+struct llvm::pdb::SymbolDenseMapInfo { + static inline CVSymbol getEmptyKey() { + static CVSymbol Empty; + return Empty; + } + static inline CVSymbol getTombstoneKey() { + static CVSymbol Tombstone( + DenseMapInfo<ArrayRef<uint8_t>>::getTombstoneKey()); + return Tombstone; + } + static unsigned getHashValue(const CVSymbol &Val) { + return xxHash64(Val.RecordData); + } + static bool isEqual(const CVSymbol &LHS, const CVSymbol &RHS) { + return LHS.RecordData == RHS.RecordData; } }; +namespace { +LLVM_PACKED_START +struct PublicSym32Layout { + RecordPrefix Prefix; + PublicSym32Header Pub; + // char Name[]; +}; +LLVM_PACKED_END +} // namespace + +// Calculate how much memory this public needs when serialized. +static uint32_t sizeOfPublic(const BulkPublic &Pub) { + uint32_t NameLen = Pub.NameLen; + NameLen = std::min(NameLen, + uint32_t(MaxRecordLength - sizeof(PublicSym32Layout) - 1)); + return alignTo(sizeof(PublicSym32Layout) + NameLen + 1, 4); +} + +static CVSymbol serializePublic(uint8_t *Mem, const BulkPublic &Pub) { + // Assume the caller has allocated sizeOfPublic bytes. + uint32_t NameLen = std::min( + Pub.NameLen, uint32_t(MaxRecordLength - sizeof(PublicSym32Layout) - 1)); + size_t Size = alignTo(sizeof(PublicSym32Layout) + NameLen + 1, 4); + assert(Size == sizeOfPublic(Pub)); + auto *FixedMem = reinterpret_cast<PublicSym32Layout *>(Mem); + FixedMem->Prefix.RecordKind = static_cast<uint16_t>(codeview::S_PUB32); + FixedMem->Prefix.RecordLen = static_cast<uint16_t>(Size - 2); + FixedMem->Pub.Flags = Pub.Flags; + FixedMem->Pub.Offset = Pub.Offset; + FixedMem->Pub.Segment = Pub.Segment; + char *NameMem = reinterpret_cast<char *>(FixedMem + 1); + memcpy(NameMem, Pub.Name, NameLen); + // Zero the null terminator and remaining bytes. 
+ memset(&NameMem[NameLen], 0, Size - sizeof(PublicSym32Layout) - NameLen); + return CVSymbol(makeArrayRef(reinterpret_cast<uint8_t *>(Mem), Size)); +} + uint32_t GSIHashStreamBuilder::calculateSerializedLength() const { uint32_t Size = sizeof(GSIHashHeader); Size += HashRecords.size() * sizeof(PSHashRecord); @@ -84,13 +127,6 @@ uint32_t GSIHashStreamBuilder::calculateSerializedLength() const { return Size; } -uint32_t GSIHashStreamBuilder::calculateRecordByteSize() const { - uint32_t Size = 0; - for (const auto &Sym : Records) - Size += Sym.length(); - return Size; -} - Error GSIHashStreamBuilder::commit(BinaryStreamWriter &Writer) { GSIHashHeader Header; Header.VerSignature = GSIHashHeader::HdrSignature; @@ -115,70 +151,134 @@ static bool isAsciiString(StringRef S) { } // See `caseInsensitiveComparePchPchCchCch` in gsi.cpp -static bool gsiRecordLess(StringRef S1, StringRef S2) { +static int gsiRecordCmp(StringRef S1, StringRef S2) { size_t LS = S1.size(); size_t RS = S2.size(); // Shorter strings always compare less than longer strings. if (LS != RS) - return LS < RS; + return LS - RS; // If either string contains non ascii characters, memcmp them. if (LLVM_UNLIKELY(!isAsciiString(S1) || !isAsciiString(S2))) - return memcmp(S1.data(), S2.data(), LS) < 0; + return memcmp(S1.data(), S2.data(), LS); // Both strings are ascii, perform a case-insenstive comparison. - return S1.compare_lower(S2.data()) < 0; + return S1.compare_lower(S2.data()); +} + +void GSIStreamBuilder::finalizePublicBuckets() { + PSH->finalizeBuckets(0, Publics); } -void GSIHashStreamBuilder::finalizeBuckets(uint32_t RecordZeroOffset) { - std::array<std::vector<std::pair<StringRef, PSHashRecord>>, IPHR_HASH + 1> - TmpBuckets; +void GSIStreamBuilder::finalizeGlobalBuckets(uint32_t RecordZeroOffset) { + // Build up a list of globals to be bucketed. Use the BulkPublic data + // structure for this purpose, even though these are global records, not + // public records. 
Most of the same fields are required: + // - Name + // - NameLen + // - SymOffset + // - BucketIdx + // The dead fields are Offset, Segment, and Flags. + std::vector<BulkPublic> Records; + Records.resize(Globals.size()); uint32_t SymOffset = RecordZeroOffset; - for (const CVSymbol &Sym : Records) { - PSHashRecord HR; - // Add one when writing symbol offsets to disk. See GSI1::fixSymRecs. - HR.Off = SymOffset + 1; - HR.CRef = 1; // Always use a refcount of 1. - - // Hash the name to figure out which bucket this goes into. - StringRef Name = getSymbolName(Sym); - size_t BucketIdx = hashStringV1(Name) % IPHR_HASH; - TmpBuckets[BucketIdx].push_back(std::make_pair(Name, HR)); - SymOffset += Sym.length(); + for (size_t I = 0, E = Globals.size(); I < E; ++I) { + StringRef Name = getSymbolName(Globals[I]); + Records[I].Name = Name.data(); + Records[I].NameLen = Name.size(); + Records[I].SymOffset = SymOffset; + SymOffset += Globals[I].length(); + } + + GSH->finalizeBuckets(RecordZeroOffset, Records); +} + +void GSIHashStreamBuilder::finalizeBuckets( + uint32_t RecordZeroOffset, MutableArrayRef<BulkPublic> Records) { + // Hash every name in parallel. + parallelForEachN(0, Records.size(), [&](size_t I) { + Records[I].setBucketIdx(hashStringV1(Records[I].Name) % IPHR_HASH); + }); + + // Count up the size of each bucket. Then, use an exclusive prefix sum to + // calculate the bucket start offsets. This is C++17 std::exclusive_scan, but + // we can't use it yet. + uint32_t BucketStarts[IPHR_HASH] = {0}; + for (const BulkPublic &P : Records) + ++BucketStarts[P.BucketIdx]; + uint32_t Sum = 0; + for (uint32_t &B : BucketStarts) { + uint32_t Size = B; + B = Sum; + Sum += Size; + } + + // Place globals into the hash table in bucket order. When placing a global, + // update the bucket start. Every hash table slot should be filled. Always use + // a refcount of one for now. 
+ HashRecords.resize(Records.size()); + uint32_t BucketCursors[IPHR_HASH]; + memcpy(BucketCursors, BucketStarts, sizeof(BucketCursors)); + for (int I = 0, E = Records.size(); I < E; ++I) { + uint32_t HashIdx = BucketCursors[Records[I].BucketIdx]++; + HashRecords[HashIdx].Off = I; + HashRecords[HashIdx].CRef = 1; } - // Compute the three tables: the hash records in bucket and chain order, the - // bucket presence bitmap, and the bucket chain start offsets. - HashRecords.reserve(Records.size()); - for (ulittle32_t &Word : HashBitmap) - Word = 0; - for (size_t BucketIdx = 0; BucketIdx < IPHR_HASH + 1; ++BucketIdx) { - auto &Bucket = TmpBuckets[BucketIdx]; - if (Bucket.empty()) - continue; - HashBitmap[BucketIdx / 32] |= 1U << (BucketIdx % 32); - - // Calculate what the offset of the first hash record in the chain would - // be if it were inflated to contain 32-bit pointers. On a 32-bit system, - // each record would be 12 bytes. See HROffsetCalc in gsi.h. - const int SizeOfHROffsetCalc = 12; - ulittle32_t ChainStartOff = - ulittle32_t(HashRecords.size() * SizeOfHROffsetCalc); - HashBuckets.push_back(ChainStartOff); - - // Sort each bucket by memcmp of the symbol's name. It's important that - // we use the same sorting algorithm as is used by the reference - // implementation to ensure that the search for a record within a bucket - // can properly early-out when it detects the record won't be found. The - // algorithm used here corredsponds to the function - // caseInsensitiveComparePchPchCchCch in the reference implementation. - llvm::sort(Bucket, [](const std::pair<StringRef, PSHashRecord> &Left, - const std::pair<StringRef, PSHashRecord> &Right) { - return gsiRecordLess(Left.first, Right.first); - }); - - for (const auto &Entry : Bucket) - HashRecords.push_back(Entry.second); + // Within the buckets, sort each bucket by memcmp of the symbol's name. 
It's + // important that we use the same sorting algorithm as is used by the + // reference implementation to ensure that the search for a record within a + // bucket can properly early-out when it detects the record won't be found. + // The algorithm used here corresponds to the function + // caseInsensitiveComparePchPchCchCch in the reference implementation. + parallelForEachN(0, IPHR_HASH, [&](size_t I) { + auto B = HashRecords.begin() + BucketStarts[I]; + auto E = HashRecords.begin() + BucketCursors[I]; + if (B == E) + return; + auto BucketCmp = [Records](const PSHashRecord &LHash, + const PSHashRecord &RHash) { + const BulkPublic &L = Records[uint32_t(LHash.Off)]; + const BulkPublic &R = Records[uint32_t(RHash.Off)]; + assert(L.BucketIdx == R.BucketIdx); + int Cmp = gsiRecordCmp(L.getName(), R.getName()); + if (Cmp != 0) + return Cmp < 0; + // This comparison is necessary to make the sorting stable in the presence + // of two static globals with the same name. The easiest way to observe + // this is with S_LDATA32 records. + return L.SymOffset < R.SymOffset; + }; + llvm::sort(B, E, BucketCmp); + + // After we are done sorting, replace the global indices with the stream + // offsets of each global. Add one when writing symbol offsets to disk. + // See GSI1::fixSymRecs. + for (PSHashRecord &HRec : make_range(B, E)) + HRec.Off = Records[uint32_t(HRec.Off)].SymOffset + 1; + }); + + // For each non-empty bucket, push the bucket start offset into HashBuckets + // and set a bit in the hash bitmap. + for (uint32_t I = 0; I < HashBitmap.size(); ++I) { + uint32_t Word = 0; + for (uint32_t J = 0; J < 32; ++J) { + // Skip empty buckets. + uint32_t BucketIdx = I * 32 + J; + if (BucketIdx >= IPHR_HASH || + BucketStarts[BucketIdx] == BucketCursors[BucketIdx]) + continue; + Word |= (1U << J); + + // Calculate what the offset of the first hash record in the chain would + // be if it were inflated to contain 32-bit pointers. 
On a 32-bit system, + // each record would be 12 bytes. See HROffsetCalc in gsi.h. + const int SizeOfHROffsetCalc = 12; + ulittle32_t ChainStartOff = + ulittle32_t(BucketStarts[BucketIdx] * SizeOfHROffsetCalc); + HashBuckets.push_back(ChainStartOff); + } + HashBitmap[I] = Word; } } @@ -192,7 +292,7 @@ uint32_t GSIStreamBuilder::calculatePublicsHashStreamSize() const { uint32_t Size = 0; Size += sizeof(PublicsStreamHeader); Size += PSH->calculateSerializedLength(); - Size += PSH->Records.size() * sizeof(uint32_t); // AddrMap + Size += Publics.size() * sizeof(uint32_t); // AddrMap // FIXME: Add thunk map and section offsets for incremental linking. return Size; @@ -204,103 +304,90 @@ uint32_t GSIStreamBuilder::calculateGlobalsHashStreamSize() const { Error GSIStreamBuilder::finalizeMsfLayout() { // First we write public symbol records, then we write global symbol records. - uint32_t PSHZero = 0; - uint32_t GSHZero = PSH->calculateRecordByteSize(); - - PSH->finalizeBuckets(PSHZero); - GSH->finalizeBuckets(GSHZero); + finalizePublicBuckets(); + finalizeGlobalBuckets(PSH->RecordByteSize); Expected<uint32_t> Idx = Msf.addStream(calculateGlobalsHashStreamSize()); if (!Idx) return Idx.takeError(); - GSH->StreamIndex = *Idx; + GlobalsStreamIndex = *Idx; + Idx = Msf.addStream(calculatePublicsHashStreamSize()); if (!Idx) return Idx.takeError(); - PSH->StreamIndex = *Idx; + PublicsStreamIndex = *Idx; - uint32_t RecordBytes = - GSH->calculateRecordByteSize() + PSH->calculateRecordByteSize(); + uint32_t RecordBytes = PSH->RecordByteSize + GSH->RecordByteSize; Idx = Msf.addStream(RecordBytes); if (!Idx) return Idx.takeError(); - RecordStreamIdx = *Idx; + RecordStreamIndex = *Idx; return Error::success(); } -static bool comparePubSymByAddrAndName( - const std::pair<const CVSymbol *, const PublicSym32 *> &LS, - const std::pair<const CVSymbol *, const PublicSym32 *> &RS) { - if (LS.second->Segment != RS.second->Segment) - return LS.second->Segment < RS.second->Segment; - if 
(LS.second->Offset != RS.second->Offset) - return LS.second->Offset < RS.second->Offset; +void GSIStreamBuilder::addPublicSymbols(std::vector<BulkPublic> &&PublicsIn) { + assert(Publics.empty() && PSH->RecordByteSize == 0 && + "publics can only be added once"); + Publics = std::move(PublicsIn); - return LS.second->Name < RS.second->Name; -} - -/// Compute the address map. The address map is an array of symbol offsets -/// sorted so that it can be binary searched by address. -static std::vector<ulittle32_t> computeAddrMap(ArrayRef<CVSymbol> Records) { - // Make a vector of pointers to the symbols so we can sort it by address. - // Also gather the symbol offsets while we're at it. - - std::vector<PublicSym32> DeserializedPublics; - std::vector<std::pair<const CVSymbol *, const PublicSym32 *>> PublicsByAddr; - std::vector<uint32_t> SymOffsets; - DeserializedPublics.reserve(Records.size()); - PublicsByAddr.reserve(Records.size()); - SymOffsets.reserve(Records.size()); + // Sort the symbols by name. PDBs contain lots of symbols, so use parallelism. + parallelSort(Publics, [](const BulkPublic &L, const BulkPublic &R) { + return L.getName() < R.getName(); + }); + // Assign offsets and calculate the length of the public symbol records. uint32_t SymOffset = 0; - for (const CVSymbol &Sym : Records) { - assert(Sym.kind() == SymbolKind::S_PUB32); - DeserializedPublics.push_back( - cantFail(SymbolDeserializer::deserializeAs<PublicSym32>(Sym))); - PublicsByAddr.emplace_back(&Sym, &DeserializedPublics.back()); - SymOffsets.push_back(SymOffset); - SymOffset += Sym.length(); - } - llvm::stable_sort(PublicsByAddr, comparePubSymByAddrAndName); - - // Fill in the symbol offsets in the appropriate order. 
- std::vector<ulittle32_t> AddrMap; - AddrMap.reserve(Records.size()); - for (auto &Sym : PublicsByAddr) { - ptrdiff_t Idx = std::distance(Records.data(), Sym.first); - assert(Idx >= 0 && size_t(Idx) < Records.size()); - AddrMap.push_back(ulittle32_t(SymOffsets[Idx])); + for (BulkPublic &Pub : Publics) { + Pub.SymOffset = SymOffset; + SymOffset += sizeOfPublic(Pub); } - return AddrMap; -} -uint32_t GSIStreamBuilder::getPublicsStreamIndex() const { - return PSH->StreamIndex; + // Remember the length of the public stream records. + PSH->RecordByteSize = SymOffset; } -uint32_t GSIStreamBuilder::getGlobalsStreamIndex() const { - return GSH->StreamIndex; +void GSIStreamBuilder::addGlobalSymbol(const ProcRefSym &Sym) { + serializeAndAddGlobal(Sym); } -void GSIStreamBuilder::addPublicSymbol(const PublicSym32 &Pub) { - PSH->addSymbol(Pub, Msf); +void GSIStreamBuilder::addGlobalSymbol(const DataSym &Sym) { + serializeAndAddGlobal(Sym); } -void GSIStreamBuilder::addGlobalSymbol(const ProcRefSym &Sym) { - GSH->addSymbol(Sym, Msf); +void GSIStreamBuilder::addGlobalSymbol(const ConstantSym &Sym) { + serializeAndAddGlobal(Sym); } -void GSIStreamBuilder::addGlobalSymbol(const DataSym &Sym) { - GSH->addSymbol(Sym, Msf); +template <typename T> +void GSIStreamBuilder::serializeAndAddGlobal(const T &Symbol) { + T Copy(Symbol); + addGlobalSymbol(SymbolSerializer::writeOneSymbol(Copy, Msf.getAllocator(), + CodeViewContainer::Pdb)); } -void GSIStreamBuilder::addGlobalSymbol(const ConstantSym &Sym) { - GSH->addSymbol(Sym, Msf); +void GSIStreamBuilder::addGlobalSymbol(const codeview::CVSymbol &Symbol) { + // Ignore duplicate typedefs and constants. 
+ if (Symbol.kind() == S_UDT || Symbol.kind() == S_CONSTANT) { + auto Iter = GlobalsSeen.insert(Symbol); + if (!Iter.second) + return; + } + GSH->RecordByteSize += Symbol.length(); + Globals.push_back(Symbol); } -void GSIStreamBuilder::addGlobalSymbol(const codeview::CVSymbol &Sym) { - GSH->addSymbol(Sym); +// Serialize each public and write it. +static Error writePublics(BinaryStreamWriter &Writer, + ArrayRef<BulkPublic> Publics) { + std::vector<uint8_t> Storage; + for (const BulkPublic &Pub : Publics) { + Storage.resize(sizeOfPublic(Pub)); + serializePublic(Storage.data(), Pub); + if (Error E = Writer.writeBytes(Storage)) + return E; + } + return Error::success(); } static Error writeRecords(BinaryStreamWriter &Writer, @@ -318,14 +405,42 @@ Error GSIStreamBuilder::commitSymbolRecordStream( // Write public symbol records first, followed by global symbol records. This // must match the order that we assume in finalizeMsfLayout when computing // PSHZero and GSHZero. - if (auto EC = writeRecords(Writer, PSH->Records)) + if (auto EC = writePublics(Writer, Publics)) return EC; - if (auto EC = writeRecords(Writer, GSH->Records)) + if (auto EC = writeRecords(Writer, Globals)) return EC; return Error::success(); } +static std::vector<support::ulittle32_t> +computeAddrMap(ArrayRef<BulkPublic> Publics) { + // Build a parallel vector of indices into the Publics vector, and sort it by + // address. 
+ std::vector<ulittle32_t> PubAddrMap; + PubAddrMap.reserve(Publics.size()); + for (int I = 0, E = Publics.size(); I < E; ++I) + PubAddrMap.push_back(ulittle32_t(I)); + + auto AddrCmp = [Publics](const ulittle32_t &LIdx, const ulittle32_t &RIdx) { + const BulkPublic &L = Publics[LIdx]; + const BulkPublic &R = Publics[RIdx]; + if (L.Segment != R.Segment) + return L.Segment < R.Segment; + if (L.Offset != R.Offset) + return L.Offset < R.Offset; + // parallelSort is unstable, so we have to do name comparison to ensure + // that two names for the same location come out in a deterministic order. + return L.getName() < R.getName(); + }; + parallelSort(PubAddrMap, AddrCmp); + + // Rewrite the public symbol indices into symbol offsets. + for (ulittle32_t &Entry : PubAddrMap) + Entry = Publics[Entry].SymOffset; + return PubAddrMap; +} + Error GSIStreamBuilder::commitPublicsHashStream( WritableBinaryStreamRef Stream) { BinaryStreamWriter Writer(Stream); @@ -333,7 +448,7 @@ Error GSIStreamBuilder::commitPublicsHashStream( // FIXME: Fill these in. They are for incremental linking. 
Header.SymHash = PSH->calculateSerializedLength(); - Header.AddrMap = PSH->Records.size() * 4; + Header.AddrMap = Publics.size() * 4; Header.NumThunks = 0; Header.SizeOfThunk = 0; Header.ISectThunkTable = 0; @@ -346,8 +461,9 @@ Error GSIStreamBuilder::commitPublicsHashStream( if (auto EC = PSH->commit(Writer)) return EC; - std::vector<ulittle32_t> AddrMap = computeAddrMap(PSH->Records); - if (auto EC = Writer.writeArray(makeArrayRef(AddrMap))) + std::vector<support::ulittle32_t> PubAddrMap = computeAddrMap(Publics); + assert(PubAddrMap.size() == Publics.size()); + if (auto EC = Writer.writeArray(makeArrayRef(PubAddrMap))) return EC; return Error::success(); @@ -366,7 +482,7 @@ Error GSIStreamBuilder::commit(const msf::MSFLayout &Layout, auto PS = WritableMappedBlockStream::createIndexedStream( Layout, Buffer, getPublicsStreamIndex(), Msf.getAllocator()); auto PRS = WritableMappedBlockStream::createIndexedStream( - Layout, Buffer, getRecordStreamIdx(), Msf.getAllocator()); + Layout, Buffer, getRecordStreamIndex(), Msf.getAllocator()); if (auto EC = commitSymbolRecordStream(*PRS)) return EC; diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp index 39ae84acba20..7717f062eac1 100644 --- a/llvm/lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp @@ -49,11 +49,11 @@ SymIndexId NativeCompilandSymbol::getLexicalParentId() const { return 0; } // this potential confusion. 
std::string NativeCompilandSymbol::getLibraryName() const { - return Module.getObjFileName(); + return std::string(Module.getObjFileName()); } std::string NativeCompilandSymbol::getName() const { - return Module.getModuleName(); + return std::string(Module.getModuleName()); } } // namespace pdb diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeEnumInjectedSources.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeEnumInjectedSources.cpp index 2f6a5bc3d574..7a258acbd7c0 100644 --- a/llvm/lib/DebugInfo/PDB/Native/NativeEnumInjectedSources.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/NativeEnumInjectedSources.cpp @@ -48,19 +48,19 @@ public: std::string getFileName() const override { StringRef Ret = cantFail(Strings.getStringForID(Entry.FileNI), "InjectedSourceStream should have rejected this"); - return Ret; + return std::string(Ret); } std::string getObjectFileName() const override { StringRef Ret = cantFail(Strings.getStringForID(Entry.ObjNI), "InjectedSourceStream should have rejected this"); - return Ret; + return std::string(Ret); } std::string getVirtualFileName() const override { StringRef Ret = cantFail(Strings.getStringForID(Entry.VFileNI), "InjectedSourceStream should have rejected this"); - return Ret; + return std::string(Ret); } uint32_t getCompression() const override { return Entry.Compression; } diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeEnumLineNumbers.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeEnumLineNumbers.cpp new file mode 100644 index 000000000000..1e4b07646335 --- /dev/null +++ b/llvm/lib/DebugInfo/PDB/Native/NativeEnumLineNumbers.cpp @@ -0,0 +1,42 @@ +//==- NativeEnumLineNumbers.cpp - Native Type Enumerator impl ----*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Native/NativeEnumLineNumbers.h" + +#include "llvm/ADT/STLExtras.h" +#include "llvm/DebugInfo/CodeView/DebugLinesSubsection.h" +#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h" +#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h" +#include "llvm/DebugInfo/PDB/Native/NativeLineNumber.h" +#include "llvm/DebugInfo/PDB/Native/NativeSession.h" +#include "llvm/DebugInfo/PDB/Native/NativeSourceFile.h" + +using namespace llvm; +using namespace llvm::codeview; +using namespace llvm::pdb; + +NativeEnumLineNumbers::NativeEnumLineNumbers( + std::vector<NativeLineNumber> LineNums) + : Lines(std::move(LineNums)), Index(0) {} + +uint32_t NativeEnumLineNumbers::getChildCount() const { + return static_cast<uint32_t>(Lines.size()); +} + +std::unique_ptr<IPDBLineNumber> +NativeEnumLineNumbers::getChildAtIndex(uint32_t N) const { + if (N >= getChildCount()) + return nullptr; + return std::make_unique<NativeLineNumber>(Lines[N]); +} + +std::unique_ptr<IPDBLineNumber> NativeEnumLineNumbers::getNext() { + return getChildAtIndex(Index++); +} + +void NativeEnumLineNumbers::reset() { Index = 0; } diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp index 3f393409129b..895f8943157a 100644 --- a/llvm/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp @@ -73,7 +73,7 @@ uint32_t NativeExeSymbol::getAge() const { } std::string NativeExeSymbol::getSymbolsFileName() const { - return Session.getPDBFile().getFilePath(); + return std::string(Session.getPDBFile().getFilePath()); } codeview::GUID NativeExeSymbol::getGuid() const { diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeFunctionSymbol.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeFunctionSymbol.cpp new file mode 100644 index 000000000000..2537daa7493c --- 
/dev/null +++ b/llvm/lib/DebugInfo/PDB/Native/NativeFunctionSymbol.cpp @@ -0,0 +1,57 @@ +//===- NativeFunctionSymbol.cpp - info about function symbols----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Native/NativeFunctionSymbol.h" + +#include "llvm/DebugInfo/CodeView/SymbolRecord.h" +#include "llvm/DebugInfo/PDB/Native/NativeTypeBuiltin.h" +#include "llvm/DebugInfo/PDB/Native/NativeTypeEnum.h" + +using namespace llvm; +using namespace llvm::codeview; +using namespace llvm::pdb; + +NativeFunctionSymbol::NativeFunctionSymbol(NativeSession &Session, + SymIndexId Id, + const codeview::ProcSym &Sym) + : NativeRawSymbol(Session, PDB_SymType::Data, Id), Sym(Sym) {} + +NativeFunctionSymbol::~NativeFunctionSymbol() {} + +void NativeFunctionSymbol::dump(raw_ostream &OS, int Indent, + PdbSymbolIdField ShowIdFields, + PdbSymbolIdField RecurseIdFields) const { + NativeRawSymbol::dump(OS, Indent, ShowIdFields, RecurseIdFields); + dumpSymbolField(OS, "name", getName(), Indent); + dumpSymbolField(OS, "length", getLength(), Indent); + dumpSymbolField(OS, "offset", getAddressOffset(), Indent); + dumpSymbolField(OS, "section", getAddressSection(), Indent); +} + +uint32_t NativeFunctionSymbol::getAddressOffset() const { + return Sym.CodeOffset; +} + +uint32_t NativeFunctionSymbol::getAddressSection() const { return Sym.Segment; } +std::string NativeFunctionSymbol::getName() const { + return std::string(Sym.Name); +} + +PDB_SymType NativeFunctionSymbol::getSymTag() const { + return PDB_SymType::Function; +} + +uint64_t NativeFunctionSymbol::getLength() const { return Sym.CodeSize; } + +uint32_t NativeFunctionSymbol::getRelativeVirtualAddress() const { + return 
Session.getRVAFromSectOffset(Sym.Segment, Sym.CodeOffset); +} + +uint64_t NativeFunctionSymbol::getVirtualAddress() const { + return Session.getVAFromSectOffset(Sym.Segment, Sym.CodeOffset); +} diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeLineNumber.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeLineNumber.cpp new file mode 100644 index 000000000000..2535e09baf62 --- /dev/null +++ b/llvm/lib/DebugInfo/PDB/Native/NativeLineNumber.cpp @@ -0,0 +1,50 @@ +//===- NativeLineNumber.cpp - Native line number implementation -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Native/NativeLineNumber.h" + +using namespace llvm; +using namespace llvm::pdb; + +NativeLineNumber::NativeLineNumber(const NativeSession &Session, + const codeview::LineInfo Line, + uint32_t ColumnNumber, uint32_t Section, + uint32_t Offset, uint32_t Length, + uint32_t SrcFileId) + : Session(Session), Line(Line), ColumnNumber(ColumnNumber), + Section(Section), Offset(Offset), Length(Length), SrcFileId(SrcFileId) {} + +uint32_t NativeLineNumber::getLineNumber() const { return Line.getStartLine(); } + +uint32_t NativeLineNumber::getLineNumberEnd() const { + return Line.getEndLine(); +} + +uint32_t NativeLineNumber::getColumnNumber() const { return ColumnNumber; } + +uint32_t NativeLineNumber::getColumnNumberEnd() const { return 0; } + +uint32_t NativeLineNumber::getAddressSection() const { return Section; } + +uint32_t NativeLineNumber::getAddressOffset() const { return Offset; } + +uint32_t NativeLineNumber::getRelativeVirtualAddress() const { + return Session.getRVAFromSectOffset(Section, Offset); +} + +uint64_t NativeLineNumber::getVirtualAddress() const { + return Session.getVAFromSectOffset(Section, Offset); +} + 
+uint32_t NativeLineNumber::getLength() const { return Length; } + +uint32_t NativeLineNumber::getSourceFileId() const { return SrcFileId; } + +uint32_t NativeLineNumber::getCompilandId() const { return 0; } + +bool NativeLineNumber::isStatement() const { return Line.isStatement(); } diff --git a/llvm/lib/DebugInfo/PDB/Native/NativePublicSymbol.cpp b/llvm/lib/DebugInfo/PDB/Native/NativePublicSymbol.cpp new file mode 100644 index 000000000000..7086af7e67a2 --- /dev/null +++ b/llvm/lib/DebugInfo/PDB/Native/NativePublicSymbol.cpp @@ -0,0 +1,52 @@ +//===- NativePublicSymbol.cpp - info about public symbols -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Native/NativePublicSymbol.h" + +#include "llvm/DebugInfo/CodeView/SymbolRecord.h" +#include "llvm/DebugInfo/PDB/Native/NativeTypeBuiltin.h" +#include "llvm/DebugInfo/PDB/Native/NativeTypeEnum.h" + +using namespace llvm; +using namespace llvm::codeview; +using namespace llvm::pdb; + +NativePublicSymbol::NativePublicSymbol(NativeSession &Session, SymIndexId Id, + const codeview::PublicSym32 &Sym) + : NativeRawSymbol(Session, PDB_SymType::Data, Id), Sym(Sym) {} + +NativePublicSymbol::~NativePublicSymbol() {} + +void NativePublicSymbol::dump(raw_ostream &OS, int Indent, + PdbSymbolIdField ShowIdFields, + PdbSymbolIdField RecurseIdFields) const { + NativeRawSymbol::dump(OS, Indent, ShowIdFields, RecurseIdFields); + dumpSymbolField(OS, "name", getName(), Indent); + dumpSymbolField(OS, "offset", getAddressOffset(), Indent); + dumpSymbolField(OS, "section", getAddressSection(), Indent); +} + +uint32_t NativePublicSymbol::getAddressOffset() const { return Sym.Offset; } + +uint32_t NativePublicSymbol::getAddressSection() const { return 
Sym.Segment; } + +std::string NativePublicSymbol::getName() const { + return std::string(Sym.Name); +} + +PDB_SymType NativePublicSymbol::getSymTag() const { + return PDB_SymType::PublicSymbol; +} + +uint32_t NativePublicSymbol::getRelativeVirtualAddress() const { + return Session.getRVAFromSectOffset(Sym.Segment, Sym.Offset); +} + +uint64_t NativePublicSymbol::getVirtualAddress() const { + return Session.getVAFromSectOffset(Sym.Segment, Sym.Offset); +} diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeSession.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeSession.cpp index b45a5881dcb5..ac8449df44ff 100644 --- a/llvm/lib/DebugInfo/PDB/Native/NativeSession.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/NativeSession.cpp @@ -12,6 +12,7 @@ #include "llvm/DebugInfo/CodeView/TypeIndex.h" #include "llvm/DebugInfo/PDB/IPDBEnumChildren.h" #include "llvm/DebugInfo/PDB/IPDBSourceFile.h" +#include "llvm/DebugInfo/PDB/Native/DbiStream.h" #include "llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h" #include "llvm/DebugInfo/PDB/Native/NativeEnumInjectedSources.h" #include "llvm/DebugInfo/PDB/Native/NativeEnumTypes.h" @@ -25,11 +26,14 @@ #include "llvm/DebugInfo/PDB/PDBSymbolCompiland.h" #include "llvm/DebugInfo/PDB/PDBSymbolExe.h" #include "llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h" +#include "llvm/Object/COFF.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/BinaryByteStream.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorOr.h" +#include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" #include <algorithm> #include <cassert> @@ -75,14 +79,125 @@ Error NativeSession::createFromPdb(std::unique_ptr<MemoryBuffer> Buffer, return Error::success(); } -Error NativeSession::createFromExe(StringRef Path, +static Expected<std::unique_ptr<PDBFile>> +loadPdbFile(StringRef PdbPath, std::unique_ptr<BumpPtrAllocator> &Allocator) { + ErrorOr<std::unique_ptr<MemoryBuffer>> ErrorOrBuffer = + MemoryBuffer::getFile(PdbPath, 
/*FileSize=*/-1, + /*RequiresNullTerminator=*/false); + if (!ErrorOrBuffer) + return make_error<RawError>(ErrorOrBuffer.getError()); + std::unique_ptr<llvm::MemoryBuffer> Buffer = std::move(*ErrorOrBuffer); + + PdbPath = Buffer->getBufferIdentifier(); + file_magic Magic; + auto EC = identify_magic(PdbPath, Magic); + if (EC || Magic != file_magic::pdb) + return make_error<RawError>(EC); + + auto Stream = std::make_unique<MemoryBufferByteStream>(std::move(Buffer), + llvm::support::little); + + auto File = std::make_unique<PDBFile>(PdbPath, std::move(Stream), *Allocator); + if (auto EC = File->parseFileHeaders()) + return std::move(EC); + + if (auto EC = File->parseStreamData()) + return std::move(EC); + + return std::move(File); +} + +Error NativeSession::createFromPdbPath(StringRef PdbPath, + std::unique_ptr<IPDBSession> &Session) { + auto Allocator = std::make_unique<BumpPtrAllocator>(); + auto PdbFile = loadPdbFile(PdbPath, Allocator); + if (!PdbFile) + return PdbFile.takeError(); + + Session = std::make_unique<NativeSession>(std::move(PdbFile.get()), + std::move(Allocator)); + return Error::success(); +} + +static Expected<std::string> getPdbPathFromExe(StringRef ExePath) { + Expected<object::OwningBinary<object::Binary>> BinaryFile = + object::createBinary(ExePath); + if (!BinaryFile) + return BinaryFile.takeError(); + + const object::COFFObjectFile *ObjFile = + dyn_cast<object::COFFObjectFile>(BinaryFile->getBinary()); + if (!ObjFile) + return make_error<RawError>(raw_error_code::invalid_format); + + StringRef PdbPath; + const llvm::codeview::DebugInfo *PdbInfo = nullptr; + if (Error E = ObjFile->getDebugPDBInfo(PdbInfo, PdbPath)) + return std::move(E); + + return std::string(PdbPath); +} + +Error NativeSession::createFromExe(StringRef ExePath, std::unique_ptr<IPDBSession> &Session) { - return make_error<RawError>(raw_error_code::feature_unsupported); + Expected<std::string> PdbPath = getPdbPathFromExe(ExePath); + if (!PdbPath) + return PdbPath.takeError(); + + 
file_magic Magic; + auto EC = identify_magic(PdbPath.get(), Magic); + if (EC || Magic != file_magic::pdb) + return make_error<RawError>(EC); + + auto Allocator = std::make_unique<BumpPtrAllocator>(); + auto File = loadPdbFile(PdbPath.get(), Allocator); + if (!File) + return File.takeError(); + + Session = std::make_unique<NativeSession>(std::move(File.get()), + std::move(Allocator)); + + return Error::success(); } -uint64_t NativeSession::getLoadAddress() const { return 0; } +Expected<std::string> +NativeSession::searchForPdb(const PdbSearchOptions &Opts) { + Expected<std::string> PathOrErr = getPdbPathFromExe(Opts.ExePath); + if (!PathOrErr) + return PathOrErr.takeError(); + StringRef PathFromExe = PathOrErr.get(); + sys::path::Style Style = PathFromExe.startswith("/") + ? sys::path::Style::posix + : sys::path::Style::windows; + StringRef PdbName = sys::path::filename(PathFromExe, Style); + + // Check if pdb exists in the executable directory. + SmallString<128> PdbPath = StringRef(Opts.ExePath); + sys::path::remove_filename(PdbPath); + sys::path::append(PdbPath, PdbName); -bool NativeSession::setLoadAddress(uint64_t Address) { return false; } + auto Allocator = std::make_unique<BumpPtrAllocator>(); + + if (auto File = loadPdbFile(PdbPath, Allocator)) + return std::string(PdbPath); + else + consumeError(File.takeError()); + + // Check path that was in the executable. 
+ if (auto File = loadPdbFile(PathFromExe, Allocator)) + return std::string(PathFromExe); + else + return File.takeError(); + + return make_error<RawError>("PDB not found"); +} + +uint64_t NativeSession::getLoadAddress() const { return LoadAddress; } + +bool NativeSession::setLoadAddress(uint64_t Address) { + LoadAddress = Address; + return true; +} std::unique_ptr<PDBSymbolExe> NativeSession::getGlobalScope() { return PDBSymbol::createAs<PDBSymbolExe>(*this, getNativeGlobalScope()); @@ -95,28 +210,52 @@ NativeSession::getSymbolById(SymIndexId SymbolId) const { bool NativeSession::addressForVA(uint64_t VA, uint32_t &Section, uint32_t &Offset) const { - return false; + uint32_t RVA = VA - getLoadAddress(); + return addressForRVA(RVA, Section, Offset); } -bool NativeSession::addressForRVA(uint32_t VA, uint32_t &Section, +bool NativeSession::addressForRVA(uint32_t RVA, uint32_t &Section, uint32_t &Offset) const { - return false; + Section = 0; + Offset = 0; + + auto Dbi = Pdb->getPDBDbiStream(); + if (!Dbi) + return false; + + if ((int32_t)RVA < 0) + return true; + + Offset = RVA; + for (; Section < Dbi->getSectionHeaders().size(); ++Section) { + auto &Sec = Dbi->getSectionHeaders()[Section]; + if (RVA < Sec.VirtualAddress) + return true; + Offset = RVA - Sec.VirtualAddress; + } + return true; } std::unique_ptr<PDBSymbol> -NativeSession::findSymbolByAddress(uint64_t Address, PDB_SymType Type) const { - return nullptr; +NativeSession::findSymbolByAddress(uint64_t Address, PDB_SymType Type) { + uint32_t Section; + uint32_t Offset; + addressForVA(Address, Section, Offset); + return findSymbolBySectOffset(Section, Offset, Type); } -std::unique_ptr<PDBSymbol> -NativeSession::findSymbolByRVA(uint32_t RVA, PDB_SymType Type) const { - return nullptr; +std::unique_ptr<PDBSymbol> NativeSession::findSymbolByRVA(uint32_t RVA, + PDB_SymType Type) { + uint32_t Section; + uint32_t Offset; + addressForRVA(RVA, Section, Offset); + return findSymbolBySectOffset(Section, Offset, Type); 
} std::unique_ptr<PDBSymbol> NativeSession::findSymbolBySectOffset(uint32_t Sect, uint32_t Offset, - PDB_SymType Type) const { - return nullptr; + PDB_SymType Type) { + return Cache.findSymbolBySectOffset(Sect, Offset, Type); } std::unique_ptr<IPDBEnumLineNumbers> @@ -128,18 +267,19 @@ NativeSession::findLineNumbers(const PDBSymbolCompiland &Compiland, std::unique_ptr<IPDBEnumLineNumbers> NativeSession::findLineNumbersByAddress(uint64_t Address, uint32_t Length) const { - return nullptr; + return Cache.findLineNumbersByVA(Address, Length); } std::unique_ptr<IPDBEnumLineNumbers> NativeSession::findLineNumbersByRVA(uint32_t RVA, uint32_t Length) const { - return nullptr; + return findLineNumbersByAddress(getLoadAddress() + RVA, Length); } std::unique_ptr<IPDBEnumLineNumbers> NativeSession::findLineNumbersBySectOffset(uint32_t Section, uint32_t Offset, uint32_t Length) const { - return nullptr; + uint64_t VA = getVAFromSectOffset(Section, Offset); + return findLineNumbersByAddress(VA, Length); } std::unique_ptr<IPDBEnumSourceFiles> @@ -179,7 +319,7 @@ std::unique_ptr<IPDBEnumSourceFiles> NativeSession::getSourceFilesForCompiland( std::unique_ptr<IPDBSourceFile> NativeSession::getSourceFileById(uint32_t FileId) const { - return nullptr; + return Cache.getSourceFileById(FileId); } std::unique_ptr<IPDBEnumDataStreams> NativeSession::getDebugStreams() const { @@ -225,3 +365,24 @@ NativeExeSymbol &NativeSession::getNativeGlobalScope() const { return Cache.getNativeSymbolById<NativeExeSymbol>(ExeSymbol); } + +uint32_t NativeSession::getRVAFromSectOffset(uint32_t Section, + uint32_t Offset) const { + if (Section <= 0) + return 0; + + auto Dbi = getDbiStreamPtr(*Pdb); + if (!Dbi) + return 0; + + uint32_t MaxSection = Dbi->getSectionHeaders().size(); + if (Section > MaxSection + 1) + Section = MaxSection + 1; + auto &Sec = Dbi->getSectionHeaders()[Section - 1]; + return Sec.VirtualAddress + Offset; +} + +uint64_t NativeSession::getVAFromSectOffset(uint32_t Section, + uint32_t 
Offset) const { + return LoadAddress + getRVAFromSectOffset(Section, Offset); +} diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeSourceFile.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeSourceFile.cpp new file mode 100644 index 000000000000..6473207e058a --- /dev/null +++ b/llvm/lib/DebugInfo/PDB/Native/NativeSourceFile.cpp @@ -0,0 +1,47 @@ +//===- NativeSourceFile.cpp - Native line number implementaiton -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Native/NativeSourceFile.h" +#include "llvm/DebugInfo/PDB/Native/NativeSession.h" + +using namespace llvm; +using namespace llvm::pdb; + +NativeSourceFile::NativeSourceFile(NativeSession &Session, uint32_t FileId, + const codeview::FileChecksumEntry &Checksum) + : Session(Session), FileId(FileId), Checksum(Checksum) {} + +std::string NativeSourceFile::getFileName() const { + auto ST = Session.getPDBFile().getStringTable(); + if (!ST) { + consumeError(ST.takeError()); + return ""; + } + auto FileName = ST->getStringTable().getString(Checksum.FileNameOffset); + if (!FileName) { + consumeError(FileName.takeError()); + return ""; + } + + return std::string(FileName.get()); +} + +uint32_t NativeSourceFile::getUniqueId() const { return FileId; } + +std::string NativeSourceFile::getChecksum() const { + return toStringRef(Checksum.Checksum).str(); +} + +PDB_Checksum NativeSourceFile::getChecksumType() const { + return static_cast<PDB_Checksum>(Checksum.Kind); +} + +std::unique_ptr<IPDBEnumChildren<PDBSymbolCompiland>> +NativeSourceFile::getCompilands() const { + return nullptr; +} diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeSymbolEnumerator.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeSymbolEnumerator.cpp index 704c1254afbf..e5f1dcaf801e 
100644 --- a/llvm/lib/DebugInfo/PDB/Native/NativeSymbolEnumerator.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/NativeSymbolEnumerator.cpp @@ -51,7 +51,9 @@ SymIndexId NativeSymbolEnumerator::getClassParentId() const { SymIndexId NativeSymbolEnumerator::getLexicalParentId() const { return 0; } -std::string NativeSymbolEnumerator::getName() const { return Record.Name; } +std::string NativeSymbolEnumerator::getName() const { + return std::string(Record.Name); +} SymIndexId NativeSymbolEnumerator::getTypeId() const { return Parent.getTypeId(); diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeTypeArray.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeTypeArray.cpp index 80d455ad66e9..63ac9fae0e87 100644 --- a/llvm/lib/DebugInfo/PDB/Native/NativeTypeArray.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/NativeTypeArray.cpp @@ -63,4 +63,4 @@ SymIndexId NativeTypeArray::getTypeId() const { Record.getElementType()); } -uint64_t NativeTypeArray::getLength() const { return Record.Size; }
\ No newline at end of file +uint64_t NativeTypeArray::getLength() const { return Record.Size; } diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeTypeEnum.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeTypeEnum.cpp index 26ccb7daece0..aaec3a5e7c60 100644 --- a/llvm/lib/DebugInfo/PDB/Native/NativeTypeEnum.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/NativeTypeEnum.cpp @@ -305,7 +305,7 @@ std::string NativeTypeEnum::getName() const { if (UnmodifiedType) return UnmodifiedType->getName(); - return Record->getName(); + return std::string(Record->getName()); } bool NativeTypeEnum::isNested() const { diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeTypeTypedef.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeTypeTypedef.cpp index 60b373282267..72964a9e0d4d 100644 --- a/llvm/lib/DebugInfo/PDB/Native/NativeTypeTypedef.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/NativeTypeTypedef.cpp @@ -20,7 +20,9 @@ void NativeTypeTypedef::dump(raw_ostream &OS, int Indent, PdbSymbolIdField::Type, ShowIdFields, RecurseIdFields); } -std::string NativeTypeTypedef::getName() const { return Record.Name; } +std::string NativeTypeTypedef::getName() const { + return std::string(Record.Name); +} SymIndexId NativeTypeTypedef::getTypeId() const { return Session.getSymbolCache().findSymbolByTypeIndex(Record.Type); diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeTypeUDT.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeTypeUDT.cpp index be67846c0b24..b0be7f76e86e 100644 --- a/llvm/lib/DebugInfo/PDB/Native/NativeTypeUDT.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/NativeTypeUDT.cpp @@ -74,7 +74,7 @@ std::string NativeTypeUDT::getName() const { if (UnmodifiedType) return UnmodifiedType->getName(); - return Tag->getName(); + return std::string(Tag->getName()); } SymIndexId NativeTypeUDT::getLexicalParentId() const { return 0; } diff --git a/llvm/lib/DebugInfo/PDB/Native/PDBFile.cpp b/llvm/lib/DebugInfo/PDB/Native/PDBFile.cpp index 9ac226b89139..cde645236851 100644 --- a/llvm/lib/DebugInfo/PDB/Native/PDBFile.cpp +++ 
b/llvm/lib/DebugInfo/PDB/Native/PDBFile.cpp @@ -41,7 +41,8 @@ typedef FixedStreamArray<support::ulittle32_t> ulittle_array; PDBFile::PDBFile(StringRef Path, std::unique_ptr<BinaryStream> PdbFileBuffer, BumpPtrAllocator &Allocator) - : FilePath(Path), Allocator(Allocator), Buffer(std::move(PdbFileBuffer)) {} + : FilePath(std::string(Path)), Allocator(Allocator), + Buffer(std::move(PdbFileBuffer)) {} PDBFile::~PDBFile() = default; diff --git a/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp index aa3288724390..deb0f201a71e 100644 --- a/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp @@ -7,9 +7,7 @@ //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/PDB/Native/PDBFileBuilder.h" - #include "llvm/ADT/BitVector.h" - #include "llvm/DebugInfo/MSF/MSFBuilder.h" #include "llvm/DebugInfo/PDB/Native/DbiStream.h" #include "llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h" @@ -23,6 +21,7 @@ #include "llvm/Support/BinaryStream.h" #include "llvm/Support/BinaryStreamWriter.h" #include "llvm/Support/CRC.h" +#include "llvm/Support/Chrono.h" #include "llvm/Support/Path.h" #include "llvm/Support/xxhash.h" @@ -95,7 +94,7 @@ Error PDBFileBuilder::addNamedStream(StringRef Name, StringRef Data) { if (!ExpectedIndex) return ExpectedIndex.takeError(); assert(NamedStreamData.count(*ExpectedIndex) == 0); - NamedStreamData[*ExpectedIndex] = Data; + NamedStreamData[*ExpectedIndex] = std::string(Data); return Error::success(); } @@ -144,7 +143,7 @@ Error PDBFileBuilder::finalizeMsfLayout() { if (Dbi) { Dbi->setPublicsStreamIndex(Gsi->getPublicsStreamIndex()); Dbi->setGlobalsStreamIndex(Gsi->getGlobalsStreamIndex()); - Dbi->setSymbolRecordStreamIndex(Gsi->getRecordStreamIdx()); + Dbi->setSymbolRecordStreamIndex(Gsi->getRecordStreamIndex()); } } if (Tpi) { diff --git a/llvm/lib/DebugInfo/PDB/Native/SymbolCache.cpp 
b/llvm/lib/DebugInfo/PDB/Native/SymbolCache.cpp index 5cdd628312fe..9f15907b519e 100644 --- a/llvm/lib/DebugInfo/PDB/Native/SymbolCache.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/SymbolCache.cpp @@ -1,13 +1,18 @@ #include "llvm/DebugInfo/PDB/Native/SymbolCache.h" +#include "llvm/DebugInfo/CodeView/DebugLinesSubsection.h" #include "llvm/DebugInfo/CodeView/SymbolDeserializer.h" #include "llvm/DebugInfo/CodeView/TypeDeserializer.h" #include "llvm/DebugInfo/CodeView/TypeRecordHelpers.h" #include "llvm/DebugInfo/PDB/Native/DbiStream.h" #include "llvm/DebugInfo/PDB/Native/GlobalsStream.h" +#include "llvm/DebugInfo/PDB/Native/ISectionContribVisitor.h" #include "llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h" #include "llvm/DebugInfo/PDB/Native/NativeEnumGlobals.h" +#include "llvm/DebugInfo/PDB/Native/NativeEnumLineNumbers.h" #include "llvm/DebugInfo/PDB/Native/NativeEnumTypes.h" +#include "llvm/DebugInfo/PDB/Native/NativeFunctionSymbol.h" +#include "llvm/DebugInfo/PDB/Native/NativePublicSymbol.h" #include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h" #include "llvm/DebugInfo/PDB/Native/NativeSession.h" #include "llvm/DebugInfo/PDB/Native/NativeTypeArray.h" @@ -19,6 +24,7 @@ #include "llvm/DebugInfo/PDB/Native/NativeTypeUDT.h" #include "llvm/DebugInfo/PDB/Native/NativeTypeVTShape.h" #include "llvm/DebugInfo/PDB/Native/PDBFile.h" +#include "llvm/DebugInfo/PDB/Native/PublicsStream.h" #include "llvm/DebugInfo/PDB/Native/SymbolStream.h" #include "llvm/DebugInfo/PDB/Native/TpiStream.h" #include "llvm/DebugInfo/PDB/PDBSymbol.h" @@ -62,9 +68,10 @@ static const struct BuiltinTypeEntry { }; SymbolCache::SymbolCache(NativeSession &Session, DbiStream *Dbi) - : Session(Session), Dbi(Dbi) { + : Session(Session), Dbi(Dbi), AddrToModuleIndex(IMapAllocator) { // Id 0 is reserved for the invalid symbol. 
Cache.push_back(nullptr); + SourceFiles.push_back(nullptr); if (Dbi) Compilands.resize(Dbi->modules().getModuleCount()); @@ -281,6 +288,312 @@ SymIndexId SymbolCache::getOrCreateGlobalSymbolByOffset(uint32_t Offset) { return Id; } +Expected<ModuleDebugStreamRef> +SymbolCache::getModuleDebugStream(uint32_t Index) const { + assert(Dbi && "Dbi stream not present"); + + DbiModuleDescriptor Modi = Dbi->modules().getModuleDescriptor(Index); + + uint16_t ModiStream = Modi.getModuleStreamIndex(); + if (ModiStream == kInvalidStreamIndex) + return make_error<RawError>("Module stream not present"); + + std::unique_ptr<msf::MappedBlockStream> ModStreamData = + Session.getPDBFile().createIndexedStream(ModiStream); + + ModuleDebugStreamRef ModS(Modi, std::move(ModStreamData)); + if (auto EC = ModS.reload()) + return std::move(EC); + + return std::move(ModS); +} + +std::unique_ptr<PDBSymbol> +SymbolCache::findSymbolBySectOffset(uint32_t Sect, uint32_t Offset, + PDB_SymType Type) { + if (AddrToModuleIndex.empty()) + parseSectionContribs(); + + switch (Type) { + case PDB_SymType::Function: + return findFunctionSymbolBySectOffset(Sect, Offset); + case PDB_SymType::PublicSymbol: + return findPublicSymbolBySectOffset(Sect, Offset); + case PDB_SymType::None: { + // FIXME: Implement for PDB_SymType::Data. 
+ if (auto Sym = findFunctionSymbolBySectOffset(Sect, Offset)) + return Sym; + return nullptr; + } + default: + return nullptr; + } +} + +std::unique_ptr<PDBSymbol> +SymbolCache::findFunctionSymbolBySectOffset(uint32_t Sect, uint32_t Offset) { + auto Iter = AddressToFunctionSymId.find({Sect, Offset}); + if (Iter != AddressToFunctionSymId.end()) + return getSymbolById(Iter->second); + + if (!Dbi) + return nullptr; + + auto Modi = getModuleIndexForAddr(Session.getVAFromSectOffset(Sect, Offset)); + if (!Modi) + return nullptr; + + auto ExpectedModS = getModuleDebugStream(*Modi); + if (!ExpectedModS) { + consumeError(ExpectedModS.takeError()); + return nullptr; + } + CVSymbolArray Syms = ExpectedModS->getSymbolArray(); + + // Search for the symbol in this module. + for (auto I = Syms.begin(), E = Syms.end(); I != E; ++I) { + if (I->kind() != S_LPROC32 && I->kind() != S_GPROC32) + continue; + auto PS = cantFail(SymbolDeserializer::deserializeAs<ProcSym>(*I)); + if (Sect == PS.Segment && Offset >= PS.CodeOffset && + Offset < PS.CodeOffset + PS.CodeSize) { + SymIndexId Id = createSymbol<NativeFunctionSymbol>(PS); + AddressToFunctionSymId.insert({{Sect, Offset}, Id}); + return getSymbolById(Id); + } + + // Jump to the end of this ProcSym. + I = Syms.at(PS.End); + } + return nullptr; +} + +std::unique_ptr<PDBSymbol> +SymbolCache::findPublicSymbolBySectOffset(uint32_t Sect, uint32_t Offset) { + auto Iter = AddressToPublicSymId.find({Sect, Offset}); + if (Iter != AddressToPublicSymId.end()) + return getSymbolById(Iter->second); + + auto Publics = Session.getPDBFile().getPDBPublicsStream(); + if (!Publics) + return nullptr; + + auto ExpectedSyms = Session.getPDBFile().getPDBSymbolStream(); + if (!ExpectedSyms) + return nullptr; + BinaryStreamRef SymStream = + ExpectedSyms->getSymbolArray().getUnderlyingStream(); + + // Use binary search to find the first public symbol with an address greater + // than or equal to Sect, Offset. 
+ auto AddrMap = Publics->getAddressMap(); + auto First = AddrMap.begin(); + auto It = AddrMap.begin(); + size_t Count = AddrMap.size(); + size_t Half; + while (Count > 0) { + It = First; + Half = Count / 2; + It += Half; + Expected<CVSymbol> Sym = readSymbolFromStream(SymStream, *It); + if (!Sym) { + consumeError(Sym.takeError()); + return nullptr; + } + + auto PS = + cantFail(SymbolDeserializer::deserializeAs<PublicSym32>(Sym.get())); + if (PS.Segment < Sect || (PS.Segment == Sect && PS.Offset <= Offset)) { + First = ++It; + Count -= Half + 1; + } else + Count = Half; + } + if (It == AddrMap.begin()) + return nullptr; + --It; + + Expected<CVSymbol> Sym = readSymbolFromStream(SymStream, *It); + if (!Sym) { + consumeError(Sym.takeError()); + return nullptr; + } + auto PS = cantFail(SymbolDeserializer::deserializeAs<PublicSym32>(Sym.get())); + SymIndexId Id = createSymbol<NativePublicSymbol>(PS); + AddressToPublicSymId.insert({{Sect, Offset}, Id}); + return getSymbolById(Id); +} + +std::vector<SymbolCache::LineTableEntry> +SymbolCache::findLineTable(uint16_t Modi) const { + // Check if this module has already been added. + auto LineTableIter = LineTable.find(Modi); + if (LineTableIter != LineTable.end()) + return LineTableIter->second; + + std::vector<LineTableEntry> &ModuleLineTable = LineTable[Modi]; + + // If there is an error or there are no lines, just return the + // empty vector. 
+ Expected<ModuleDebugStreamRef> ExpectedModS = getModuleDebugStream(Modi); + if (!ExpectedModS) { + consumeError(ExpectedModS.takeError()); + return ModuleLineTable; + } + + std::vector<std::vector<LineTableEntry>> EntryList; + for (const auto &SS : ExpectedModS->getSubsectionsArray()) { + if (SS.kind() != DebugSubsectionKind::Lines) + continue; + + DebugLinesSubsectionRef Lines; + BinaryStreamReader Reader(SS.getRecordData()); + if (auto EC = Lines.initialize(Reader)) { + consumeError(std::move(EC)); + continue; + } + + uint32_t RelocSegment = Lines.header()->RelocSegment; + uint32_t RelocOffset = Lines.header()->RelocOffset; + for (const LineColumnEntry &Group : Lines) { + if (Group.LineNumbers.empty()) + continue; + + std::vector<LineTableEntry> Entries; + + // If there are column numbers, then they should be in a parallel stream + // to the line numbers. + auto ColIt = Group.Columns.begin(); + auto ColsEnd = Group.Columns.end(); + + for (const LineNumberEntry &LN : Group.LineNumbers) { + uint64_t VA = + Session.getVAFromSectOffset(RelocSegment, RelocOffset + LN.Offset); + LineInfo Line(LN.Flags); + uint32_t ColNum = 0; + + if (Lines.hasColumnInfo() && ColIt != ColsEnd) { + ColNum = ColIt->StartColumn; + ++ColIt; + } + Entries.push_back({VA, Line, ColNum, Group.NameIndex, false}); + } + + // Add a terminal entry line to mark the end of this subsection. + uint64_t VA = Session.getVAFromSectOffset( + RelocSegment, RelocOffset + Lines.header()->CodeSize); + LineInfo LastLine(Group.LineNumbers.back().Flags); + uint32_t ColNum = + (Lines.hasColumnInfo()) ? Group.Columns.back().StartColumn : 0; + Entries.push_back({VA, LastLine, ColNum, Group.NameIndex, true}); + + EntryList.push_back(Entries); + } + } + + // Sort EntryList, and add flattened contents to the line table. 
+ std::sort(EntryList.begin(), EntryList.end(), + [](const std::vector<LineTableEntry> &LHS, + const std::vector<LineTableEntry> &RHS) { + return LHS[0].Addr < RHS[0].Addr; + }); + for (size_t I = 0; I < EntryList.size(); ++I) + ModuleLineTable.insert(ModuleLineTable.end(), EntryList[I].begin(), + EntryList[I].end()); + + return ModuleLineTable; +} + +std::unique_ptr<IPDBEnumLineNumbers> +SymbolCache::findLineNumbersByVA(uint64_t VA, uint32_t Length) const { + Optional<uint16_t> MaybeModi = getModuleIndexForAddr(VA); + if (!MaybeModi) + return nullptr; + uint16_t Modi = *MaybeModi; + + std::vector<LineTableEntry> Lines = findLineTable(Modi); + if (Lines.empty()) + return nullptr; + + // Find the first line in the line table whose address is not greater than + // the one we are searching for. + auto LineIter = llvm::partition_point(Lines, [&](const LineTableEntry &E) { + return (E.Addr < VA || (E.Addr == VA && E.IsTerminalEntry)); + }); + + // Try to back up if we've gone too far. + if (LineIter == Lines.end() || LineIter->Addr > VA) { + if (LineIter == Lines.begin() || std::prev(LineIter)->IsTerminalEntry) + return nullptr; + --LineIter; + } + + Expected<ModuleDebugStreamRef> ExpectedModS = getModuleDebugStream(Modi); + if (!ExpectedModS) { + consumeError(ExpectedModS.takeError()); + return nullptr; + } + Expected<DebugChecksumsSubsectionRef> ExpectedChecksums = + ExpectedModS->findChecksumsSubsection(); + if (!ExpectedChecksums) { + consumeError(ExpectedChecksums.takeError()); + return nullptr; + } + + // Populate a vector of NativeLineNumbers that have addresses in the given + // address range. + Optional<uint16_t> EndModi = getModuleIndexForAddr(VA + Length); + if (!EndModi) + return nullptr; + std::vector<NativeLineNumber> LineNumbers; + while (Modi <= *EndModi) { + // If we reached the end of the current module, increment Modi and get the + // new line table and checksums array. 
+ if (LineIter == Lines.end()) { + ++Modi; + + ExpectedModS = getModuleDebugStream(Modi); + if (!ExpectedModS) { + consumeError(ExpectedModS.takeError()); + break; + } + ExpectedChecksums = ExpectedModS->findChecksumsSubsection(); + if (!ExpectedChecksums) { + consumeError(ExpectedChecksums.takeError()); + break; + } + + Lines = findLineTable(Modi); + LineIter = Lines.begin(); + + if (Lines.empty()) + continue; + } + + if (LineIter->IsTerminalEntry) { + ++LineIter; + continue; + } + + // If the line is still within the address range, create a NativeLineNumber + // and add to the list. + if (LineIter->Addr > VA + Length) + break; + + uint32_t LineSect, LineOff; + Session.addressForVA(LineIter->Addr, LineSect, LineOff); + uint32_t LineLength = std::next(LineIter)->Addr - LineIter->Addr; + auto ChecksumIter = + ExpectedChecksums->getArray().at(LineIter->FileNameIndex); + uint32_t SrcFileId = getOrCreateSourceFile(*ChecksumIter); + NativeLineNumber LineNum(Session, LineIter->Line, LineIter->ColumnNumber, + LineSect, LineOff, LineLength, SrcFileId); + LineNumbers.push_back(LineNum); + ++LineIter; + } + return std::make_unique<NativeEnumLineNumbers>(std::move(LineNumbers)); +} + std::unique_ptr<PDBSymbolCompiland> SymbolCache::getOrCreateCompiland(uint32_t Index) { if (!Dbi) @@ -297,3 +610,65 @@ SymbolCache::getOrCreateCompiland(uint32_t Index) { return Session.getConcreteSymbolById<PDBSymbolCompiland>(Compilands[Index]); } + +std::unique_ptr<IPDBSourceFile> +SymbolCache::getSourceFileById(SymIndexId FileId) const { + assert(FileId < SourceFiles.size()); + + // Id 0 is reserved. 
+ if (FileId == 0) + return nullptr; + + return std::unique_ptr<NativeSourceFile>( + new NativeSourceFile(*SourceFiles[FileId].get())); +} + +SymIndexId +SymbolCache::getOrCreateSourceFile(const FileChecksumEntry &Checksums) const { + auto Iter = FileNameOffsetToId.find(Checksums.FileNameOffset); + if (Iter != FileNameOffsetToId.end()) + return Iter->second; + + SymIndexId Id = SourceFiles.size(); + auto SrcFile = std::make_unique<NativeSourceFile>(Session, Id, Checksums); + SourceFiles.push_back(std::move(SrcFile)); + FileNameOffsetToId[Checksums.FileNameOffset] = Id; + return Id; +} + +void SymbolCache::parseSectionContribs() { + if (!Dbi) + return; + + class Visitor : public ISectionContribVisitor { + NativeSession &Session; + IMap &AddrMap; + + public: + Visitor(NativeSession &Session, IMap &AddrMap) + : Session(Session), AddrMap(AddrMap) {} + void visit(const SectionContrib &C) override { + if (C.Size == 0) + return; + + uint64_t VA = Session.getVAFromSectOffset(C.ISect, C.Off); + uint64_t End = VA + C.Size; + + // Ignore overlapping sections based on the assumption that a valid + // PDB file should not have overlaps. 
+ if (!AddrMap.overlaps(VA, End)) + AddrMap.insert(VA, End, C.Imod); + } + void visit(const SectionContrib2 &C) override { visit(C.Base); } + }; + + Visitor V(Session, AddrToModuleIndex); + Dbi->visitSectionContributions(V); +} + +Optional<uint16_t> SymbolCache::getModuleIndexForAddr(uint64_t Addr) const { + auto Iter = AddrToModuleIndex.find(Addr); + if (Iter == AddrToModuleIndex.end()) + return None; + return Iter.value(); +} diff --git a/llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp index 4f10f8524a9b..51a1f0a544e3 100644 --- a/llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp @@ -44,6 +44,9 @@ void TpiStreamBuilder::setVersionHeader(PdbRaw_TpiVer Version) { void TpiStreamBuilder::addTypeRecord(ArrayRef<uint8_t> Record, Optional<uint32_t> Hash) { // If we just crossed an 8KB threshold, add a type index offset. + assert(((Record.size() & 3) == 0) && + "The type record's size is not a multiple of 4 bytes which will " + "cause misalignment in the output TPI stream!"); size_t NewSize = TypeRecordBytes + Record.size(); constexpr size_t EightKB = 8 * 1024; if (NewSize / EightKB > TypeRecordBytes / EightKB || TypeRecords.empty()) { @@ -153,8 +156,11 @@ Error TpiStreamBuilder::commit(const msf::MSFLayout &Layout, return EC; for (auto Rec : TypeRecords) { - assert(!Rec.empty()); // An empty record will not write anything, but it - // would shift all offsets from here on. 
+ assert(!Rec.empty() && "Attempting to write an empty type record shifts " + "all offsets in the TPI stream!"); + assert(((Rec.size() & 3) == 0) && + "The type record's size is not a multiple of 4 bytes which will " + "cause misalignment in the output TPI stream!"); if (auto EC = Writer.writeBytes(Rec)) return EC; } diff --git a/llvm/lib/DebugInfo/PDB/PDB.cpp b/llvm/lib/DebugInfo/PDB/PDB.cpp index e7b968cb7bea..e5b7731f6f4a 100644 --- a/llvm/lib/DebugInfo/PDB/PDB.cpp +++ b/llvm/lib/DebugInfo/PDB/PDB.cpp @@ -23,15 +23,8 @@ using namespace llvm::pdb; Error llvm::pdb::loadDataForPDB(PDB_ReaderType Type, StringRef Path, std::unique_ptr<IPDBSession> &Session) { // Create the correct concrete instance type based on the value of Type. - if (Type == PDB_ReaderType::Native) { - ErrorOr<std::unique_ptr<MemoryBuffer>> ErrorOrBuffer = - MemoryBuffer::getFileOrSTDIN(Path, /*FileSize=*/-1, - /*RequiresNullTerminator=*/false); - if (!ErrorOrBuffer) - return errorCodeToError(ErrorOrBuffer.getError()); - - return NativeSession::createFromPdb(std::move(*ErrorOrBuffer), Session); - } + if (Type == PDB_ReaderType::Native) + return NativeSession::createFromPdbPath(Path, Session); #if LLVM_ENABLE_DIA_SDK return DIASession::createFromPdb(Path, Session); @@ -43,8 +36,12 @@ Error llvm::pdb::loadDataForPDB(PDB_ReaderType Type, StringRef Path, Error llvm::pdb::loadDataForEXE(PDB_ReaderType Type, StringRef Path, std::unique_ptr<IPDBSession> &Session) { // Create the correct concrete instance type based on the value of Type. 
- if (Type == PDB_ReaderType::Native) - return NativeSession::createFromExe(Path, Session); + if (Type == PDB_ReaderType::Native) { + Expected<std::string> PdbPath = NativeSession::searchForPdb({Path}); + if (!PdbPath) + return PdbPath.takeError(); + return NativeSession::createFromPdbPath(PdbPath.get(), Session); + } #if LLVM_ENABLE_DIA_SDK return DIASession::createFromExe(Path, Session); diff --git a/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp b/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp index 2f3a2500c293..10352237763c 100644 --- a/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp +++ b/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp @@ -73,12 +73,12 @@ void DIPrinter::print(const DILineInfo &Info, bool Inlined) { std::string Filename = Info.FileName; if (Filename == DILineInfo::BadString) Filename = DILineInfo::Addr2LineBadString; - else if (Basenames) - Filename = llvm::sys::path::filename(Filename); if (!Verbose) { OS << Filename << ":" << Info.Line; if (Style == OutputStyle::LLVM) OS << ":" << Info.Column; + else if (Style == OutputStyle::GNU && Info.Discriminator != 0) + OS << " (discriminator " << Info.Discriminator << ")"; OS << "\n"; printContext(Filename, Info.Line); return; diff --git a/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp b/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp index b4d49d9ff958..84524195fa8a 100644 --- a/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp +++ b/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp @@ -35,13 +35,7 @@ using namespace llvm; using namespace object; using namespace symbolize; -static DILineInfoSpecifier -getDILineInfoSpecifier(FunctionNameKind FNKind) { - return DILineInfoSpecifier( - DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, FNKind); -} - -ErrorOr<std::unique_ptr<SymbolizableObjectFile>> +Expected<std::unique_ptr<SymbolizableObjectFile>> SymbolizableObjectFile::create(const object::ObjectFile *Obj, std::unique_ptr<DIContext> DICtx, bool UntagAddresses) { @@ -56,12 +50,12 @@ 
SymbolizableObjectFile::create(const object::ObjectFile *Obj, for (section_iterator Section : Obj->sections()) { Expected<StringRef> NameOrErr = Section->getName(); if (!NameOrErr) - return errorToErrorCode(NameOrErr.takeError()); + return NameOrErr.takeError(); if (*NameOrErr == ".opd") { Expected<StringRef> E = Section->getContents(); if (!E) - return errorToErrorCode(E.takeError()); + return E.takeError(); OpdExtractor.reset(new DataExtractor(*E, Obj->isLittleEndian(), Obj->getBytesInAddress())); OpdAddress = Section->getAddress(); @@ -72,14 +66,16 @@ SymbolizableObjectFile::create(const object::ObjectFile *Obj, std::vector<std::pair<SymbolRef, uint64_t>> Symbols = computeSymbolSizes(*Obj); for (auto &P : Symbols) - res->addSymbol(P.first, P.second, OpdExtractor.get(), OpdAddress); + if (Error E = + res->addSymbol(P.first, P.second, OpdExtractor.get(), OpdAddress)) + return std::move(E); // If this is a COFF object and we didn't find any symbols, try the export // table. if (Symbols.empty()) { if (auto *CoffObj = dyn_cast<COFFObjectFile>(Obj)) - if (auto EC = res->addCoffExportSymbols(CoffObj)) - return EC; + if (Error E = res->addCoffExportSymbols(CoffObj)) + return std::move(E); } std::vector<std::pair<SymbolDesc, StringRef>> &Fs = res->Functions, @@ -123,7 +119,7 @@ struct OffsetNamePair { } // end anonymous namespace -std::error_code SymbolizableObjectFile::addCoffExportSymbols( +Error SymbolizableObjectFile::addCoffExportSymbols( const COFFObjectFile *CoffObj) { // Get all export names and offsets. std::vector<OffsetNamePair> ExportSyms; @@ -137,7 +133,7 @@ std::error_code SymbolizableObjectFile::addCoffExportSymbols( ExportSyms.push_back(OffsetNamePair{Offset, Name}); } if (ExportSyms.empty()) - return std::error_code(); + return Error::success(); // Sort by ascending offset. 
array_pod_sort(ExportSyms.begin(), ExportSyms.end()); @@ -154,27 +150,27 @@ std::error_code SymbolizableObjectFile::addCoffExportSymbols( SymbolDesc SD = {SymbolStart, SymbolSize}; Functions.emplace_back(SD, Export.Name); } - return std::error_code(); + return Error::success(); } -std::error_code SymbolizableObjectFile::addSymbol(const SymbolRef &Symbol, - uint64_t SymbolSize, - DataExtractor *OpdExtractor, - uint64_t OpdAddress) { +Error SymbolizableObjectFile::addSymbol(const SymbolRef &Symbol, + uint64_t SymbolSize, + DataExtractor *OpdExtractor, + uint64_t OpdAddress) { // Avoid adding symbols from an unknown/undefined section. const ObjectFile *Obj = Symbol.getObject(); Expected<section_iterator> Sec = Symbol.getSection(); if (!Sec || (Obj && Obj->section_end() == *Sec)) - return std::error_code(); + return Error::success(); Expected<SymbolRef::Type> SymbolTypeOrErr = Symbol.getType(); if (!SymbolTypeOrErr) - return errorToErrorCode(SymbolTypeOrErr.takeError()); + return SymbolTypeOrErr.takeError(); SymbolRef::Type SymbolType = *SymbolTypeOrErr; if (SymbolType != SymbolRef::ST_Function && SymbolType != SymbolRef::ST_Data) - return std::error_code(); + return Error::success(); Expected<uint64_t> SymbolAddressOrErr = Symbol.getAddress(); if (!SymbolAddressOrErr) - return errorToErrorCode(SymbolAddressOrErr.takeError()); + return SymbolAddressOrErr.takeError(); uint64_t SymbolAddress = *SymbolAddressOrErr; if (UntagAddresses) { // For kernel addresses, bits 56-63 need to be set, so we sign extend bit 55 @@ -194,7 +190,7 @@ std::error_code SymbolizableObjectFile::addSymbol(const SymbolRef &Symbol, } Expected<StringRef> SymbolNameOrErr = Symbol.getName(); if (!SymbolNameOrErr) - return errorToErrorCode(SymbolNameOrErr.takeError()); + return SymbolNameOrErr.takeError(); StringRef SymbolName = *SymbolNameOrErr; // Mach-O symbol table names have leading underscore, skip it. 
if (Module->isMachO() && !SymbolName.empty() && SymbolName[0] == '_') @@ -204,7 +200,7 @@ std::error_code SymbolizableObjectFile::addSymbol(const SymbolRef &Symbol, auto &M = SymbolType == SymbolRef::ST_Function ? Functions : Objects; SymbolDesc SD = { SymbolAddress, SymbolSize }; M.emplace_back(SD, SymbolName); - return std::error_code(); + return Error::success(); } // Return true if this is a 32-bit x86 PE COFF module. @@ -251,16 +247,16 @@ bool SymbolizableObjectFile::shouldOverrideWithSymbolTable( DILineInfo SymbolizableObjectFile::symbolizeCode(object::SectionedAddress ModuleOffset, - FunctionNameKind FNKind, + DILineInfoSpecifier LineInfoSpecifier, bool UseSymbolTable) const { if (ModuleOffset.SectionIndex == object::SectionedAddress::UndefSection) ModuleOffset.SectionIndex = getModuleSectionIndexForAddress(ModuleOffset.Address); - DILineInfo LineInfo = DebugInfoContext->getLineInfoForAddress( - ModuleOffset, getDILineInfoSpecifier(FNKind)); + DILineInfo LineInfo = + DebugInfoContext->getLineInfoForAddress(ModuleOffset, LineInfoSpecifier); // Override function name from symbol table if necessary. 
- if (shouldOverrideWithSymbolTable(FNKind, UseSymbolTable)) { + if (shouldOverrideWithSymbolTable(LineInfoSpecifier.FNKind, UseSymbolTable)) { std::string FunctionName; uint64_t Start, Size; if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset.Address, @@ -272,20 +268,20 @@ SymbolizableObjectFile::symbolizeCode(object::SectionedAddress ModuleOffset, } DIInliningInfo SymbolizableObjectFile::symbolizeInlinedCode( - object::SectionedAddress ModuleOffset, FunctionNameKind FNKind, - bool UseSymbolTable) const { + object::SectionedAddress ModuleOffset, + DILineInfoSpecifier LineInfoSpecifier, bool UseSymbolTable) const { if (ModuleOffset.SectionIndex == object::SectionedAddress::UndefSection) ModuleOffset.SectionIndex = getModuleSectionIndexForAddress(ModuleOffset.Address); DIInliningInfo InlinedContext = DebugInfoContext->getInliningInfoForAddress( - ModuleOffset, getDILineInfoSpecifier(FNKind)); + ModuleOffset, LineInfoSpecifier); // Make sure there is at least one frame in context. if (InlinedContext.getNumberOfFrames() == 0) InlinedContext.addFrame(DILineInfo()); // Override the function name in lower frame with name from symbol table. 
- if (shouldOverrideWithSymbolTable(FNKind, UseSymbolTable)) { + if (shouldOverrideWithSymbolTable(LineInfoSpecifier.FNKind, UseSymbolTable)) { std::string FunctionName; uint64_t Start, Size; if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset.Address, diff --git a/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h b/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h index b5b9793a44d9..0ba304ee4c61 100644 --- a/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h +++ b/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h @@ -30,15 +30,15 @@ namespace symbolize { class SymbolizableObjectFile : public SymbolizableModule { public: - static ErrorOr<std::unique_ptr<SymbolizableObjectFile>> + static Expected<std::unique_ptr<SymbolizableObjectFile>> create(const object::ObjectFile *Obj, std::unique_ptr<DIContext> DICtx, bool UntagAddresses); DILineInfo symbolizeCode(object::SectionedAddress ModuleOffset, - FunctionNameKind FNKind, + DILineInfoSpecifier LineInfoSpecifier, bool UseSymbolTable) const override; DIInliningInfo symbolizeInlinedCode(object::SectionedAddress ModuleOffset, - FunctionNameKind FNKind, + DILineInfoSpecifier LineInfoSpecifier, bool UseSymbolTable) const override; DIGlobal symbolizeData(object::SectionedAddress ModuleOffset) const override; std::vector<DILocal> @@ -60,11 +60,10 @@ private: uint64_t &Size) const; // For big-endian PowerPC64 ELF, OpdAddress is the address of the .opd // (function descriptor) section and OpdExtractor refers to its contents. 
- std::error_code addSymbol(const object::SymbolRef &Symbol, - uint64_t SymbolSize, - DataExtractor *OpdExtractor = nullptr, - uint64_t OpdAddress = 0); - std::error_code addCoffExportSymbols(const object::COFFObjectFile *CoffObj); + Error addSymbol(const object::SymbolRef &Symbol, uint64_t SymbolSize, + DataExtractor *OpdExtractor = nullptr, + uint64_t OpdAddress = 0); + Error addCoffExportSymbols(const object::COFFObjectFile *CoffObj); /// Search for the first occurence of specified Address in ObjectFile. uint64_t getModuleSectionIndexForAddress(uint64_t Address) const; diff --git a/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp b/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp index 35e3ead6317b..1d767a2b0d88 100644 --- a/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp +++ b/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp @@ -51,8 +51,9 @@ LLVMSymbolizer::symbolizeCodeCommon(SymbolizableModule *Info, if (Opts.RelativeAddresses) ModuleOffset.Address += Info->getModulePreferredBase(); - DILineInfo LineInfo = Info->symbolizeCode(ModuleOffset, Opts.PrintFunctions, - Opts.UseSymbolTable); + DILineInfo LineInfo = Info->symbolizeCode( + ModuleOffset, DILineInfoSpecifier(Opts.PathStyle, Opts.PrintFunctions), + Opts.UseSymbolTable); if (Opts.Demangle) LineInfo.FunctionName = DemangleName(LineInfo.FunctionName, Info); return LineInfo; @@ -66,8 +67,7 @@ LLVMSymbolizer::symbolizeCode(const ObjectFile &Obj, if (I != Modules.end()) return symbolizeCodeCommon(I->second.get(), ModuleOffset); - std::unique_ptr<DIContext> Context = - DWARFContext::create(Obj, nullptr, DWARFContext::defaultErrorHandler); + std::unique_ptr<DIContext> Context = DWARFContext::create(Obj); Expected<SymbolizableModule *> InfoOrErr = createModuleInfo(&Obj, std::move(Context), ModuleName); if (!InfoOrErr) @@ -104,7 +104,8 @@ LLVMSymbolizer::symbolizeInlinedCode(const std::string &ModuleName, ModuleOffset.Address += Info->getModulePreferredBase(); DIInliningInfo InlinedContext = Info->symbolizeInlinedCode( - ModuleOffset, 
Opts.PrintFunctions, Opts.UseSymbolTable); + ModuleOffset, DILineInfoSpecifier(Opts.PathStyle, Opts.PrintFunctions), + Opts.UseSymbolTable); if (Opts.Demangle) { for (int i = 0, n = InlinedContext.getNumberOfFrames(); i < n; i++) { auto *Frame = InlinedContext.getMutableFrame(i); @@ -184,7 +185,7 @@ std::string getDarwinDWARFResourceForPath( } sys::path::append(ResourceName, "Contents", "Resources", "DWARF"); sys::path::append(ResourceName, Basename); - return ResourceName.str(); + return std::string(ResourceName.str()); } bool checkFileCRC(StringRef Path, uint32_t CRCHash) { @@ -205,14 +206,14 @@ bool findDebugBinary(const std::string &OrigPath, // Try relative/path/to/original_binary/debuglink_name llvm::sys::path::append(DebugPath, DebuglinkName); if (checkFileCRC(DebugPath, CRCHash)) { - Result = DebugPath.str(); + Result = std::string(DebugPath.str()); return true; } // Try relative/path/to/original_binary/.debug/debuglink_name DebugPath = OrigDir; llvm::sys::path::append(DebugPath, ".debug", DebuglinkName); if (checkFileCRC(DebugPath, CRCHash)) { - Result = DebugPath.str(); + Result = std::string(DebugPath.str()); return true; } // Make the path absolute so that lookups will go to @@ -234,7 +235,7 @@ bool findDebugBinary(const std::string &OrigPath, llvm::sys::path::append(DebugPath, llvm::sys::path::relative_path(OrigDir), DebuglinkName); if (checkFileCRC(DebugPath, CRCHash)) { - Result = DebugPath.str(); + Result = std::string(DebugPath.str()); return true; } return false; @@ -300,6 +301,7 @@ Optional<ArrayRef<uint8_t>> getBuildID(const ELFFile<ELFT> *Obj) { for (auto N : Obj->notes(P, Err)) if (N.getType() == ELF::NT_GNU_BUILD_ID && N.getName() == ELF::ELF_NOTE_GNU) return N.getDesc(); + consumeError(std::move(Err)); } return {}; } @@ -341,7 +343,7 @@ bool findDebugBinary(const std::vector<std::string> &DebugFileDirectory, #endif ); if (llvm::sys::fs::exists(Path)) { - Result = Path.str(); + Result = std::string(Path.str()); return true; } } else { @@ 
-349,7 +351,7 @@ bool findDebugBinary(const std::vector<std::string> &DebugFileDirectory, // Try <debug-file-directory>/.build-id/../... SmallString<128> Path = getDebugPath(Directory); if (llvm::sys::fs::exists(Path)) { - Result = Path.str(); + Result = std::string(Path.str()); return true; } } @@ -365,9 +367,11 @@ ObjectFile *LLVMSymbolizer::lookUpDsymFile(const std::string &ExePath, // resource directory. std::vector<std::string> DsymPaths; StringRef Filename = sys::path::filename(ExePath); - DsymPaths.push_back(getDarwinDWARFResourceForPath(ExePath, Filename)); + DsymPaths.push_back( + getDarwinDWARFResourceForPath(ExePath, std::string(Filename))); for (const auto &Path : Opts.DsymHints) { - DsymPaths.push_back(getDarwinDWARFResourceForPath(Path, Filename)); + DsymPaths.push_back( + getDarwinDWARFResourceForPath(Path, std::string(Filename))); } for (const auto &Path : DsymPaths) { auto DbgObjOrErr = getOrCreateObject(Path, ArchName); @@ -508,11 +512,11 @@ LLVMSymbolizer::createModuleInfo(const ObjectFile *Obj, std::unique_ptr<SymbolizableModule> SymMod; if (InfoOrErr) SymMod = std::move(*InfoOrErr); - auto InsertResult = - Modules.insert(std::make_pair(ModuleName, std::move(SymMod))); + auto InsertResult = Modules.insert( + std::make_pair(std::string(ModuleName), std::move(SymMod))); assert(InsertResult.second); - if (std::error_code EC = InfoOrErr.getError()) - return errorCodeToError(EC); + if (!InfoOrErr) + return InfoOrErr.takeError(); return InsertResult.first->second.get(); } @@ -551,8 +555,11 @@ LLVMSymbolizer::getOrCreateModuleInfo(const std::string &ModuleName) { if (!EC && DebugInfo != nullptr && !PDBFileName.empty()) { using namespace pdb; std::unique_ptr<IPDBSession> Session; - if (auto Err = loadDataForEXE(PDB_ReaderType::DIA, - Objects.first->getFileName(), Session)) { + PDB_ReaderType ReaderType = Opts.UseNativePDBReader + ? 
PDB_ReaderType::Native + : PDB_ReaderType::DIA; + if (auto Err = loadDataForEXE(ReaderType, Objects.first->getFileName(), + Session)) { Modules.emplace(ModuleName, std::unique_ptr<SymbolizableModule>()); // Return along the PDB filename to provide more context return createFileError(PDBFileName, std::move(Err)); @@ -561,9 +568,7 @@ LLVMSymbolizer::getOrCreateModuleInfo(const std::string &ModuleName) { } } if (!Context) - Context = - DWARFContext::create(*Objects.second, nullptr, - DWARFContext::defaultErrorHandler, Opts.DWPName); + Context = DWARFContext::create(*Objects.second, nullptr, Opts.DWPName); return createModuleInfo(Objects.first, std::move(Context), ModuleName); } @@ -619,7 +624,7 @@ LLVMSymbolizer::DemangleName(const std::string &Name, // Only do MSVC C++ demangling on symbols starting with '?'. int status = 0; char *DemangledName = microsoftDemangle( - Name.c_str(), nullptr, nullptr, &status, + Name.c_str(), nullptr, nullptr, nullptr, &status, MSDemangleFlags(MSDF_NoAccessSpecifier | MSDF_NoCallingConvention | MSDF_NoMemberType | MSDF_NoReturnType)); if (status != 0) |