diff options
Diffstat (limited to 'llvm/lib/CodeGen/AsmPrinter')
43 files changed, 23535 insertions, 0 deletions
diff --git a/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp b/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp new file mode 100644 index 000000000000..f6ef85a5b78f --- /dev/null +++ b/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp @@ -0,0 +1,135 @@ +//===-- CodeGen/AsmPrinter/ARMException.cpp - ARM EHABI Exception Impl ----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing DWARF exception info into asm files. +// +//===----------------------------------------------------------------------===// + +#include "DwarfException.h" +#include "llvm/ADT/Twine.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Mangler.h" +#include "llvm/IR/Module.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Target/TargetOptions.h" +using namespace llvm; + +ARMException::ARMException(AsmPrinter *A) : DwarfCFIExceptionBase(A) {} + +ARMException::~ARMException() {} + +ARMTargetStreamer &ARMException::getTargetStreamer() { + MCTargetStreamer &TS = *Asm->OutStreamer->getTargetStreamer(); + return static_cast<ARMTargetStreamer &>(TS); +} + +void ARMException::beginFunction(const MachineFunction *MF) { + if (Asm->MAI->getExceptionHandlingType() == ExceptionHandling::ARM) + getTargetStreamer().emitFnStart(); + // See if we need call frame info. + AsmPrinter::CFIMoveType MoveType = Asm->needsCFIMoves(); + assert(MoveType != AsmPrinter::CFI_M_EH && + "non-EH CFI not yet supported in prologue with EHABI lowering"); + + if (MoveType == AsmPrinter::CFI_M_Debug) { + if (!hasEmittedCFISections) { + if (Asm->needsOnlyDebugCFIMoves()) + Asm->OutStreamer->EmitCFISections(false, true); + hasEmittedCFISections = true; + } + + shouldEmitCFI = true; + Asm->OutStreamer->EmitCFIStartProc(false); + } +} + +/// endFunction - Gather and emit post-function exception information. +/// +void ARMException::endFunction(const MachineFunction *MF) { + ARMTargetStreamer &ATS = getTargetStreamer(); + const Function &F = MF->getFunction(); + const Function *Per = nullptr; + if (F.hasPersonalityFn()) + Per = dyn_cast<Function>(F.getPersonalityFn()->stripPointerCasts()); + bool forceEmitPersonality = + F.hasPersonalityFn() && !isNoOpWithoutInvoke(classifyEHPersonality(Per)) && + F.needsUnwindTableEntry(); + bool shouldEmitPersonality = forceEmitPersonality || + !MF->getLandingPads().empty(); + if (!Asm->MF->getFunction().needsUnwindTableEntry() && + !shouldEmitPersonality) + ATS.emitCantUnwind(); + else if (shouldEmitPersonality) { + // Emit references to personality. + if (Per) { + MCSymbol *PerSym = Asm->getSymbol(Per); + Asm->OutStreamer->EmitSymbolAttribute(PerSym, MCSA_Global); + ATS.emitPersonality(PerSym); + } + + // Emit .handlerdata directive. + ATS.emitHandlerData(); + + // Emit actual exception table + emitExceptionTable(); + } + + if (Asm->MAI->getExceptionHandlingType() == ExceptionHandling::ARM) + ATS.emitFnEnd(); +} + +void ARMException::emitTypeInfos(unsigned TTypeEncoding, + MCSymbol *TTBaseLabel) { + const MachineFunction *MF = Asm->MF; + const std::vector<const GlobalValue *> &TypeInfos = MF->getTypeInfos(); + const std::vector<unsigned> &FilterIds = MF->getFilterIds(); + + bool VerboseAsm = Asm->OutStreamer->isVerboseAsm(); + + int Entry = 0; + // Emit the Catch TypeInfos. + if (VerboseAsm && !TypeInfos.empty()) { + Asm->OutStreamer->AddComment(">> Catch TypeInfos <<"); + Asm->OutStreamer->AddBlankLine(); + Entry = TypeInfos.size(); + } + + for (const GlobalValue *GV : reverse(TypeInfos)) { + if (VerboseAsm) + Asm->OutStreamer->AddComment("TypeInfo " + Twine(Entry--)); + Asm->EmitTTypeReference(GV, TTypeEncoding); + } + + Asm->OutStreamer->EmitLabel(TTBaseLabel); + + // Emit the Exception Specifications. + if (VerboseAsm && !FilterIds.empty()) { + Asm->OutStreamer->AddComment(">> Filter TypeInfos <<"); + Asm->OutStreamer->AddBlankLine(); + Entry = 0; + } + for (std::vector<unsigned>::const_iterator + I = FilterIds.begin(), E = FilterIds.end(); I < E; ++I) { + unsigned TypeID = *I; + if (VerboseAsm) { + --Entry; + if (TypeID != 0) + Asm->OutStreamer->AddComment("FilterInfo " + Twine(Entry)); + } + + Asm->EmitTTypeReference((TypeID == 0 ? nullptr : TypeInfos[TypeID - 1]), + TTypeEncoding); + } +} diff --git a/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp b/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp new file mode 100644 index 000000000000..b1b7921ea976 --- /dev/null +++ b/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp @@ -0,0 +1,712 @@ +//===- llvm/CodeGen/AsmPrinter/AccelTable.cpp - Accelerator Tables --------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing accelerator tables. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/AccelTable.h" +#include "DwarfCompileUnit.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/DIE.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include <algorithm> +#include <cstddef> +#include <cstdint> +#include <limits> +#include <vector> + +using namespace llvm; + +void AccelTableBase::computeBucketCount() { + // First get the number of unique hashes. + std::vector<uint32_t> Uniques; + Uniques.reserve(Entries.size()); + for (const auto &E : Entries) + Uniques.push_back(E.second.HashValue); + array_pod_sort(Uniques.begin(), Uniques.end()); + std::vector<uint32_t>::iterator P = + std::unique(Uniques.begin(), Uniques.end()); + + UniqueHashCount = std::distance(Uniques.begin(), P); + + if (UniqueHashCount > 1024) + BucketCount = UniqueHashCount / 4; + else if (UniqueHashCount > 16) + BucketCount = UniqueHashCount / 2; + else + BucketCount = std::max<uint32_t>(UniqueHashCount, 1); +} + +void AccelTableBase::finalize(AsmPrinter *Asm, StringRef Prefix) { + // Create the individual hash data outputs. + for (auto &E : Entries) { + // Unique the entries. + llvm::stable_sort(E.second.Values, + [](const AccelTableData *A, const AccelTableData *B) { + return *A < *B; + }); + E.second.Values.erase( + std::unique(E.second.Values.begin(), E.second.Values.end()), + E.second.Values.end()); + } + + // Figure out how many buckets we need, then compute the bucket contents and + // the final ordering. The hashes and offsets can be emitted by walking these + // data structures. We add temporary symbols to the data so they can be + // referenced when emitting the offsets. + computeBucketCount(); + + // Compute bucket contents and final ordering. + Buckets.resize(BucketCount); + for (auto &E : Entries) { + uint32_t Bucket = E.second.HashValue % BucketCount; + Buckets[Bucket].push_back(&E.second); + E.second.Sym = Asm->createTempSymbol(Prefix); + } + + // Sort the contents of the buckets by hash value so that hash collisions end + // up together. Stable sort makes testing easier and doesn't cost much more. + for (auto &Bucket : Buckets) + llvm::stable_sort(Bucket, [](HashData *LHS, HashData *RHS) { + return LHS->HashValue < RHS->HashValue; + }); +} + +namespace { +/// Base class for writing out Accelerator tables. It holds the common +/// functionality for the two Accelerator table types. +class AccelTableWriter { +protected: + AsmPrinter *const Asm; ///< Destination. + const AccelTableBase &Contents; ///< Data to emit. + + /// Controls whether to emit duplicate hash and offset table entries for names + /// with identical hashes. Apple tables don't emit duplicate entries, DWARF v5 + /// tables do. + const bool SkipIdenticalHashes; + + void emitHashes() const; + + /// Emit offsets to lists of entries with identical names. The offsets are + /// relative to the Base argument. + void emitOffsets(const MCSymbol *Base) const; + +public: + AccelTableWriter(AsmPrinter *Asm, const AccelTableBase &Contents, + bool SkipIdenticalHashes) + : Asm(Asm), Contents(Contents), SkipIdenticalHashes(SkipIdenticalHashes) { + } +}; + +class AppleAccelTableWriter : public AccelTableWriter { + using Atom = AppleAccelTableData::Atom; + + /// The fixed header of an Apple Accelerator Table. + struct Header { + uint32_t Magic = MagicHash; + uint16_t Version = 1; + uint16_t HashFunction = dwarf::DW_hash_function_djb; + uint32_t BucketCount; + uint32_t HashCount; + uint32_t HeaderDataLength; + + /// 'HASH' magic value to detect endianness. + static const uint32_t MagicHash = 0x48415348; + + Header(uint32_t BucketCount, uint32_t UniqueHashCount, uint32_t DataLength) + : BucketCount(BucketCount), HashCount(UniqueHashCount), + HeaderDataLength(DataLength) {} + + void emit(AsmPrinter *Asm) const; +#ifndef NDEBUG + void print(raw_ostream &OS) const; + void dump() const { print(dbgs()); } +#endif + }; + + /// The HeaderData describes the structure of an Apple accelerator table + /// through a list of Atoms. + struct HeaderData { + /// In the case of data that is referenced via DW_FORM_ref_* the offset + /// base is used to describe the offset for all forms in the list of atoms. + uint32_t DieOffsetBase; + + const SmallVector<Atom, 4> Atoms; + + HeaderData(ArrayRef<Atom> AtomList, uint32_t Offset = 0) + : DieOffsetBase(Offset), Atoms(AtomList.begin(), AtomList.end()) {} + + void emit(AsmPrinter *Asm) const; +#ifndef NDEBUG + void print(raw_ostream &OS) const; + void dump() const { print(dbgs()); } +#endif + }; + + Header Header; + HeaderData HeaderData; + const MCSymbol *SecBegin; + + void emitBuckets() const; + void emitData() const; + +public: + AppleAccelTableWriter(AsmPrinter *Asm, const AccelTableBase &Contents, + ArrayRef<Atom> Atoms, const MCSymbol *SecBegin) + : AccelTableWriter(Asm, Contents, true), + Header(Contents.getBucketCount(), Contents.getUniqueHashCount(), + 8 + (Atoms.size() * 4)), + HeaderData(Atoms), SecBegin(SecBegin) {} + + void emit() const; + +#ifndef NDEBUG + void print(raw_ostream &OS) const; + void dump() const { print(dbgs()); } +#endif +}; + +/// Class responsible for emitting a DWARF v5 Accelerator Table. The only +/// public function is emit(), which performs the actual emission. +/// +/// The class is templated in its data type. This allows us to emit both dyamic +/// and static data entries. A callback abstract the logic to provide a CU +/// index for a given entry, which is different per data type, but identical +/// for every entry in the same table. +template <typename DataT> +class Dwarf5AccelTableWriter : public AccelTableWriter { + struct Header { + uint32_t UnitLength = 0; + uint16_t Version = 5; + uint16_t Padding = 0; + uint32_t CompUnitCount; + uint32_t LocalTypeUnitCount = 0; + uint32_t ForeignTypeUnitCount = 0; + uint32_t BucketCount; + uint32_t NameCount; + uint32_t AbbrevTableSize = 0; + uint32_t AugmentationStringSize = sizeof(AugmentationString); + char AugmentationString[8] = {'L', 'L', 'V', 'M', '0', '7', '0', '0'}; + + Header(uint32_t CompUnitCount, uint32_t BucketCount, uint32_t NameCount) + : CompUnitCount(CompUnitCount), BucketCount(BucketCount), + NameCount(NameCount) {} + + void emit(const Dwarf5AccelTableWriter &Ctx) const; + }; + struct AttributeEncoding { + dwarf::Index Index; + dwarf::Form Form; + }; + + Header Header; + DenseMap<uint32_t, SmallVector<AttributeEncoding, 2>> Abbreviations; + ArrayRef<MCSymbol *> CompUnits; + llvm::function_ref<unsigned(const DataT &)> getCUIndexForEntry; + MCSymbol *ContributionStart = Asm->createTempSymbol("names_start"); + MCSymbol *ContributionEnd = Asm->createTempSymbol("names_end"); + MCSymbol *AbbrevStart = Asm->createTempSymbol("names_abbrev_start"); + MCSymbol *AbbrevEnd = Asm->createTempSymbol("names_abbrev_end"); + MCSymbol *EntryPool = Asm->createTempSymbol("names_entries"); + + DenseSet<uint32_t> getUniqueTags() const; + + // Right now, we emit uniform attributes for all tags. + SmallVector<AttributeEncoding, 2> getUniformAttributes() const; + + void emitCUList() const; + void emitBuckets() const; + void emitStringOffsets() const; + void emitAbbrevs() const; + void emitEntry(const DataT &Entry) const; + void emitData() const; + +public: + Dwarf5AccelTableWriter( + AsmPrinter *Asm, const AccelTableBase &Contents, + ArrayRef<MCSymbol *> CompUnits, + llvm::function_ref<unsigned(const DataT &)> GetCUIndexForEntry); + + void emit() const; +}; +} // namespace + +void AccelTableWriter::emitHashes() const { + uint64_t PrevHash = std::numeric_limits<uint64_t>::max(); + unsigned BucketIdx = 0; + for (auto &Bucket : Contents.getBuckets()) { + for (auto &Hash : Bucket) { + uint32_t HashValue = Hash->HashValue; + if (SkipIdenticalHashes && PrevHash == HashValue) + continue; + Asm->OutStreamer->AddComment("Hash in Bucket " + Twine(BucketIdx)); + Asm->emitInt32(HashValue); + PrevHash = HashValue; + } + BucketIdx++; + } +} + +void AccelTableWriter::emitOffsets(const MCSymbol *Base) const { + const auto &Buckets = Contents.getBuckets(); + uint64_t PrevHash = std::numeric_limits<uint64_t>::max(); + for (size_t i = 0, e = Buckets.size(); i < e; ++i) { + for (auto *Hash : Buckets[i]) { + uint32_t HashValue = Hash->HashValue; + if (SkipIdenticalHashes && PrevHash == HashValue) + continue; + PrevHash = HashValue; + Asm->OutStreamer->AddComment("Offset in Bucket " + Twine(i)); + Asm->EmitLabelDifference(Hash->Sym, Base, sizeof(uint32_t)); + } + } +} + +void AppleAccelTableWriter::Header::emit(AsmPrinter *Asm) const { + Asm->OutStreamer->AddComment("Header Magic"); + Asm->emitInt32(Magic); + Asm->OutStreamer->AddComment("Header Version"); + Asm->emitInt16(Version); + Asm->OutStreamer->AddComment("Header Hash Function"); + Asm->emitInt16(HashFunction); + Asm->OutStreamer->AddComment("Header Bucket Count"); + Asm->emitInt32(BucketCount); + Asm->OutStreamer->AddComment("Header Hash Count"); + Asm->emitInt32(HashCount); + Asm->OutStreamer->AddComment("Header Data Length"); + Asm->emitInt32(HeaderDataLength); +} + +void AppleAccelTableWriter::HeaderData::emit(AsmPrinter *Asm) const { + Asm->OutStreamer->AddComment("HeaderData Die Offset Base"); + Asm->emitInt32(DieOffsetBase); + Asm->OutStreamer->AddComment("HeaderData Atom Count"); + Asm->emitInt32(Atoms.size()); + + for (const Atom &A : Atoms) { + Asm->OutStreamer->AddComment(dwarf::AtomTypeString(A.Type)); + Asm->emitInt16(A.Type); + Asm->OutStreamer->AddComment(dwarf::FormEncodingString(A.Form)); + Asm->emitInt16(A.Form); + } +} + +void AppleAccelTableWriter::emitBuckets() const { + const auto &Buckets = Contents.getBuckets(); + unsigned index = 0; + for (size_t i = 0, e = Buckets.size(); i < e; ++i) { + Asm->OutStreamer->AddComment("Bucket " + Twine(i)); + if (!Buckets[i].empty()) + Asm->emitInt32(index); + else + Asm->emitInt32(std::numeric_limits<uint32_t>::max()); + // Buckets point in the list of hashes, not to the data. Do not increment + // the index multiple times in case of hash collisions. + uint64_t PrevHash = std::numeric_limits<uint64_t>::max(); + for (auto *HD : Buckets[i]) { + uint32_t HashValue = HD->HashValue; + if (PrevHash != HashValue) + ++index; + PrevHash = HashValue; + } + } +} + +void AppleAccelTableWriter::emitData() const { + const auto &Buckets = Contents.getBuckets(); + for (size_t i = 0, e = Buckets.size(); i < e; ++i) { + uint64_t PrevHash = std::numeric_limits<uint64_t>::max(); + for (auto &Hash : Buckets[i]) { + // Terminate the previous entry if there is no hash collision with the + // current one. + if (PrevHash != std::numeric_limits<uint64_t>::max() && + PrevHash != Hash->HashValue) + Asm->emitInt32(0); + // Remember to emit the label for our offset. + Asm->OutStreamer->EmitLabel(Hash->Sym); + Asm->OutStreamer->AddComment(Hash->Name.getString()); + Asm->emitDwarfStringOffset(Hash->Name); + Asm->OutStreamer->AddComment("Num DIEs"); + Asm->emitInt32(Hash->Values.size()); + for (const auto *V : Hash->Values) + static_cast<const AppleAccelTableData *>(V)->emit(Asm); + PrevHash = Hash->HashValue; + } + // Emit the final end marker for the bucket. + if (!Buckets[i].empty()) + Asm->emitInt32(0); + } +} + +void AppleAccelTableWriter::emit() const { + Header.emit(Asm); + HeaderData.emit(Asm); + emitBuckets(); + emitHashes(); + emitOffsets(SecBegin); + emitData(); +} + +template <typename DataT> +void Dwarf5AccelTableWriter<DataT>::Header::emit( + const Dwarf5AccelTableWriter &Ctx) const { + assert(CompUnitCount > 0 && "Index must have at least one CU."); + + AsmPrinter *Asm = Ctx.Asm; + Asm->OutStreamer->AddComment("Header: unit length"); + Asm->EmitLabelDifference(Ctx.ContributionEnd, Ctx.ContributionStart, + sizeof(uint32_t)); + Asm->OutStreamer->EmitLabel(Ctx.ContributionStart); + Asm->OutStreamer->AddComment("Header: version"); + Asm->emitInt16(Version); + Asm->OutStreamer->AddComment("Header: padding"); + Asm->emitInt16(Padding); + Asm->OutStreamer->AddComment("Header: compilation unit count"); + Asm->emitInt32(CompUnitCount); + Asm->OutStreamer->AddComment("Header: local type unit count"); + Asm->emitInt32(LocalTypeUnitCount); + Asm->OutStreamer->AddComment("Header: foreign type unit count"); + Asm->emitInt32(ForeignTypeUnitCount); + Asm->OutStreamer->AddComment("Header: bucket count"); + Asm->emitInt32(BucketCount); + Asm->OutStreamer->AddComment("Header: name count"); + Asm->emitInt32(NameCount); + Asm->OutStreamer->AddComment("Header: abbreviation table size"); + Asm->EmitLabelDifference(Ctx.AbbrevEnd, Ctx.AbbrevStart, sizeof(uint32_t)); + Asm->OutStreamer->AddComment("Header: augmentation string size"); + assert(AugmentationStringSize % 4 == 0); + Asm->emitInt32(AugmentationStringSize); + Asm->OutStreamer->AddComment("Header: augmentation string"); + Asm->OutStreamer->EmitBytes({AugmentationString, AugmentationStringSize}); +} + +template <typename DataT> +DenseSet<uint32_t> Dwarf5AccelTableWriter<DataT>::getUniqueTags() const { + DenseSet<uint32_t> UniqueTags; + for (auto &Bucket : Contents.getBuckets()) { + for (auto *Hash : Bucket) { + for (auto *Value : Hash->Values) { + unsigned Tag = static_cast<const DataT *>(Value)->getDieTag(); + UniqueTags.insert(Tag); + } + } + } + return UniqueTags; +} + +template <typename DataT> +SmallVector<typename Dwarf5AccelTableWriter<DataT>::AttributeEncoding, 2> +Dwarf5AccelTableWriter<DataT>::getUniformAttributes() const { + SmallVector<AttributeEncoding, 2> UA; + if (CompUnits.size() > 1) { + size_t LargestCUIndex = CompUnits.size() - 1; + dwarf::Form Form = DIEInteger::BestForm(/*IsSigned*/ false, LargestCUIndex); + UA.push_back({dwarf::DW_IDX_compile_unit, Form}); + } + UA.push_back({dwarf::DW_IDX_die_offset, dwarf::DW_FORM_ref4}); + return UA; +} + +template <typename DataT> +void Dwarf5AccelTableWriter<DataT>::emitCUList() const { + for (const auto &CU : enumerate(CompUnits)) { + Asm->OutStreamer->AddComment("Compilation unit " + Twine(CU.index())); + Asm->emitDwarfSymbolReference(CU.value()); + } +} + +template <typename DataT> +void Dwarf5AccelTableWriter<DataT>::emitBuckets() const { + uint32_t Index = 1; + for (const auto &Bucket : enumerate(Contents.getBuckets())) { + Asm->OutStreamer->AddComment("Bucket " + Twine(Bucket.index())); + Asm->emitInt32(Bucket.value().empty() ? 0 : Index); + Index += Bucket.value().size(); + } +} + +template <typename DataT> +void Dwarf5AccelTableWriter<DataT>::emitStringOffsets() const { + for (const auto &Bucket : enumerate(Contents.getBuckets())) { + for (auto *Hash : Bucket.value()) { + DwarfStringPoolEntryRef String = Hash->Name; + Asm->OutStreamer->AddComment("String in Bucket " + Twine(Bucket.index()) + + ": " + String.getString()); + Asm->emitDwarfStringOffset(String); + } + } +} + +template <typename DataT> +void Dwarf5AccelTableWriter<DataT>::emitAbbrevs() const { + Asm->OutStreamer->EmitLabel(AbbrevStart); + for (const auto &Abbrev : Abbreviations) { + Asm->OutStreamer->AddComment("Abbrev code"); + assert(Abbrev.first != 0); + Asm->EmitULEB128(Abbrev.first); + Asm->OutStreamer->AddComment(dwarf::TagString(Abbrev.first)); + Asm->EmitULEB128(Abbrev.first); + for (const auto &AttrEnc : Abbrev.second) { + Asm->EmitULEB128(AttrEnc.Index, dwarf::IndexString(AttrEnc.Index).data()); + Asm->EmitULEB128(AttrEnc.Form, + dwarf::FormEncodingString(AttrEnc.Form).data()); + } + Asm->EmitULEB128(0, "End of abbrev"); + Asm->EmitULEB128(0, "End of abbrev"); + } + Asm->EmitULEB128(0, "End of abbrev list"); + Asm->OutStreamer->EmitLabel(AbbrevEnd); +} + +template <typename DataT> +void Dwarf5AccelTableWriter<DataT>::emitEntry(const DataT &Entry) const { + auto AbbrevIt = Abbreviations.find(Entry.getDieTag()); + assert(AbbrevIt != Abbreviations.end() && + "Why wasn't this abbrev generated?"); + + Asm->EmitULEB128(AbbrevIt->first, "Abbreviation code"); + for (const auto &AttrEnc : AbbrevIt->second) { + Asm->OutStreamer->AddComment(dwarf::IndexString(AttrEnc.Index)); + switch (AttrEnc.Index) { + case dwarf::DW_IDX_compile_unit: { + DIEInteger ID(getCUIndexForEntry(Entry)); + ID.EmitValue(Asm, AttrEnc.Form); + break; + } + case dwarf::DW_IDX_die_offset: + assert(AttrEnc.Form == dwarf::DW_FORM_ref4); + Asm->emitInt32(Entry.getDieOffset()); + break; + default: + llvm_unreachable("Unexpected index attribute!"); + } + } +} + +template <typename DataT> void Dwarf5AccelTableWriter<DataT>::emitData() const { + Asm->OutStreamer->EmitLabel(EntryPool); + for (auto &Bucket : Contents.getBuckets()) { + for (auto *Hash : Bucket) { + // Remember to emit the label for our offset. + Asm->OutStreamer->EmitLabel(Hash->Sym); + for (const auto *Value : Hash->Values) + emitEntry(*static_cast<const DataT *>(Value)); + Asm->OutStreamer->AddComment("End of list: " + Hash->Name.getString()); + Asm->emitInt32(0); + } + } +} + +template <typename DataT> +Dwarf5AccelTableWriter<DataT>::Dwarf5AccelTableWriter( + AsmPrinter *Asm, const AccelTableBase &Contents, + ArrayRef<MCSymbol *> CompUnits, + llvm::function_ref<unsigned(const DataT &)> getCUIndexForEntry) + : AccelTableWriter(Asm, Contents, false), + Header(CompUnits.size(), Contents.getBucketCount(), + Contents.getUniqueNameCount()), + CompUnits(CompUnits), getCUIndexForEntry(std::move(getCUIndexForEntry)) { + DenseSet<uint32_t> UniqueTags = getUniqueTags(); + SmallVector<AttributeEncoding, 2> UniformAttributes = getUniformAttributes(); + + Abbreviations.reserve(UniqueTags.size()); + for (uint32_t Tag : UniqueTags) + Abbreviations.try_emplace(Tag, UniformAttributes); +} + +template <typename DataT> void Dwarf5AccelTableWriter<DataT>::emit() const { + Header.emit(*this); + emitCUList(); + emitBuckets(); + emitHashes(); + emitStringOffsets(); + emitOffsets(EntryPool); + emitAbbrevs(); + emitData(); + Asm->OutStreamer->EmitValueToAlignment(4, 0); + Asm->OutStreamer->EmitLabel(ContributionEnd); +} + +void llvm::emitAppleAccelTableImpl(AsmPrinter *Asm, AccelTableBase &Contents, + StringRef Prefix, const MCSymbol *SecBegin, + ArrayRef<AppleAccelTableData::Atom> Atoms) { + Contents.finalize(Asm, Prefix); + AppleAccelTableWriter(Asm, Contents, Atoms, SecBegin).emit(); +} + +void llvm::emitDWARF5AccelTable( + AsmPrinter *Asm, AccelTable<DWARF5AccelTableData> &Contents, + const DwarfDebug &DD, ArrayRef<std::unique_ptr<DwarfCompileUnit>> CUs) { + std::vector<MCSymbol *> CompUnits; + SmallVector<unsigned, 1> CUIndex(CUs.size()); + int Count = 0; + for (const auto &CU : enumerate(CUs)) { + if (CU.value()->getCUNode()->getNameTableKind() != + DICompileUnit::DebugNameTableKind::Default) + continue; + CUIndex[CU.index()] = Count++; + assert(CU.index() == CU.value()->getUniqueID()); + const DwarfCompileUnit *MainCU = + DD.useSplitDwarf() ? CU.value()->getSkeleton() : CU.value().get(); + CompUnits.push_back(MainCU->getLabelBegin()); + } + + if (CompUnits.empty()) + return; + + Asm->OutStreamer->SwitchSection( + Asm->getObjFileLowering().getDwarfDebugNamesSection()); + + Contents.finalize(Asm, "names"); + Dwarf5AccelTableWriter<DWARF5AccelTableData>( + Asm, Contents, CompUnits, + [&](const DWARF5AccelTableData &Entry) { + const DIE *CUDie = Entry.getDie().getUnitDie(); + return CUIndex[DD.lookupCU(CUDie)->getUniqueID()]; + }) + .emit(); +} + +void llvm::emitDWARF5AccelTable( + AsmPrinter *Asm, AccelTable<DWARF5AccelTableStaticData> &Contents, + ArrayRef<MCSymbol *> CUs, + llvm::function_ref<unsigned(const DWARF5AccelTableStaticData &)> + getCUIndexForEntry) { + Contents.finalize(Asm, "names"); + Dwarf5AccelTableWriter<DWARF5AccelTableStaticData>(Asm, Contents, CUs, + getCUIndexForEntry) + .emit(); +} + +void AppleAccelTableOffsetData::emit(AsmPrinter *Asm) const { + Asm->emitInt32(Die.getDebugSectionOffset()); +} + +void AppleAccelTableTypeData::emit(AsmPrinter *Asm) const { + Asm->emitInt32(Die.getDebugSectionOffset()); + Asm->emitInt16(Die.getTag()); + Asm->emitInt8(0); +} + +void AppleAccelTableStaticOffsetData::emit(AsmPrinter *Asm) const { + Asm->emitInt32(Offset); +} + +void AppleAccelTableStaticTypeData::emit(AsmPrinter *Asm) const { + Asm->emitInt32(Offset); + Asm->emitInt16(Tag); + Asm->emitInt8(ObjCClassIsImplementation ? dwarf::DW_FLAG_type_implementation + : 0); + Asm->emitInt32(QualifiedNameHash); +} + +constexpr AppleAccelTableData::Atom AppleAccelTableTypeData::Atoms[]; +constexpr AppleAccelTableData::Atom AppleAccelTableOffsetData::Atoms[]; +constexpr AppleAccelTableData::Atom AppleAccelTableStaticOffsetData::Atoms[]; +constexpr AppleAccelTableData::Atom AppleAccelTableStaticTypeData::Atoms[]; + +#ifndef NDEBUG +void AppleAccelTableWriter::Header::print(raw_ostream &OS) const { + OS << "Magic: " << format("0x%x", Magic) << "\n" + << "Version: " << Version << "\n" + << "Hash Function: " << HashFunction << "\n" + << "Bucket Count: " << BucketCount << "\n" + << "Header Data Length: " << HeaderDataLength << "\n"; +} + +void AppleAccelTableData::Atom::print(raw_ostream &OS) const { + OS << "Type: " << dwarf::AtomTypeString(Type) << "\n" + << "Form: " << dwarf::FormEncodingString(Form) << "\n"; +} + +void AppleAccelTableWriter::HeaderData::print(raw_ostream &OS) const { + OS << "DIE Offset Base: " << DieOffsetBase << "\n"; + for (auto Atom : Atoms) + Atom.print(OS); +} + +void AppleAccelTableWriter::print(raw_ostream &OS) const { + Header.print(OS); + HeaderData.print(OS); + Contents.print(OS); + SecBegin->print(OS, nullptr); +} + +void AccelTableBase::HashData::print(raw_ostream &OS) const { + OS << "Name: " << Name.getString() << "\n"; + OS << " Hash Value: " << format("0x%x", HashValue) << "\n"; + OS << " Symbol: "; + if (Sym) + OS << *Sym; + else + OS << "<none>"; + OS << "\n"; + for (auto *Value : Values) + Value->print(OS); +} + +void AccelTableBase::print(raw_ostream &OS) const { + // Print Content. + OS << "Entries: \n"; + for (const auto &Entry : Entries) { + OS << "Name: " << Entry.first() << "\n"; + for (auto *V : Entry.second.Values) + V->print(OS); + } + + OS << "Buckets and Hashes: \n"; + for (auto &Bucket : Buckets) + for (auto &Hash : Bucket) + Hash->print(OS); + + OS << "Data: \n"; + for (auto &E : Entries) + E.second.print(OS); +} + +void DWARF5AccelTableData::print(raw_ostream &OS) const { + OS << " Offset: " << getDieOffset() << "\n"; + OS << " Tag: " << dwarf::TagString(getDieTag()) << "\n"; +} + +void DWARF5AccelTableStaticData::print(raw_ostream &OS) const { + OS << " Offset: " << getDieOffset() << "\n"; + OS << " Tag: " << dwarf::TagString(getDieTag()) << "\n"; +} + +void AppleAccelTableOffsetData::print(raw_ostream &OS) const { + OS << " Offset: " << Die.getOffset() << "\n"; +} + +void AppleAccelTableTypeData::print(raw_ostream &OS) const { + OS << " Offset: " << Die.getOffset() << "\n"; + OS << " Tag: " << dwarf::TagString(Die.getTag()) << "\n"; +} + +void AppleAccelTableStaticOffsetData::print(raw_ostream &OS) const { + OS << " Static Offset: " << Offset << "\n"; +} + +void AppleAccelTableStaticTypeData::print(raw_ostream &OS) const { + OS << " Static Offset: " << Offset << "\n"; + OS << " QualifiedNameHash: " << format("%x\n", QualifiedNameHash) << "\n"; + OS << " Tag: " << dwarf::TagString(Tag) << "\n"; + OS << " ObjCClassIsImplementation: " + << (ObjCClassIsImplementation ? "true" : "false"); + OS << "\n"; +} +#endif diff --git a/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp b/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp new file mode 100644 index 000000000000..f11c7de5ed8a --- /dev/null +++ b/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp @@ -0,0 +1,77 @@ +//===- llvm/CodeGen/AddressPool.cpp - Dwarf Debug Framework ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "AddressPool.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include <utility> + +using namespace llvm; + +unsigned AddressPool::getIndex(const MCSymbol *Sym, bool TLS) { + HasBeenUsed = true; + auto IterBool = + Pool.insert(std::make_pair(Sym, AddressPoolEntry(Pool.size(), TLS))); + return IterBool.first->second.Number; +} + +MCSymbol *AddressPool::emitHeader(AsmPrinter &Asm, MCSection *Section) { + static const uint8_t AddrSize = Asm.getDataLayout().getPointerSize(); + StringRef Prefix = "debug_addr_"; + MCSymbol *BeginLabel = Asm.createTempSymbol(Prefix + "start"); + MCSymbol *EndLabel = Asm.createTempSymbol(Prefix + "end"); + + Asm.OutStreamer->AddComment("Length of contribution"); + Asm.EmitLabelDifference(EndLabel, BeginLabel, + 4); // TODO: Support DWARF64 format. + Asm.OutStreamer->EmitLabel(BeginLabel); + Asm.OutStreamer->AddComment("DWARF version number"); + Asm.emitInt16(Asm.getDwarfVersion()); + Asm.OutStreamer->AddComment("Address size"); + Asm.emitInt8(AddrSize); + Asm.OutStreamer->AddComment("Segment selector size"); + Asm.emitInt8(0); // TODO: Support non-zero segment_selector_size. + + return EndLabel; +} + +// Emit addresses into the section given. +void AddressPool::emit(AsmPrinter &Asm, MCSection *AddrSection) { + if (isEmpty()) + return; + + // Start the dwarf addr section. + Asm.OutStreamer->SwitchSection(AddrSection); + + MCSymbol *EndLabel = nullptr; + + if (Asm.getDwarfVersion() >= 5) + EndLabel = emitHeader(Asm, AddrSection); + + // Define the symbol that marks the start of the contribution. + // It is referenced via DW_AT_addr_base. + Asm.OutStreamer->EmitLabel(AddressTableBaseSym); + + // Order the address pool entries by ID + SmallVector<const MCExpr *, 64> Entries(Pool.size()); + + for (const auto &I : Pool) + Entries[I.second.Number] = + I.second.TLS + ? Asm.getObjFileLowering().getDebugThreadLocalSymbol(I.first) + : MCSymbolRefExpr::create(I.first, Asm.OutContext); + + for (const MCExpr *Entry : Entries) + Asm.OutStreamer->EmitValue(Entry, Asm.getDataLayout().getPointerSize()); + + if (EndLabel) + Asm.OutStreamer->EmitLabel(EndLabel); +} diff --git a/llvm/lib/CodeGen/AsmPrinter/AddressPool.h b/llvm/lib/CodeGen/AsmPrinter/AddressPool.h new file mode 100644 index 000000000000..f92cf72093ca --- /dev/null +++ b/llvm/lib/CodeGen/AsmPrinter/AddressPool.h @@ -0,0 +1,65 @@ +//===- llvm/CodeGen/AddressPool.h - Dwarf Debug Framework -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_ADDRESSPOOL_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_ADDRESSPOOL_H + +#include "llvm/ADT/DenseMap.h" + +namespace llvm { + +class AsmPrinter; +class MCSection; +class MCSymbol; + +// Collection of addresses for this unit and assorted labels. +// A Symbol->unsigned mapping of addresses used by indirect +// references. +class AddressPool { + struct AddressPoolEntry { + unsigned Number; + bool TLS; + + AddressPoolEntry(unsigned Number, bool TLS) : Number(Number), TLS(TLS) {} + }; + DenseMap<const MCSymbol *, AddressPoolEntry> Pool; + + /// Record whether the AddressPool has been queried for an address index since + /// the last "resetUsedFlag" call. Used to implement type unit fallback - a + /// type that references addresses cannot be placed in a type unit when using + /// fission. + bool HasBeenUsed = false; + +public: + AddressPool() = default; + + /// Returns the index into the address pool with the given + /// label/symbol. + unsigned getIndex(const MCSymbol *Sym, bool TLS = false); + + void emit(AsmPrinter &Asm, MCSection *AddrSection); + + bool isEmpty() { return Pool.empty(); } + + bool hasBeenUsed() const { return HasBeenUsed; } + + void resetUsedFlag() { HasBeenUsed = false; } + + MCSymbol *getLabel() { return AddressTableBaseSym; } + void setLabel(MCSymbol *Sym) { AddressTableBaseSym = Sym; } + +private: + MCSymbol *emitHeader(AsmPrinter &Asm, MCSection *Section); + + /// Symbol designates the start of the contribution to the address table. + MCSymbol *AddressTableBaseSym = nullptr; +}; + +} // end namespace llvm + +#endif // LLVM_LIB_CODEGEN_ASMPRINTER_ADDRESSPOOL_H diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp new file mode 100644 index 000000000000..73c53d6c4af5 --- /dev/null +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -0,0 +1,3211 @@ +//===- AsmPrinter.cpp - Common AsmPrinter code ----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the AsmPrinter class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/AsmPrinter.h" +#include "CodeViewDebug.h" +#include "DwarfDebug.h" +#include "DwarfException.h" +#include "WasmException.h" +#include "WinCFGuard.h" +#include "WinException.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Triple.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/EHPersonalities.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/CodeGen/GCMetadata.h" +#include "llvm/CodeGen/GCMetadataPrinter.h" +#include "llvm/CodeGen/GCStrategy.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBundle.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineModuleInfoImpls.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" +#include "llvm/CodeGen/StackMaps.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Comdat.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalIFunc.h" +#include "llvm/IR/GlobalIndirectSymbol.h" +#include "llvm/IR/GlobalObject.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Mangler.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" +#include "llvm/IR/RemarkStreamer.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCCodePadder.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDirectives.h" +#include "llvm/MC/MCDwarf.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCSectionCOFF.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCSectionXCOFF.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCSymbolELF.h" +#include "llvm/MC/MCSymbolXCOFF.h" +#include "llvm/MC/MCTargetOptions.h" +#include "llvm/MC/MCValue.h" +#include "llvm/MC/SectionKind.h" +#include "llvm/Pass.h" +#include "llvm/Remarks/Remark.h" +#include "llvm/Remarks/RemarkFormat.h" +#include "llvm/Remarks/RemarkStringTable.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/Timer.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include <algorithm> +#include <cassert> +#include <cinttypes> +#include <cstdint> +#include <iterator> +#include <limits> +#include <memory> +#include <string> +#include <utility> +#include <vector> + +using namespace llvm; + +#define DEBUG_TYPE "asm-printer" + +static const char *const DWARFGroupName = "dwarf"; +static const char *const DWARFGroupDescription = "DWARF Emission"; +static const char *const DbgTimerName = "emit"; +static const char *const DbgTimerDescription = "Debug Info Emission"; +static const char *const EHTimerName = "write_exception"; +static const char *const EHTimerDescription = "DWARF Exception Writer"; +static const char *const CFGuardName = "Control Flow Guard"; +static const char *const CFGuardDescription = "Control Flow Guard Tables"; +static const char *const CodeViewLineTablesGroupName = "linetables"; +static const char *const CodeViewLineTablesGroupDescription = + "CodeView Line Tables"; + +STATISTIC(EmittedInsts, "Number of machine instrs printed"); + +static cl::opt<bool> EnableRemarksSection( + "remarks-section", + cl::desc("Emit a section containing remark diagnostics metadata"), + cl::init(false)); + +char AsmPrinter::ID = 0; + +using gcp_map_type = DenseMap<GCStrategy *, std::unique_ptr<GCMetadataPrinter>>; + +static gcp_map_type &getGCMap(void *&P) { + if (!P) + P = new gcp_map_type(); + return *(gcp_map_type*)P; +} + +/// getGVAlignment - Return the alignment to use for the specified global +/// value. This rounds up to the preferred alignment if possible and legal. +Align AsmPrinter::getGVAlignment(const GlobalValue *GV, const DataLayout &DL, + Align InAlign) { + Align Alignment; + if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) + Alignment = Align(DL.getPreferredAlignment(GVar)); + + // If InAlign is specified, round it to it. + if (InAlign > Alignment) + Alignment = InAlign; + + // If the GV has a specified alignment, take it into account. + const MaybeAlign GVAlign(GV->getAlignment()); + if (!GVAlign) + return Alignment; + + assert(GVAlign && "GVAlign must be set"); + + // If the GVAlign is larger than NumBits, or if we are required to obey + // NumBits because the GV has an assigned section, obey it. + if (*GVAlign > Alignment || GV->hasSection()) + Alignment = *GVAlign; + return Alignment; +} + +AsmPrinter::AsmPrinter(TargetMachine &tm, std::unique_ptr<MCStreamer> Streamer) + : MachineFunctionPass(ID), TM(tm), MAI(tm.getMCAsmInfo()), + OutContext(Streamer->getContext()), OutStreamer(std::move(Streamer)) { + VerboseAsm = OutStreamer->isVerboseAsm(); +} + +AsmPrinter::~AsmPrinter() { + assert(!DD && Handlers.empty() && "Debug/EH info didn't get finalized"); + + if (GCMetadataPrinters) { + gcp_map_type &GCMap = getGCMap(GCMetadataPrinters); + + delete &GCMap; + GCMetadataPrinters = nullptr; + } +} + +bool AsmPrinter::isPositionIndependent() const { + return TM.isPositionIndependent(); +} + +/// getFunctionNumber - Return a unique ID for the current function. +unsigned AsmPrinter::getFunctionNumber() const { + return MF->getFunctionNumber(); +} + +const TargetLoweringObjectFile &AsmPrinter::getObjFileLowering() const { + return *TM.getObjFileLowering(); +} + +const DataLayout &AsmPrinter::getDataLayout() const { + return MMI->getModule()->getDataLayout(); +} + +// Do not use the cached DataLayout because some client use it without a Module +// (dsymutil, llvm-dwarfdump). +unsigned AsmPrinter::getPointerSize() const { + return TM.getPointerSize(0); // FIXME: Default address space +} + +const MCSubtargetInfo &AsmPrinter::getSubtargetInfo() const { + assert(MF && "getSubtargetInfo requires a valid MachineFunction!"); + return MF->getSubtarget<MCSubtargetInfo>(); +} + +void AsmPrinter::EmitToStreamer(MCStreamer &S, const MCInst &Inst) { + S.EmitInstruction(Inst, getSubtargetInfo()); +} + +void AsmPrinter::emitInitialRawDwarfLocDirective(const MachineFunction &MF) { + assert(DD && "Dwarf debug file is not defined."); + assert(OutStreamer->hasRawTextSupport() && "Expected assembly output mode."); + (void)DD->emitInitialLocDirective(MF, /*CUID=*/0); +} + +/// getCurrentSection() - Return the current section we are emitting to. +const MCSection *AsmPrinter::getCurrentSection() const { + return OutStreamer->getCurrentSectionOnly(); +} + +void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); + AU.addRequired<MachineModuleInfoWrapperPass>(); + AU.addRequired<MachineOptimizationRemarkEmitterPass>(); + AU.addRequired<GCModuleInfo>(); +} + +bool AsmPrinter::doInitialization(Module &M) { + auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>(); + MMI = MMIWP ? &MMIWP->getMMI() : nullptr; + + // Initialize TargetLoweringObjectFile. + const_cast<TargetLoweringObjectFile&>(getObjFileLowering()) + .Initialize(OutContext, TM); + + const_cast<TargetLoweringObjectFile &>(getObjFileLowering()) + .getModuleMetadata(M); + + OutStreamer->InitSections(false); + + // Emit the version-min deployment target directive if needed. + // + // FIXME: If we end up with a collection of these sorts of Darwin-specific + // or ELF-specific things, it may make sense to have a platform helper class + // that will work with the target helper class. For now keep it here, as the + // alternative is duplicated code in each of the target asm printers that + // use the directive, where it would need the same conditionalization + // anyway. + const Triple &Target = TM.getTargetTriple(); + OutStreamer->EmitVersionForTarget(Target, M.getSDKVersion()); + + // Allow the target to emit any magic that it wants at the start of the file. + EmitStartOfAsmFile(M); + + // Very minimal debug info. It is ignored if we emit actual debug info. If we + // don't, this at least helps the user find where a global came from. + if (MAI->hasSingleParameterDotFile()) { + // .file "foo.c" + OutStreamer->EmitFileDirective( + llvm::sys::path::filename(M.getSourceFileName())); + } + + GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>(); + assert(MI && "AsmPrinter didn't require GCModuleInfo?"); + for (auto &I : *MI) + if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(*I)) + MP->beginAssembly(M, *MI, *this); + + // Emit module-level inline asm if it exists. + if (!M.getModuleInlineAsm().empty()) { + // We're at the module level. Construct MCSubtarget from the default CPU + // and target triple. + std::unique_ptr<MCSubtargetInfo> STI(TM.getTarget().createMCSubtargetInfo( + TM.getTargetTriple().str(), TM.getTargetCPU(), + TM.getTargetFeatureString())); + OutStreamer->AddComment("Start of file scope inline assembly"); + OutStreamer->AddBlankLine(); + EmitInlineAsm(M.getModuleInlineAsm()+"\n", + OutContext.getSubtargetCopy(*STI), TM.Options.MCOptions); + OutStreamer->AddComment("End of file scope inline assembly"); + OutStreamer->AddBlankLine(); + } + + if (MAI->doesSupportDebugInformation()) { + bool EmitCodeView = MMI->getModule()->getCodeViewFlag(); + if (EmitCodeView && TM.getTargetTriple().isOSWindows()) { + Handlers.emplace_back(std::make_unique<CodeViewDebug>(this), + DbgTimerName, DbgTimerDescription, + CodeViewLineTablesGroupName, + CodeViewLineTablesGroupDescription); + } + if (!EmitCodeView || MMI->getModule()->getDwarfVersion()) { + DD = new DwarfDebug(this, &M); + DD->beginModule(); + Handlers.emplace_back(std::unique_ptr<DwarfDebug>(DD), DbgTimerName, + DbgTimerDescription, DWARFGroupName, + DWARFGroupDescription); + } + } + + switch (MAI->getExceptionHandlingType()) { + case ExceptionHandling::SjLj: + case ExceptionHandling::DwarfCFI: + case ExceptionHandling::ARM: + isCFIMoveForDebugging = true; + if (MAI->getExceptionHandlingType() != ExceptionHandling::DwarfCFI) + break; + for (auto &F: M.getFunctionList()) { + // If the module contains any function with unwind data, + // .eh_frame has to be emitted. + // Ignore functions that won't get emitted. + if (!F.isDeclarationForLinker() && F.needsUnwindTableEntry()) { + isCFIMoveForDebugging = false; + break; + } + } + break; + default: + isCFIMoveForDebugging = false; + break; + } + + EHStreamer *ES = nullptr; + switch (MAI->getExceptionHandlingType()) { + case ExceptionHandling::None: + break; + case ExceptionHandling::SjLj: + case ExceptionHandling::DwarfCFI: + ES = new DwarfCFIException(this); + break; + case ExceptionHandling::ARM: + ES = new ARMException(this); + break; + case ExceptionHandling::WinEH: + switch (MAI->getWinEHEncodingType()) { + default: llvm_unreachable("unsupported unwinding information encoding"); + case WinEH::EncodingType::Invalid: + break; + case WinEH::EncodingType::X86: + case WinEH::EncodingType::Itanium: + ES = new WinException(this); + break; + } + break; + case ExceptionHandling::Wasm: + ES = new WasmException(this); + break; + } + if (ES) + Handlers.emplace_back(std::unique_ptr<EHStreamer>(ES), EHTimerName, + EHTimerDescription, DWARFGroupName, + DWARFGroupDescription); + + if (mdconst::extract_or_null<ConstantInt>( + MMI->getModule()->getModuleFlag("cfguardtable"))) + Handlers.emplace_back(std::make_unique<WinCFGuard>(this), CFGuardName, + CFGuardDescription, DWARFGroupName, + DWARFGroupDescription); + + return false; +} + +static bool canBeHidden(const GlobalValue *GV, const MCAsmInfo &MAI) { + if (!MAI.hasWeakDefCanBeHiddenDirective()) + return false; + + return GV->canBeOmittedFromSymbolTable(); +} + +void AsmPrinter::EmitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const { + GlobalValue::LinkageTypes Linkage = GV->getLinkage(); + switch (Linkage) { + case GlobalValue::CommonLinkage: + case GlobalValue::LinkOnceAnyLinkage: + case GlobalValue::LinkOnceODRLinkage: + case GlobalValue::WeakAnyLinkage: + case GlobalValue::WeakODRLinkage: + if (MAI->hasWeakDefDirective()) { + // .globl _foo + OutStreamer->EmitSymbolAttribute(GVSym, MCSA_Global); + + if (!canBeHidden(GV, *MAI)) + // .weak_definition _foo + OutStreamer->EmitSymbolAttribute(GVSym, MCSA_WeakDefinition); + else + OutStreamer->EmitSymbolAttribute(GVSym, MCSA_WeakDefAutoPrivate); + } else if (MAI->hasLinkOnceDirective()) { + // .globl _foo + OutStreamer->EmitSymbolAttribute(GVSym, MCSA_Global); + //NOTE: linkonce is handled by the section the symbol was assigned to. + } else { + // .weak _foo + OutStreamer->EmitSymbolAttribute(GVSym, MCSA_Weak); + } + return; + case GlobalValue::ExternalLinkage: + // If external, declare as a global symbol: .globl _foo + OutStreamer->EmitSymbolAttribute(GVSym, MCSA_Global); + return; + case GlobalValue::PrivateLinkage: + return; + case GlobalValue::InternalLinkage: + if (MAI->hasDotLGloblDirective()) + OutStreamer->EmitSymbolAttribute(GVSym, MCSA_LGlobal); + return; + case GlobalValue::AppendingLinkage: + case GlobalValue::AvailableExternallyLinkage: + case GlobalValue::ExternalWeakLinkage: + llvm_unreachable("Should never emit this"); + } + llvm_unreachable("Unknown linkage type!"); +} + +void AsmPrinter::getNameWithPrefix(SmallVectorImpl<char> &Name, + const GlobalValue *GV) const { + TM.getNameWithPrefix(Name, GV, getObjFileLowering().getMangler()); +} + +MCSymbol *AsmPrinter::getSymbol(const GlobalValue *GV) const { + return TM.getSymbol(GV); +} + +/// EmitGlobalVariable - Emit the specified global variable to the .s file. +void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { + bool IsEmuTLSVar = TM.useEmulatedTLS() && GV->isThreadLocal(); + assert(!(IsEmuTLSVar && GV->hasCommonLinkage()) && + "No emulated TLS variables in the common section"); + + // Never emit TLS variable xyz in emulated TLS model. + // The initialization value is in __emutls_t.xyz instead of xyz. + if (IsEmuTLSVar) + return; + + if (GV->hasInitializer()) { + // Check to see if this is a special global used by LLVM, if so, emit it. + if (EmitSpecialLLVMGlobal(GV)) + return; + + // Skip the emission of global equivalents. The symbol can be emitted later + // on by emitGlobalGOTEquivs in case it turns out to be needed. + if (GlobalGOTEquivs.count(getSymbol(GV))) + return; + + if (isVerbose()) { + // When printing the control variable __emutls_v.*, + // we don't need to print the original TLS variable name. + GV->printAsOperand(OutStreamer->GetCommentOS(), + /*PrintType=*/false, GV->getParent()); + OutStreamer->GetCommentOS() << '\n'; + } + } + + MCSymbol *GVSym = getSymbol(GV); + MCSymbol *EmittedSym = GVSym; + + // getOrCreateEmuTLSControlSym only creates the symbol with name and default + // attributes. + // GV's or GVSym's attributes will be used for the EmittedSym. + EmitVisibility(EmittedSym, GV->getVisibility(), !GV->isDeclaration()); + + if (!GV->hasInitializer()) // External globals require no extra code. + return; + + GVSym->redefineIfPossible(); + if (GVSym->isDefined() || GVSym->isVariable()) + report_fatal_error("symbol '" + Twine(GVSym->getName()) + + "' is already defined"); + + if (MAI->hasDotTypeDotSizeDirective()) + OutStreamer->EmitSymbolAttribute(EmittedSym, MCSA_ELF_TypeObject); + + SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM); + + const DataLayout &DL = GV->getParent()->getDataLayout(); + uint64_t Size = DL.getTypeAllocSize(GV->getValueType()); + + // If the alignment is specified, we *must* obey it. Overaligning a global + // with a specified alignment is a prompt way to break globals emitted to + // sections and expected to be contiguous (e.g. ObjC metadata). + const Align Alignment = getGVAlignment(GV, DL); + + for (const HandlerInfo &HI : Handlers) { + NamedRegionTimer T(HI.TimerName, HI.TimerDescription, + HI.TimerGroupName, HI.TimerGroupDescription, + TimePassesIsEnabled); + HI.Handler->setSymbolSize(GVSym, Size); + } + + // Handle common symbols + if (GVKind.isCommon()) { + if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it. + // .comm _foo, 42, 4 + const bool SupportsAlignment = + getObjFileLowering().getCommDirectiveSupportsAlignment(); + OutStreamer->EmitCommonSymbol(GVSym, Size, + SupportsAlignment ? Alignment.value() : 0); + return; + } + + // Determine to which section this global should be emitted. + MCSection *TheSection = getObjFileLowering().SectionForGlobal(GV, GVKind, TM); + + // If we have a bss global going to a section that supports the + // zerofill directive, do so here. + if (GVKind.isBSS() && MAI->hasMachoZeroFillDirective() && + TheSection->isVirtualSection()) { + if (Size == 0) + Size = 1; // zerofill of 0 bytes is undefined. + EmitLinkage(GV, GVSym); + // .zerofill __DATA, __bss, _foo, 400, 5 + OutStreamer->EmitZerofill(TheSection, GVSym, Size, Alignment.value()); + return; + } + + // If this is a BSS local symbol and we are emitting in the BSS + // section use .lcomm/.comm directive. + if (GVKind.isBSSLocal() && + getObjFileLowering().getBSSSection() == TheSection) { + if (Size == 0) + Size = 1; // .comm Foo, 0 is undefined, avoid it. + + // Use .lcomm only if it supports user-specified alignment. + // Otherwise, while it would still be correct to use .lcomm in some + // cases (e.g. when Align == 1), the external assembler might enfore + // some -unknown- default alignment behavior, which could cause + // spurious differences between external and integrated assembler. + // Prefer to simply fall back to .local / .comm in this case. + if (MAI->getLCOMMDirectiveAlignmentType() != LCOMM::NoAlignment) { + // .lcomm _foo, 42 + OutStreamer->EmitLocalCommonSymbol(GVSym, Size, Alignment.value()); + return; + } + + // .local _foo + OutStreamer->EmitSymbolAttribute(GVSym, MCSA_Local); + // .comm _foo, 42, 4 + const bool SupportsAlignment = + getObjFileLowering().getCommDirectiveSupportsAlignment(); + OutStreamer->EmitCommonSymbol(GVSym, Size, + SupportsAlignment ? Alignment.value() : 0); + return; + } + + // Handle thread local data for mach-o which requires us to output an + // additional structure of data and mangle the original symbol so that we + // can reference it later. + // + // TODO: This should become an "emit thread local global" method on TLOF. + // All of this macho specific stuff should be sunk down into TLOFMachO and + // stuff like "TLSExtraDataSection" should no longer be part of the parent + // TLOF class. This will also make it more obvious that stuff like + // MCStreamer::EmitTBSSSymbol is macho specific and only called from macho + // specific code. + if (GVKind.isThreadLocal() && MAI->hasMachoTBSSDirective()) { + // Emit the .tbss symbol + MCSymbol *MangSym = + OutContext.getOrCreateSymbol(GVSym->getName() + Twine("$tlv$init")); + + if (GVKind.isThreadBSS()) { + TheSection = getObjFileLowering().getTLSBSSSection(); + OutStreamer->EmitTBSSSymbol(TheSection, MangSym, Size, Alignment.value()); + } else if (GVKind.isThreadData()) { + OutStreamer->SwitchSection(TheSection); + + EmitAlignment(Alignment, GV); + OutStreamer->EmitLabel(MangSym); + + EmitGlobalConstant(GV->getParent()->getDataLayout(), + GV->getInitializer()); + } + + OutStreamer->AddBlankLine(); + + // Emit the variable struct for the runtime. + MCSection *TLVSect = getObjFileLowering().getTLSExtraDataSection(); + + OutStreamer->SwitchSection(TLVSect); + // Emit the linkage here. + EmitLinkage(GV, GVSym); + OutStreamer->EmitLabel(GVSym); + + // Three pointers in size: + // - __tlv_bootstrap - used to make sure support exists + // - spare pointer, used when mapped by the runtime + // - pointer to mangled symbol above with initializer + unsigned PtrSize = DL.getPointerTypeSize(GV->getType()); + OutStreamer->EmitSymbolValue(GetExternalSymbolSymbol("_tlv_bootstrap"), + PtrSize); + OutStreamer->EmitIntValue(0, PtrSize); + OutStreamer->EmitSymbolValue(MangSym, PtrSize); + + OutStreamer->AddBlankLine(); + return; + } + + MCSymbol *EmittedInitSym = GVSym; + + OutStreamer->SwitchSection(TheSection); + + EmitLinkage(GV, EmittedInitSym); + EmitAlignment(Alignment, GV); + + OutStreamer->EmitLabel(EmittedInitSym); + + EmitGlobalConstant(GV->getParent()->getDataLayout(), GV->getInitializer()); + + if (MAI->hasDotTypeDotSizeDirective()) + // .size foo, 42 + OutStreamer->emitELFSize(EmittedInitSym, + MCConstantExpr::create(Size, OutContext)); + + OutStreamer->AddBlankLine(); +} + +/// Emit the directive and value for debug thread local expression +/// +/// \p Value - The value to emit. +/// \p Size - The size of the integer (in bytes) to emit. +void AsmPrinter::EmitDebugValue(const MCExpr *Value, unsigned Size) const { + OutStreamer->EmitValue(Value, Size); +} + +/// EmitFunctionHeader - This method emits the header for the current +/// function. +void AsmPrinter::EmitFunctionHeader() { + const Function &F = MF->getFunction(); + + if (isVerbose()) + OutStreamer->GetCommentOS() + << "-- Begin function " + << GlobalValue::dropLLVMManglingEscape(F.getName()) << '\n'; + + // Print out constants referenced by the function + EmitConstantPool(); + + // Print the 'header' of function. + OutStreamer->SwitchSection(getObjFileLowering().SectionForGlobal(&F, TM)); + EmitVisibility(CurrentFnSym, F.getVisibility()); + + if (MAI->needsFunctionDescriptors() && + F.getLinkage() != GlobalValue::InternalLinkage) + EmitLinkage(&F, CurrentFnDescSym); + + EmitLinkage(&F, CurrentFnSym); + if (MAI->hasFunctionAlignment()) + EmitAlignment(MF->getAlignment(), &F); + + if (MAI->hasDotTypeDotSizeDirective()) + OutStreamer->EmitSymbolAttribute(CurrentFnSym, MCSA_ELF_TypeFunction); + + if (F.hasFnAttribute(Attribute::Cold)) + OutStreamer->EmitSymbolAttribute(CurrentFnSym, MCSA_Cold); + + if (isVerbose()) { + F.printAsOperand(OutStreamer->GetCommentOS(), + /*PrintType=*/false, F.getParent()); + OutStreamer->GetCommentOS() << '\n'; + } + + // Emit the prefix data. + if (F.hasPrefixData()) { + if (MAI->hasSubsectionsViaSymbols()) { + // Preserving prefix data on platforms which use subsections-via-symbols + // is a bit tricky. Here we introduce a symbol for the prefix data + // and use the .alt_entry attribute to mark the function's real entry point + // as an alternative entry point to the prefix-data symbol. + MCSymbol *PrefixSym = OutContext.createLinkerPrivateTempSymbol(); + OutStreamer->EmitLabel(PrefixSym); + + EmitGlobalConstant(F.getParent()->getDataLayout(), F.getPrefixData()); + + // Emit an .alt_entry directive for the actual function symbol. + OutStreamer->EmitSymbolAttribute(CurrentFnSym, MCSA_AltEntry); + } else { + EmitGlobalConstant(F.getParent()->getDataLayout(), F.getPrefixData()); + } + } + + // Emit the function descriptor. This is a virtual function to allow targets + // to emit their specific function descriptor. + if (MAI->needsFunctionDescriptors()) + EmitFunctionDescriptor(); + + // Emit the CurrentFnSym. This is a virtual function to allow targets to do + // their wild and crazy things as required. + EmitFunctionEntryLabel(); + + // If the function had address-taken blocks that got deleted, then we have + // references to the dangling symbols. Emit them at the start of the function + // so that we don't get references to undefined symbols. + std::vector<MCSymbol*> DeadBlockSyms; + MMI->takeDeletedSymbolsForFunction(&F, DeadBlockSyms); + for (unsigned i = 0, e = DeadBlockSyms.size(); i != e; ++i) { + OutStreamer->AddComment("Address taken block that was later removed"); + OutStreamer->EmitLabel(DeadBlockSyms[i]); + } + + if (CurrentFnBegin) { + if (MAI->useAssignmentForEHBegin()) { + MCSymbol *CurPos = OutContext.createTempSymbol(); + OutStreamer->EmitLabel(CurPos); + OutStreamer->EmitAssignment(CurrentFnBegin, + MCSymbolRefExpr::create(CurPos, OutContext)); + } else { + OutStreamer->EmitLabel(CurrentFnBegin); + } + } + + // Emit pre-function debug and/or EH information. + for (const HandlerInfo &HI : Handlers) { + NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName, + HI.TimerGroupDescription, TimePassesIsEnabled); + HI.Handler->beginFunction(MF); + } + + // Emit the prologue data. + if (F.hasPrologueData()) + EmitGlobalConstant(F.getParent()->getDataLayout(), F.getPrologueData()); +} + +/// EmitFunctionEntryLabel - Emit the label that is the entrypoint for the +/// function. This can be overridden by targets as required to do custom stuff. +void AsmPrinter::EmitFunctionEntryLabel() { + CurrentFnSym->redefineIfPossible(); + + // The function label could have already been emitted if two symbols end up + // conflicting due to asm renaming. Detect this and emit an error. + if (CurrentFnSym->isVariable()) + report_fatal_error("'" + Twine(CurrentFnSym->getName()) + + "' is a protected alias"); + if (CurrentFnSym->isDefined()) + report_fatal_error("'" + Twine(CurrentFnSym->getName()) + + "' label emitted multiple times to assembly file"); + + return OutStreamer->EmitLabel(CurrentFnSym); +} + +/// emitComments - Pretty-print comments for instructions. +static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) { + const MachineFunction *MF = MI.getMF(); + const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); + + // Check for spills and reloads + + // We assume a single instruction only has a spill or reload, not + // both. + Optional<unsigned> Size; + if ((Size = MI.getRestoreSize(TII))) { + CommentOS << *Size << "-byte Reload\n"; + } else if ((Size = MI.getFoldedRestoreSize(TII))) { + if (*Size) + CommentOS << *Size << "-byte Folded Reload\n"; + } else if ((Size = MI.getSpillSize(TII))) { + CommentOS << *Size << "-byte Spill\n"; + } else if ((Size = MI.getFoldedSpillSize(TII))) { + if (*Size) + CommentOS << *Size << "-byte Folded Spill\n"; + } + + // Check for spill-induced copies + if (MI.getAsmPrinterFlag(MachineInstr::ReloadReuse)) + CommentOS << " Reload Reuse\n"; +} + +/// emitImplicitDef - This method emits the specified machine instruction +/// that is an implicit def. +void AsmPrinter::emitImplicitDef(const MachineInstr *MI) const { + Register RegNo = MI->getOperand(0).getReg(); + + SmallString<128> Str; + raw_svector_ostream OS(Str); + OS << "implicit-def: " + << printReg(RegNo, MF->getSubtarget().getRegisterInfo()); + + OutStreamer->AddComment(OS.str()); + OutStreamer->AddBlankLine(); +} + +static void emitKill(const MachineInstr *MI, AsmPrinter &AP) { + std::string Str; + raw_string_ostream OS(Str); + OS << "kill:"; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &Op = MI->getOperand(i); + assert(Op.isReg() && "KILL instruction must have only register operands"); + OS << ' ' << (Op.isDef() ? "def " : "killed ") + << printReg(Op.getReg(), AP.MF->getSubtarget().getRegisterInfo()); + } + AP.OutStreamer->AddComment(OS.str()); + AP.OutStreamer->AddBlankLine(); +} + +/// emitDebugValueComment - This method handles the target-independent form +/// of DBG_VALUE, returning true if it was able to do so. A false return +/// means the target will need to handle MI in EmitInstruction. +static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { + // This code handles only the 4-operand target-independent form. + if (MI->getNumOperands() != 4) + return false; + + SmallString<128> Str; + raw_svector_ostream OS(Str); + OS << "DEBUG_VALUE: "; + + const DILocalVariable *V = MI->getDebugVariable(); + if (auto *SP = dyn_cast<DISubprogram>(V->getScope())) { + StringRef Name = SP->getName(); + if (!Name.empty()) + OS << Name << ":"; + } + OS << V->getName(); + OS << " <- "; + + // The second operand is only an offset if it's an immediate. + bool MemLoc = MI->getOperand(0).isReg() && MI->getOperand(1).isImm(); + int64_t Offset = MemLoc ? MI->getOperand(1).getImm() : 0; + const DIExpression *Expr = MI->getDebugExpression(); + if (Expr->getNumElements()) { + OS << '['; + bool NeedSep = false; + for (auto Op : Expr->expr_ops()) { + if (NeedSep) + OS << ", "; + else + NeedSep = true; + OS << dwarf::OperationEncodingString(Op.getOp()); + for (unsigned I = 0; I < Op.getNumArgs(); ++I) + OS << ' ' << Op.getArg(I); + } + OS << "] "; + } + + // Register or immediate value. Register 0 means undef. + if (MI->getOperand(0).isFPImm()) { + APFloat APF = APFloat(MI->getOperand(0).getFPImm()->getValueAPF()); + if (MI->getOperand(0).getFPImm()->getType()->isFloatTy()) { + OS << (double)APF.convertToFloat(); + } else if (MI->getOperand(0).getFPImm()->getType()->isDoubleTy()) { + OS << APF.convertToDouble(); + } else { + // There is no good way to print long double. Convert a copy to + // double. Ah well, it's only a comment. + bool ignored; + APF.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, + &ignored); + OS << "(long double) " << APF.convertToDouble(); + } + } else if (MI->getOperand(0).isImm()) { + OS << MI->getOperand(0).getImm(); + } else if (MI->getOperand(0).isCImm()) { + MI->getOperand(0).getCImm()->getValue().print(OS, false /*isSigned*/); + } else { + unsigned Reg; + if (MI->getOperand(0).isReg()) { + Reg = MI->getOperand(0).getReg(); + } else { + assert(MI->getOperand(0).isFI() && "Unknown operand type"); + const TargetFrameLowering *TFI = AP.MF->getSubtarget().getFrameLowering(); + Offset += TFI->getFrameIndexReference(*AP.MF, + MI->getOperand(0).getIndex(), Reg); + MemLoc = true; + } + if (Reg == 0) { + // Suppress offset, it is not meaningful here. + OS << "undef"; + // NOTE: Want this comment at start of line, don't emit with AddComment. + AP.OutStreamer->emitRawComment(OS.str()); + return true; + } + if (MemLoc) + OS << '['; + OS << printReg(Reg, AP.MF->getSubtarget().getRegisterInfo()); + } + + if (MemLoc) + OS << '+' << Offset << ']'; + + // NOTE: Want this comment at start of line, don't emit with AddComment. + AP.OutStreamer->emitRawComment(OS.str()); + return true; +} + +/// This method handles the target-independent form of DBG_LABEL, returning +/// true if it was able to do so. A false return means the target will need +/// to handle MI in EmitInstruction. +static bool emitDebugLabelComment(const MachineInstr *MI, AsmPrinter &AP) { + if (MI->getNumOperands() != 1) + return false; + + SmallString<128> Str; + raw_svector_ostream OS(Str); + OS << "DEBUG_LABEL: "; + + const DILabel *V = MI->getDebugLabel(); + if (auto *SP = dyn_cast<DISubprogram>( + V->getScope()->getNonLexicalBlockFileScope())) { + StringRef Name = SP->getName(); + if (!Name.empty()) + OS << Name << ":"; + } + OS << V->getName(); + + // NOTE: Want this comment at start of line, don't emit with AddComment. + AP.OutStreamer->emitRawComment(OS.str()); + return true; +} + +AsmPrinter::CFIMoveType AsmPrinter::needsCFIMoves() const { + if (MAI->getExceptionHandlingType() == ExceptionHandling::DwarfCFI && + MF->getFunction().needsUnwindTableEntry()) + return CFI_M_EH; + + if (MMI->hasDebugInfo()) + return CFI_M_Debug; + + return CFI_M_None; +} + +bool AsmPrinter::needsSEHMoves() { + return MAI->usesWindowsCFI() && MF->getFunction().needsUnwindTableEntry(); +} + +void AsmPrinter::emitCFIInstruction(const MachineInstr &MI) { + ExceptionHandling ExceptionHandlingType = MAI->getExceptionHandlingType(); + if (ExceptionHandlingType != ExceptionHandling::DwarfCFI && + ExceptionHandlingType != ExceptionHandling::ARM) + return; + + if (needsCFIMoves() == CFI_M_None) + return; + + // If there is no "real" instruction following this CFI instruction, skip + // emitting it; it would be beyond the end of the function's FDE range. + auto *MBB = MI.getParent(); + auto I = std::next(MI.getIterator()); + while (I != MBB->end() && I->isTransient()) + ++I; + if (I == MBB->instr_end() && + MBB->getReverseIterator() == MBB->getParent()->rbegin()) + return; + + const std::vector<MCCFIInstruction> &Instrs = MF->getFrameInstructions(); + unsigned CFIIndex = MI.getOperand(0).getCFIIndex(); + const MCCFIInstruction &CFI = Instrs[CFIIndex]; + emitCFIInstruction(CFI); +} + +void AsmPrinter::emitFrameAlloc(const MachineInstr &MI) { + // The operands are the MCSymbol and the frame offset of the allocation. + MCSymbol *FrameAllocSym = MI.getOperand(0).getMCSymbol(); + int FrameOffset = MI.getOperand(1).getImm(); + + // Emit a symbol assignment. + OutStreamer->EmitAssignment(FrameAllocSym, + MCConstantExpr::create(FrameOffset, OutContext)); +} + +void AsmPrinter::emitStackSizeSection(const MachineFunction &MF) { + if (!MF.getTarget().Options.EmitStackSizeSection) + return; + + MCSection *StackSizeSection = + getObjFileLowering().getStackSizesSection(*getCurrentSection()); + if (!StackSizeSection) + return; + + const MachineFrameInfo &FrameInfo = MF.getFrameInfo(); + // Don't emit functions with dynamic stack allocations. + if (FrameInfo.hasVarSizedObjects()) + return; + + OutStreamer->PushSection(); + OutStreamer->SwitchSection(StackSizeSection); + + const MCSymbol *FunctionSymbol = getFunctionBegin(); + uint64_t StackSize = FrameInfo.getStackSize(); + OutStreamer->EmitSymbolValue(FunctionSymbol, TM.getProgramPointerSize()); + OutStreamer->EmitULEB128IntValue(StackSize); + + OutStreamer->PopSection(); +} + +static bool needFuncLabelsForEHOrDebugInfo(const MachineFunction &MF, + MachineModuleInfo *MMI) { + if (!MF.getLandingPads().empty() || MF.hasEHFunclets() || MMI->hasDebugInfo()) + return true; + + // We might emit an EH table that uses function begin and end labels even if + // we don't have any landingpads. + if (!MF.getFunction().hasPersonalityFn()) + return false; + return !isNoOpWithoutInvoke( + classifyEHPersonality(MF.getFunction().getPersonalityFn())); +} + +/// EmitFunctionBody - This method emits the body and trailer for a +/// function. +void AsmPrinter::EmitFunctionBody() { + EmitFunctionHeader(); + + // Emit target-specific gunk before the function body. + EmitFunctionBodyStart(); + + bool ShouldPrintDebugScopes = MMI->hasDebugInfo(); + + if (isVerbose()) { + // Get MachineDominatorTree or compute it on the fly if it's unavailable + MDT = getAnalysisIfAvailable<MachineDominatorTree>(); + if (!MDT) { + OwnedMDT = std::make_unique<MachineDominatorTree>(); + OwnedMDT->getBase().recalculate(*MF); + MDT = OwnedMDT.get(); + } + + // Get MachineLoopInfo or compute it on the fly if it's unavailable + MLI = getAnalysisIfAvailable<MachineLoopInfo>(); + if (!MLI) { + OwnedMLI = std::make_unique<MachineLoopInfo>(); + OwnedMLI->getBase().analyze(MDT->getBase()); + MLI = OwnedMLI.get(); + } + } + + // Print out code for the function. + bool HasAnyRealCode = false; + int NumInstsInFunction = 0; + for (auto &MBB : *MF) { + // Print a label for the basic block. + EmitBasicBlockStart(MBB); + for (auto &MI : MBB) { + // Print the assembly for the instruction. + if (!MI.isPosition() && !MI.isImplicitDef() && !MI.isKill() && + !MI.isDebugInstr()) { + HasAnyRealCode = true; + ++NumInstsInFunction; + } + + // If there is a pre-instruction symbol, emit a label for it here. If the + // instruction was duplicated and the label has already been emitted, + // don't re-emit the same label. + // FIXME: Consider strengthening that to an assertion. + if (MCSymbol *S = MI.getPreInstrSymbol()) + if (S->isUndefined()) + OutStreamer->EmitLabel(S); + + if (ShouldPrintDebugScopes) { + for (const HandlerInfo &HI : Handlers) { + NamedRegionTimer T(HI.TimerName, HI.TimerDescription, + HI.TimerGroupName, HI.TimerGroupDescription, + TimePassesIsEnabled); + HI.Handler->beginInstruction(&MI); + } + } + + if (isVerbose()) + emitComments(MI, OutStreamer->GetCommentOS()); + + switch (MI.getOpcode()) { + case TargetOpcode::CFI_INSTRUCTION: + emitCFIInstruction(MI); + break; + case TargetOpcode::LOCAL_ESCAPE: + emitFrameAlloc(MI); + break; + case TargetOpcode::ANNOTATION_LABEL: + case TargetOpcode::EH_LABEL: + case TargetOpcode::GC_LABEL: + OutStreamer->EmitLabel(MI.getOperand(0).getMCSymbol()); + break; + case TargetOpcode::INLINEASM: + case TargetOpcode::INLINEASM_BR: + EmitInlineAsm(&MI); + break; + case TargetOpcode::DBG_VALUE: + if (isVerbose()) { + if (!emitDebugValueComment(&MI, *this)) + EmitInstruction(&MI); + } + break; + case TargetOpcode::DBG_LABEL: + if (isVerbose()) { + if (!emitDebugLabelComment(&MI, *this)) + EmitInstruction(&MI); + } + break; + case TargetOpcode::IMPLICIT_DEF: + if (isVerbose()) emitImplicitDef(&MI); + break; + case TargetOpcode::KILL: + if (isVerbose()) emitKill(&MI, *this); + break; + default: + EmitInstruction(&MI); + break; + } + + // If there is a post-instruction symbol, emit a label for it here. If + // the instruction was duplicated and the label has already been emitted, + // don't re-emit the same label. + // FIXME: Consider strengthening that to an assertion. + if (MCSymbol *S = MI.getPostInstrSymbol()) + if (S->isUndefined()) + OutStreamer->EmitLabel(S); + + if (ShouldPrintDebugScopes) { + for (const HandlerInfo &HI : Handlers) { + NamedRegionTimer T(HI.TimerName, HI.TimerDescription, + HI.TimerGroupName, HI.TimerGroupDescription, + TimePassesIsEnabled); + HI.Handler->endInstruction(); + } + } + } + + EmitBasicBlockEnd(MBB); + } + + EmittedInsts += NumInstsInFunction; + MachineOptimizationRemarkAnalysis R(DEBUG_TYPE, "InstructionCount", + MF->getFunction().getSubprogram(), + &MF->front()); + R << ore::NV("NumInstructions", NumInstsInFunction) + << " instructions in function"; + ORE->emit(R); + + // If the function is empty and the object file uses .subsections_via_symbols, + // then we need to emit *something* to the function body to prevent the + // labels from collapsing together. Just emit a noop. + // Similarly, don't emit empty functions on Windows either. It can lead to + // duplicate entries (two functions with the same RVA) in the Guard CF Table + // after linking, causing the kernel not to load the binary: + // https://developercommunity.visualstudio.com/content/problem/45366/vc-linker-creates-invalid-dll-with-clang-cl.html + // FIXME: Hide this behind some API in e.g. MCAsmInfo or MCTargetStreamer. + const Triple &TT = TM.getTargetTriple(); + if (!HasAnyRealCode && (MAI->hasSubsectionsViaSymbols() || + (TT.isOSWindows() && TT.isOSBinFormatCOFF()))) { + MCInst Noop; + MF->getSubtarget().getInstrInfo()->getNoop(Noop); + + // Targets can opt-out of emitting the noop here by leaving the opcode + // unspecified. + if (Noop.getOpcode()) { + OutStreamer->AddComment("avoids zero-length function"); + OutStreamer->EmitInstruction(Noop, getSubtargetInfo()); + } + } + + const Function &F = MF->getFunction(); + for (const auto &BB : F) { + if (!BB.hasAddressTaken()) + continue; + MCSymbol *Sym = GetBlockAddressSymbol(&BB); + if (Sym->isDefined()) + continue; + OutStreamer->AddComment("Address of block that was removed by CodeGen"); + OutStreamer->EmitLabel(Sym); + } + + // Emit target-specific gunk after the function body. + EmitFunctionBodyEnd(); + + if (needFuncLabelsForEHOrDebugInfo(*MF, MMI) || + MAI->hasDotTypeDotSizeDirective()) { + // Create a symbol for the end of function. + CurrentFnEnd = createTempSymbol("func_end"); + OutStreamer->EmitLabel(CurrentFnEnd); + } + + // If the target wants a .size directive for the size of the function, emit + // it. + if (MAI->hasDotTypeDotSizeDirective()) { + // We can get the size as difference between the function label and the + // temp label. + const MCExpr *SizeExp = MCBinaryExpr::createSub( + MCSymbolRefExpr::create(CurrentFnEnd, OutContext), + MCSymbolRefExpr::create(CurrentFnSymForSize, OutContext), OutContext); + OutStreamer->emitELFSize(CurrentFnSym, SizeExp); + } + + for (const HandlerInfo &HI : Handlers) { + NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName, + HI.TimerGroupDescription, TimePassesIsEnabled); + HI.Handler->markFunctionEnd(); + } + + // Print out jump tables referenced by the function. + EmitJumpTableInfo(); + + // Emit post-function debug and/or EH information. + for (const HandlerInfo &HI : Handlers) { + NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName, + HI.TimerGroupDescription, TimePassesIsEnabled); + HI.Handler->endFunction(MF); + } + + // Emit section containing stack size metadata. + emitStackSizeSection(*MF); + + if (isVerbose()) + OutStreamer->GetCommentOS() << "-- End function\n"; + + OutStreamer->AddBlankLine(); +} + +/// Compute the number of Global Variables that uses a Constant. +static unsigned getNumGlobalVariableUses(const Constant *C) { + if (!C) + return 0; + + if (isa<GlobalVariable>(C)) + return 1; + + unsigned NumUses = 0; + for (auto *CU : C->users()) + NumUses += getNumGlobalVariableUses(dyn_cast<Constant>(CU)); + + return NumUses; +} + +/// Only consider global GOT equivalents if at least one user is a +/// cstexpr inside an initializer of another global variables. Also, don't +/// handle cstexpr inside instructions. During global variable emission, +/// candidates are skipped and are emitted later in case at least one cstexpr +/// isn't replaced by a PC relative GOT entry access. +static bool isGOTEquivalentCandidate(const GlobalVariable *GV, + unsigned &NumGOTEquivUsers) { + // Global GOT equivalents are unnamed private globals with a constant + // pointer initializer to another global symbol. They must point to a + // GlobalVariable or Function, i.e., as GlobalValue. + if (!GV->hasGlobalUnnamedAddr() || !GV->hasInitializer() || + !GV->isConstant() || !GV->isDiscardableIfUnused() || + !isa<GlobalValue>(GV->getOperand(0))) + return false; + + // To be a got equivalent, at least one of its users need to be a constant + // expression used by another global variable. + for (auto *U : GV->users()) + NumGOTEquivUsers += getNumGlobalVariableUses(dyn_cast<Constant>(U)); + + return NumGOTEquivUsers > 0; +} + +/// Unnamed constant global variables solely contaning a pointer to +/// another globals variable is equivalent to a GOT table entry; it contains the +/// the address of another symbol. Optimize it and replace accesses to these +/// "GOT equivalents" by using the GOT entry for the final global instead. +/// Compute GOT equivalent candidates among all global variables to avoid +/// emitting them if possible later on, after it use is replaced by a GOT entry +/// access. +void AsmPrinter::computeGlobalGOTEquivs(Module &M) { + if (!getObjFileLowering().supportIndirectSymViaGOTPCRel()) + return; + + for (const auto &G : M.globals()) { + unsigned NumGOTEquivUsers = 0; + if (!isGOTEquivalentCandidate(&G, NumGOTEquivUsers)) + continue; + + const MCSymbol *GOTEquivSym = getSymbol(&G); + GlobalGOTEquivs[GOTEquivSym] = std::make_pair(&G, NumGOTEquivUsers); + } +} + +/// Constant expressions using GOT equivalent globals may not be eligible +/// for PC relative GOT entry conversion, in such cases we need to emit such +/// globals we previously omitted in EmitGlobalVariable. +void AsmPrinter::emitGlobalGOTEquivs() { + if (!getObjFileLowering().supportIndirectSymViaGOTPCRel()) + return; + + SmallVector<const GlobalVariable *, 8> FailedCandidates; + for (auto &I : GlobalGOTEquivs) { + const GlobalVariable *GV = I.second.first; + unsigned Cnt = I.second.second; + if (Cnt) + FailedCandidates.push_back(GV); + } + GlobalGOTEquivs.clear(); + + for (auto *GV : FailedCandidates) + EmitGlobalVariable(GV); +} + +void AsmPrinter::emitGlobalIndirectSymbol(Module &M, + const GlobalIndirectSymbol& GIS) { + MCSymbol *Name = getSymbol(&GIS); + + if (GIS.hasExternalLinkage() || !MAI->getWeakRefDirective()) + OutStreamer->EmitSymbolAttribute(Name, MCSA_Global); + else if (GIS.hasWeakLinkage() || GIS.hasLinkOnceLinkage()) + OutStreamer->EmitSymbolAttribute(Name, MCSA_WeakReference); + else + assert(GIS.hasLocalLinkage() && "Invalid alias or ifunc linkage"); + + bool IsFunction = GIS.getValueType()->isFunctionTy(); + + // Treat bitcasts of functions as functions also. This is important at least + // on WebAssembly where object and function addresses can't alias each other. + if (!IsFunction) + if (auto *CE = dyn_cast<ConstantExpr>(GIS.getIndirectSymbol())) + if (CE->getOpcode() == Instruction::BitCast) + IsFunction = + CE->getOperand(0)->getType()->getPointerElementType()->isFunctionTy(); + + // Set the symbol type to function if the alias has a function type. + // This affects codegen when the aliasee is not a function. + if (IsFunction) + OutStreamer->EmitSymbolAttribute(Name, isa<GlobalIFunc>(GIS) + ? MCSA_ELF_TypeIndFunction + : MCSA_ELF_TypeFunction); + + EmitVisibility(Name, GIS.getVisibility()); + + const MCExpr *Expr = lowerConstant(GIS.getIndirectSymbol()); + + if (isa<GlobalAlias>(&GIS) && MAI->hasAltEntry() && isa<MCBinaryExpr>(Expr)) + OutStreamer->EmitSymbolAttribute(Name, MCSA_AltEntry); + + // Emit the directives as assignments aka .set: + OutStreamer->EmitAssignment(Name, Expr); + + if (auto *GA = dyn_cast<GlobalAlias>(&GIS)) { + // If the aliasee does not correspond to a symbol in the output, i.e. the + // alias is not of an object or the aliased object is private, then set the + // size of the alias symbol from the type of the alias. We don't do this in + // other situations as the alias and aliasee having differing types but same + // size may be intentional. + const GlobalObject *BaseObject = GA->getBaseObject(); + if (MAI->hasDotTypeDotSizeDirective() && GA->getValueType()->isSized() && + (!BaseObject || BaseObject->hasPrivateLinkage())) { + const DataLayout &DL = M.getDataLayout(); + uint64_t Size = DL.getTypeAllocSize(GA->getValueType()); + OutStreamer->emitELFSize(Name, MCConstantExpr::create(Size, OutContext)); + } + } +} + +void AsmPrinter::emitRemarksSection(Module &M) { + RemarkStreamer *RS = M.getContext().getRemarkStreamer(); + if (!RS) + return; + remarks::RemarkSerializer &RemarkSerializer = RS->getSerializer(); + + Optional<SmallString<128>> Filename; + if (Optional<StringRef> FilenameRef = RS->getFilename()) { + Filename = *FilenameRef; + sys::fs::make_absolute(*Filename); + assert(!Filename->empty() && "The filename can't be empty."); + } + + std::string Buf; + raw_string_ostream OS(Buf); + std::unique_ptr<remarks::MetaSerializer> MetaSerializer = + Filename ? RemarkSerializer.metaSerializer(OS, StringRef(*Filename)) + : RemarkSerializer.metaSerializer(OS); + MetaSerializer->emit(); + + // Switch to the right section: .remarks/__remarks. + MCSection *RemarksSection = + OutContext.getObjectFileInfo()->getRemarksSection(); + OutStreamer->SwitchSection(RemarksSection); + + OutStreamer->EmitBinaryData(OS.str()); +} + +bool AsmPrinter::doFinalization(Module &M) { + // Set the MachineFunction to nullptr so that we can catch attempted + // accesses to MF specific features at the module level and so that + // we can conditionalize accesses based on whether or not it is nullptr. + MF = nullptr; + + // Gather all GOT equivalent globals in the module. We really need two + // passes over the globals: one to compute and another to avoid its emission + // in EmitGlobalVariable, otherwise we would not be able to handle cases + // where the got equivalent shows up before its use. + computeGlobalGOTEquivs(M); + + // Emit global variables. + for (const auto &G : M.globals()) + EmitGlobalVariable(&G); + + // Emit remaining GOT equivalent globals. + emitGlobalGOTEquivs(); + + // Emit visibility info for declarations + for (const Function &F : M) { + if (!F.isDeclarationForLinker()) + continue; + GlobalValue::VisibilityTypes V = F.getVisibility(); + if (V == GlobalValue::DefaultVisibility) + continue; + + MCSymbol *Name = getSymbol(&F); + EmitVisibility(Name, V, false); + } + + // Emit the remarks section contents. + // FIXME: Figure out when is the safest time to emit this section. It should + // not come after debug info. + if (EnableRemarksSection) + emitRemarksSection(M); + + const TargetLoweringObjectFile &TLOF = getObjFileLowering(); + + TLOF.emitModuleMetadata(*OutStreamer, M); + + if (TM.getTargetTriple().isOSBinFormatELF()) { + MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>(); + + // Output stubs for external and common global variables. + MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList(); + if (!Stubs.empty()) { + OutStreamer->SwitchSection(TLOF.getDataSection()); + const DataLayout &DL = M.getDataLayout(); + + EmitAlignment(Align(DL.getPointerSize())); + for (const auto &Stub : Stubs) { + OutStreamer->EmitLabel(Stub.first); + OutStreamer->EmitSymbolValue(Stub.second.getPointer(), + DL.getPointerSize()); + } + } + } + + if (TM.getTargetTriple().isOSBinFormatCOFF()) { + MachineModuleInfoCOFF &MMICOFF = + MMI->getObjFileInfo<MachineModuleInfoCOFF>(); + + // Output stubs for external and common global variables. + MachineModuleInfoCOFF::SymbolListTy Stubs = MMICOFF.GetGVStubList(); + if (!Stubs.empty()) { + const DataLayout &DL = M.getDataLayout(); + + for (const auto &Stub : Stubs) { + SmallString<256> SectionName = StringRef(".rdata$"); + SectionName += Stub.first->getName(); + OutStreamer->SwitchSection(OutContext.getCOFFSection( + SectionName, + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ | + COFF::IMAGE_SCN_LNK_COMDAT, + SectionKind::getReadOnly(), Stub.first->getName(), + COFF::IMAGE_COMDAT_SELECT_ANY)); + EmitAlignment(Align(DL.getPointerSize())); + OutStreamer->EmitSymbolAttribute(Stub.first, MCSA_Global); + OutStreamer->EmitLabel(Stub.first); + OutStreamer->EmitSymbolValue(Stub.second.getPointer(), + DL.getPointerSize()); + } + } + } + + // Finalize debug and EH information. + for (const HandlerInfo &HI : Handlers) { + NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName, + HI.TimerGroupDescription, TimePassesIsEnabled); + HI.Handler->endModule(); + } + Handlers.clear(); + DD = nullptr; + + // If the target wants to know about weak references, print them all. + if (MAI->getWeakRefDirective()) { + // FIXME: This is not lazy, it would be nice to only print weak references + // to stuff that is actually used. Note that doing so would require targets + // to notice uses in operands (due to constant exprs etc). This should + // happen with the MC stuff eventually. + + // Print out module-level global objects here. + for (const auto &GO : M.global_objects()) { + if (!GO.hasExternalWeakLinkage()) + continue; + OutStreamer->EmitSymbolAttribute(getSymbol(&GO), MCSA_WeakReference); + } + } + + OutStreamer->AddBlankLine(); + + // Print aliases in topological order, that is, for each alias a = b, + // b must be printed before a. + // This is because on some targets (e.g. PowerPC) linker expects aliases in + // such an order to generate correct TOC information. + SmallVector<const GlobalAlias *, 16> AliasStack; + SmallPtrSet<const GlobalAlias *, 16> AliasVisited; + for (const auto &Alias : M.aliases()) { + for (const GlobalAlias *Cur = &Alias; Cur; + Cur = dyn_cast<GlobalAlias>(Cur->getAliasee())) { + if (!AliasVisited.insert(Cur).second) + break; + AliasStack.push_back(Cur); + } + for (const GlobalAlias *AncestorAlias : llvm::reverse(AliasStack)) + emitGlobalIndirectSymbol(M, *AncestorAlias); + AliasStack.clear(); + } + for (const auto &IFunc : M.ifuncs()) + emitGlobalIndirectSymbol(M, IFunc); + + GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>(); + assert(MI && "AsmPrinter didn't require GCModuleInfo?"); + for (GCModuleInfo::iterator I = MI->end(), E = MI->begin(); I != E; ) + if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(**--I)) + MP->finishAssembly(M, *MI, *this); + + // Emit llvm.ident metadata in an '.ident' directive. + EmitModuleIdents(M); + + // Emit bytes for llvm.commandline metadata. + EmitModuleCommandLines(M); + + // Emit __morestack address if needed for indirect calls. + if (MMI->usesMorestackAddr()) { + unsigned Align = 1; + MCSection *ReadOnlySection = getObjFileLowering().getSectionForConstant( + getDataLayout(), SectionKind::getReadOnly(), + /*C=*/nullptr, Align); + OutStreamer->SwitchSection(ReadOnlySection); + + MCSymbol *AddrSymbol = + OutContext.getOrCreateSymbol(StringRef("__morestack_addr")); + OutStreamer->EmitLabel(AddrSymbol); + + unsigned PtrSize = MAI->getCodePointerSize(); + OutStreamer->EmitSymbolValue(GetExternalSymbolSymbol("__morestack"), + PtrSize); + } + + // Emit .note.GNU-split-stack and .note.GNU-no-split-stack sections if + // split-stack is used. + if (TM.getTargetTriple().isOSBinFormatELF() && MMI->hasSplitStack()) { + OutStreamer->SwitchSection( + OutContext.getELFSection(".note.GNU-split-stack", ELF::SHT_PROGBITS, 0)); + if (MMI->hasNosplitStack()) + OutStreamer->SwitchSection( + OutContext.getELFSection(".note.GNU-no-split-stack", ELF::SHT_PROGBITS, 0)); + } + + // If we don't have any trampolines, then we don't require stack memory + // to be executable. Some targets have a directive to declare this. + Function *InitTrampolineIntrinsic = M.getFunction("llvm.init.trampoline"); + if (!InitTrampolineIntrinsic || InitTrampolineIntrinsic->use_empty()) + if (MCSection *S = MAI->getNonexecutableStackSection(OutContext)) + OutStreamer->SwitchSection(S); + + if (TM.getTargetTriple().isOSBinFormatCOFF()) { + // Emit /EXPORT: flags for each exported global as necessary. + const auto &TLOF = getObjFileLowering(); + std::string Flags; + + for (const GlobalValue &GV : M.global_values()) { + raw_string_ostream OS(Flags); + TLOF.emitLinkerFlagsForGlobal(OS, &GV); + OS.flush(); + if (!Flags.empty()) { + OutStreamer->SwitchSection(TLOF.getDrectveSection()); + OutStreamer->EmitBytes(Flags); + } + Flags.clear(); + } + + // Emit /INCLUDE: flags for each used global as necessary. + if (const auto *LU = M.getNamedGlobal("llvm.used")) { + assert(LU->hasInitializer() && + "expected llvm.used to have an initializer"); + assert(isa<ArrayType>(LU->getValueType()) && + "expected llvm.used to be an array type"); + if (const auto *A = cast<ConstantArray>(LU->getInitializer())) { + for (const Value *Op : A->operands()) { + const auto *GV = cast<GlobalValue>(Op->stripPointerCasts()); + // Global symbols with internal or private linkage are not visible to + // the linker, and thus would cause an error when the linker tried to + // preserve the symbol due to the `/include:` directive. + if (GV->hasLocalLinkage()) + continue; + + raw_string_ostream OS(Flags); + TLOF.emitLinkerFlagsForUsed(OS, GV); + OS.flush(); + + if (!Flags.empty()) { + OutStreamer->SwitchSection(TLOF.getDrectveSection()); + OutStreamer->EmitBytes(Flags); + } + Flags.clear(); + } + } + } + } + + if (TM.Options.EmitAddrsig) { + // Emit address-significance attributes for all globals. + OutStreamer->EmitAddrsig(); + for (const GlobalValue &GV : M.global_values()) + if (!GV.use_empty() && !GV.isThreadLocal() && + !GV.hasDLLImportStorageClass() && !GV.getName().startswith("llvm.") && + !GV.hasAtLeastLocalUnnamedAddr()) + OutStreamer->EmitAddrsigSym(getSymbol(&GV)); + } + + // Emit symbol partition specifications (ELF only). + if (TM.getTargetTriple().isOSBinFormatELF()) { + unsigned UniqueID = 0; + for (const GlobalValue &GV : M.global_values()) { + if (!GV.hasPartition() || GV.isDeclarationForLinker() || + GV.getVisibility() != GlobalValue::DefaultVisibility) + continue; + + OutStreamer->SwitchSection(OutContext.getELFSection( + ".llvm_sympart", ELF::SHT_LLVM_SYMPART, 0, 0, "", ++UniqueID)); + OutStreamer->EmitBytes(GV.getPartition()); + OutStreamer->EmitZeros(1); + OutStreamer->EmitValue( + MCSymbolRefExpr::create(getSymbol(&GV), OutContext), + MAI->getCodePointerSize()); + } + } + + // Allow the target to emit any magic that it wants at the end of the file, + // after everything else has gone out. + EmitEndOfAsmFile(M); + + MMI = nullptr; + + OutStreamer->Finish(); + OutStreamer->reset(); + OwnedMLI.reset(); + OwnedMDT.reset(); + + return false; +} + +MCSymbol *AsmPrinter::getCurExceptionSym() { + if (!CurExceptionSym) + CurExceptionSym = createTempSymbol("exception"); + return CurExceptionSym; +} + +void AsmPrinter::SetupMachineFunction(MachineFunction &MF) { + this->MF = &MF; + + // Get the function symbol. + if (MAI->needsFunctionDescriptors()) { + assert(TM.getTargetTriple().isOSAIX() && "Function descriptor is only" + " supported on AIX."); + assert(CurrentFnDescSym && "The function descriptor symbol needs to be" + " initalized first."); + + // Get the function entry point symbol. + CurrentFnSym = + OutContext.getOrCreateSymbol("." + CurrentFnDescSym->getName()); + + const Function &F = MF.getFunction(); + MCSectionXCOFF *FnEntryPointSec = + cast<MCSectionXCOFF>(getObjFileLowering().SectionForGlobal(&F, TM)); + // Set the containing csect. + cast<MCSymbolXCOFF>(CurrentFnSym)->setContainingCsect(FnEntryPointSec); + } else { + CurrentFnSym = getSymbol(&MF.getFunction()); + } + + CurrentFnSymForSize = CurrentFnSym; + CurrentFnBegin = nullptr; + CurExceptionSym = nullptr; + bool NeedsLocalForSize = MAI->needsLocalForSize(); + if (needFuncLabelsForEHOrDebugInfo(MF, MMI) || NeedsLocalForSize || + MF.getTarget().Options.EmitStackSizeSection) { + CurrentFnBegin = createTempSymbol("func_begin"); + if (NeedsLocalForSize) + CurrentFnSymForSize = CurrentFnBegin; + } + + ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE(); +} + +namespace { + +// Keep track the alignment, constpool entries per Section. + struct SectionCPs { + MCSection *S; + unsigned Alignment; + SmallVector<unsigned, 4> CPEs; + + SectionCPs(MCSection *s, unsigned a) : S(s), Alignment(a) {} + }; + +} // end anonymous namespace + +/// EmitConstantPool - Print to the current output stream assembly +/// representations of the constants in the constant pool MCP. This is +/// used to print out constants which have been "spilled to memory" by +/// the code generator. +void AsmPrinter::EmitConstantPool() { + const MachineConstantPool *MCP = MF->getConstantPool(); + const std::vector<MachineConstantPoolEntry> &CP = MCP->getConstants(); + if (CP.empty()) return; + + // Calculate sections for constant pool entries. We collect entries to go into + // the same section together to reduce amount of section switch statements. + SmallVector<SectionCPs, 4> CPSections; + for (unsigned i = 0, e = CP.size(); i != e; ++i) { + const MachineConstantPoolEntry &CPE = CP[i]; + unsigned Align = CPE.getAlignment(); + + SectionKind Kind = CPE.getSectionKind(&getDataLayout()); + + const Constant *C = nullptr; + if (!CPE.isMachineConstantPoolEntry()) + C = CPE.Val.ConstVal; + + MCSection *S = getObjFileLowering().getSectionForConstant(getDataLayout(), + Kind, C, Align); + + // The number of sections are small, just do a linear search from the + // last section to the first. + bool Found = false; + unsigned SecIdx = CPSections.size(); + while (SecIdx != 0) { + if (CPSections[--SecIdx].S == S) { + Found = true; + break; + } + } + if (!Found) { + SecIdx = CPSections.size(); + CPSections.push_back(SectionCPs(S, Align)); + } + + if (Align > CPSections[SecIdx].Alignment) + CPSections[SecIdx].Alignment = Align; + CPSections[SecIdx].CPEs.push_back(i); + } + + // Now print stuff into the calculated sections. + const MCSection *CurSection = nullptr; + unsigned Offset = 0; + for (unsigned i = 0, e = CPSections.size(); i != e; ++i) { + for (unsigned j = 0, ee = CPSections[i].CPEs.size(); j != ee; ++j) { + unsigned CPI = CPSections[i].CPEs[j]; + MCSymbol *Sym = GetCPISymbol(CPI); + if (!Sym->isUndefined()) + continue; + + if (CurSection != CPSections[i].S) { + OutStreamer->SwitchSection(CPSections[i].S); + EmitAlignment(Align(CPSections[i].Alignment)); + CurSection = CPSections[i].S; + Offset = 0; + } + + MachineConstantPoolEntry CPE = CP[CPI]; + + // Emit inter-object padding for alignment. + unsigned AlignMask = CPE.getAlignment() - 1; + unsigned NewOffset = (Offset + AlignMask) & ~AlignMask; + OutStreamer->EmitZeros(NewOffset - Offset); + + Type *Ty = CPE.getType(); + Offset = NewOffset + getDataLayout().getTypeAllocSize(Ty); + + OutStreamer->EmitLabel(Sym); + if (CPE.isMachineConstantPoolEntry()) + EmitMachineConstantPoolValue(CPE.Val.MachineCPVal); + else + EmitGlobalConstant(getDataLayout(), CPE.Val.ConstVal); + } + } +} + +/// EmitJumpTableInfo - Print assembly representations of the jump tables used +/// by the current function to the current output stream. +void AsmPrinter::EmitJumpTableInfo() { + const DataLayout &DL = MF->getDataLayout(); + const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); + if (!MJTI) return; + if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_Inline) return; + const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); + if (JT.empty()) return; + + // Pick the directive to use to print the jump table entries, and switch to + // the appropriate section. + const Function &F = MF->getFunction(); + const TargetLoweringObjectFile &TLOF = getObjFileLowering(); + bool JTInDiffSection = !TLOF.shouldPutJumpTableInFunctionSection( + MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32, + F); + if (JTInDiffSection) { + // Drop it in the readonly section. + MCSection *ReadOnlySection = TLOF.getSectionForJumpTable(F, TM); + OutStreamer->SwitchSection(ReadOnlySection); + } + + EmitAlignment(Align(MJTI->getEntryAlignment(DL))); + + // Jump tables in code sections are marked with a data_region directive + // where that's supported. + if (!JTInDiffSection) + OutStreamer->EmitDataRegion(MCDR_DataRegionJT32); + + for (unsigned JTI = 0, e = JT.size(); JTI != e; ++JTI) { + const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs; + + // If this jump table was deleted, ignore it. + if (JTBBs.empty()) continue; + + // For the EK_LabelDifference32 entry, if using .set avoids a relocation, + /// emit a .set directive for each unique entry. + if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 && + MAI->doesSetDirectiveSuppressReloc()) { + SmallPtrSet<const MachineBasicBlock*, 16> EmittedSets; + const TargetLowering *TLI = MF->getSubtarget().getTargetLowering(); + const MCExpr *Base = TLI->getPICJumpTableRelocBaseExpr(MF,JTI,OutContext); + for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii) { + const MachineBasicBlock *MBB = JTBBs[ii]; + if (!EmittedSets.insert(MBB).second) + continue; + + // .set LJTSet, LBB32-base + const MCExpr *LHS = + MCSymbolRefExpr::create(MBB->getSymbol(), OutContext); + OutStreamer->EmitAssignment(GetJTSetSymbol(JTI, MBB->getNumber()), + MCBinaryExpr::createSub(LHS, Base, + OutContext)); + } + } + + // On some targets (e.g. Darwin) we want to emit two consecutive labels + // before each jump table. The first label is never referenced, but tells + // the assembler and linker the extents of the jump table object. The + // second label is actually referenced by the code. + if (JTInDiffSection && DL.hasLinkerPrivateGlobalPrefix()) + // FIXME: This doesn't have to have any specific name, just any randomly + // named and numbered 'l' label would work. Simplify GetJTISymbol. + OutStreamer->EmitLabel(GetJTISymbol(JTI, true)); + + OutStreamer->EmitLabel(GetJTISymbol(JTI)); + + for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii) + EmitJumpTableEntry(MJTI, JTBBs[ii], JTI); + } + if (!JTInDiffSection) + OutStreamer->EmitDataRegion(MCDR_DataRegionEnd); +} + +/// EmitJumpTableEntry - Emit a jump table entry for the specified MBB to the +/// current stream. +void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI, + const MachineBasicBlock *MBB, + unsigned UID) const { + assert(MBB && MBB->getNumber() >= 0 && "Invalid basic block"); + const MCExpr *Value = nullptr; + switch (MJTI->getEntryKind()) { + case MachineJumpTableInfo::EK_Inline: + llvm_unreachable("Cannot emit EK_Inline jump table entry"); + case MachineJumpTableInfo::EK_Custom32: + Value = MF->getSubtarget().getTargetLowering()->LowerCustomJumpTableEntry( + MJTI, MBB, UID, OutContext); + break; + case MachineJumpTableInfo::EK_BlockAddress: + // EK_BlockAddress - Each entry is a plain address of block, e.g.: + // .word LBB123 + Value = MCSymbolRefExpr::create(MBB->getSymbol(), OutContext); + break; + case MachineJumpTableInfo::EK_GPRel32BlockAddress: { + // EK_GPRel32BlockAddress - Each entry is an address of block, encoded + // with a relocation as gp-relative, e.g.: + // .gprel32 LBB123 + MCSymbol *MBBSym = MBB->getSymbol(); + OutStreamer->EmitGPRel32Value(MCSymbolRefExpr::create(MBBSym, OutContext)); + return; + } + + case MachineJumpTableInfo::EK_GPRel64BlockAddress: { + // EK_GPRel64BlockAddress - Each entry is an address of block, encoded + // with a relocation as gp-relative, e.g.: + // .gpdword LBB123 + MCSymbol *MBBSym = MBB->getSymbol(); + OutStreamer->EmitGPRel64Value(MCSymbolRefExpr::create(MBBSym, OutContext)); + return; + } + + case MachineJumpTableInfo::EK_LabelDifference32: { + // Each entry is the address of the block minus the address of the jump + // table. This is used for PIC jump tables where gprel32 is not supported. + // e.g.: + // .word LBB123 - LJTI1_2 + // If the .set directive avoids relocations, this is emitted as: + // .set L4_5_set_123, LBB123 - LJTI1_2 + // .word L4_5_set_123 + if (MAI->doesSetDirectiveSuppressReloc()) { + Value = MCSymbolRefExpr::create(GetJTSetSymbol(UID, MBB->getNumber()), + OutContext); + break; + } + Value = MCSymbolRefExpr::create(MBB->getSymbol(), OutContext); + const TargetLowering *TLI = MF->getSubtarget().getTargetLowering(); + const MCExpr *Base = TLI->getPICJumpTableRelocBaseExpr(MF, UID, OutContext); + Value = MCBinaryExpr::createSub(Value, Base, OutContext); + break; + } + } + + assert(Value && "Unknown entry kind!"); + + unsigned EntrySize = MJTI->getEntrySize(getDataLayout()); + OutStreamer->EmitValue(Value, EntrySize); +} + +/// EmitSpecialLLVMGlobal - Check to see if the specified global is a +/// special global used by LLVM. If so, emit it and return true, otherwise +/// do nothing and return false. +bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { + if (GV->getName() == "llvm.used") { + if (MAI->hasNoDeadStrip()) // No need to emit this at all. + EmitLLVMUsedList(cast<ConstantArray>(GV->getInitializer())); + return true; + } + + // Ignore debug and non-emitted data. This handles llvm.compiler.used. + if (GV->getSection() == "llvm.metadata" || + GV->hasAvailableExternallyLinkage()) + return true; + + if (!GV->hasAppendingLinkage()) return false; + + assert(GV->hasInitializer() && "Not a special LLVM global!"); + + if (GV->getName() == "llvm.global_ctors") { + EmitXXStructorList(GV->getParent()->getDataLayout(), GV->getInitializer(), + /* isCtor */ true); + + return true; + } + + if (GV->getName() == "llvm.global_dtors") { + EmitXXStructorList(GV->getParent()->getDataLayout(), GV->getInitializer(), + /* isCtor */ false); + + return true; + } + + report_fatal_error("unknown special variable"); +} + +/// EmitLLVMUsedList - For targets that define a MAI::UsedDirective, mark each +/// global in the specified llvm.used list. +void AsmPrinter::EmitLLVMUsedList(const ConstantArray *InitList) { + // Should be an array of 'i8*'. + for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) { + const GlobalValue *GV = + dyn_cast<GlobalValue>(InitList->getOperand(i)->stripPointerCasts()); + if (GV) + OutStreamer->EmitSymbolAttribute(getSymbol(GV), MCSA_NoDeadStrip); + } +} + +namespace { + +struct Structor { + int Priority = 0; + Constant *Func = nullptr; + GlobalValue *ComdatKey = nullptr; + + Structor() = default; +}; + +} // end anonymous namespace + +/// EmitXXStructorList - Emit the ctor or dtor list taking into account the init +/// priority. +void AsmPrinter::EmitXXStructorList(const DataLayout &DL, const Constant *List, + bool isCtor) { + // Should be an array of '{ i32, void ()*, i8* }' structs. The first value is the + // init priority. + if (!isa<ConstantArray>(List)) return; + + // Sanity check the structors list. + const ConstantArray *InitList = dyn_cast<ConstantArray>(List); + if (!InitList) return; // Not an array! + StructType *ETy = dyn_cast<StructType>(InitList->getType()->getElementType()); + if (!ETy || ETy->getNumElements() != 3 || + !isa<IntegerType>(ETy->getTypeAtIndex(0U)) || + !isa<PointerType>(ETy->getTypeAtIndex(1U)) || + !isa<PointerType>(ETy->getTypeAtIndex(2U))) + return; // Not (int, ptr, ptr). + + // Gather the structors in a form that's convenient for sorting by priority. + SmallVector<Structor, 8> Structors; + for (Value *O : InitList->operands()) { + ConstantStruct *CS = dyn_cast<ConstantStruct>(O); + if (!CS) continue; // Malformed. + if (CS->getOperand(1)->isNullValue()) + break; // Found a null terminator, skip the rest. + ConstantInt *Priority = dyn_cast<ConstantInt>(CS->getOperand(0)); + if (!Priority) continue; // Malformed. + Structors.push_back(Structor()); + Structor &S = Structors.back(); + S.Priority = Priority->getLimitedValue(65535); + S.Func = CS->getOperand(1); + if (!CS->getOperand(2)->isNullValue()) + S.ComdatKey = + dyn_cast<GlobalValue>(CS->getOperand(2)->stripPointerCasts()); + } + + // Emit the function pointers in the target-specific order + llvm::stable_sort(Structors, [](const Structor &L, const Structor &R) { + return L.Priority < R.Priority; + }); + const Align Align = DL.getPointerPrefAlignment(); + for (Structor &S : Structors) { + const TargetLoweringObjectFile &Obj = getObjFileLowering(); + const MCSymbol *KeySym = nullptr; + if (GlobalValue *GV = S.ComdatKey) { + if (GV->isDeclarationForLinker()) + // If the associated variable is not defined in this module + // (it might be available_externally, or have been an + // available_externally definition that was dropped by the + // EliminateAvailableExternally pass), some other TU + // will provide its dynamic initializer. + continue; + + KeySym = getSymbol(GV); + } + MCSection *OutputSection = + (isCtor ? Obj.getStaticCtorSection(S.Priority, KeySym) + : Obj.getStaticDtorSection(S.Priority, KeySym)); + OutStreamer->SwitchSection(OutputSection); + if (OutStreamer->getCurrentSection() != OutStreamer->getPreviousSection()) + EmitAlignment(Align); + EmitXXStructor(DL, S.Func); + } +} + +void AsmPrinter::EmitModuleIdents(Module &M) { + if (!MAI->hasIdentDirective()) + return; + + if (const NamedMDNode *NMD = M.getNamedMetadata("llvm.ident")) { + for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { + const MDNode *N = NMD->getOperand(i); + assert(N->getNumOperands() == 1 && + "llvm.ident metadata entry can have only one operand"); + const MDString *S = cast<MDString>(N->getOperand(0)); + OutStreamer->EmitIdent(S->getString()); + } + } +} + +void AsmPrinter::EmitModuleCommandLines(Module &M) { + MCSection *CommandLine = getObjFileLowering().getSectionForCommandLines(); + if (!CommandLine) + return; + + const NamedMDNode *NMD = M.getNamedMetadata("llvm.commandline"); + if (!NMD || !NMD->getNumOperands()) + return; + + OutStreamer->PushSection(); + OutStreamer->SwitchSection(CommandLine); + OutStreamer->EmitZeros(1); + for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { + const MDNode *N = NMD->getOperand(i); + assert(N->getNumOperands() == 1 && + "llvm.commandline metadata entry can have only one operand"); + const MDString *S = cast<MDString>(N->getOperand(0)); + OutStreamer->EmitBytes(S->getString()); + OutStreamer->EmitZeros(1); + } + OutStreamer->PopSection(); +} + +//===--------------------------------------------------------------------===// +// Emission and print routines +// + +/// Emit a byte directive and value. +/// +void AsmPrinter::emitInt8(int Value) const { + OutStreamer->EmitIntValue(Value, 1); +} + +/// Emit a short directive and value. +void AsmPrinter::emitInt16(int Value) const { + OutStreamer->EmitIntValue(Value, 2); +} + +/// Emit a long directive and value. +void AsmPrinter::emitInt32(int Value) const { + OutStreamer->EmitIntValue(Value, 4); +} + +/// Emit a long long directive and value. +void AsmPrinter::emitInt64(uint64_t Value) const { + OutStreamer->EmitIntValue(Value, 8); +} + +/// Emit something like ".long Hi-Lo" where the size in bytes of the directive +/// is specified by Size and Hi/Lo specify the labels. This implicitly uses +/// .set if it avoids relocations. +void AsmPrinter::EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo, + unsigned Size) const { + OutStreamer->emitAbsoluteSymbolDiff(Hi, Lo, Size); +} + +/// EmitLabelPlusOffset - Emit something like ".long Label+Offset" +/// where the size in bytes of the directive is specified by Size and Label +/// specifies the label. This implicitly uses .set if it is available. +void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset, + unsigned Size, + bool IsSectionRelative) const { + if (MAI->needsDwarfSectionOffsetDirective() && IsSectionRelative) { + OutStreamer->EmitCOFFSecRel32(Label, Offset); + if (Size > 4) + OutStreamer->EmitZeros(Size - 4); + return; + } + + // Emit Label+Offset (or just Label if Offset is zero) + const MCExpr *Expr = MCSymbolRefExpr::create(Label, OutContext); + if (Offset) + Expr = MCBinaryExpr::createAdd( + Expr, MCConstantExpr::create(Offset, OutContext), OutContext); + + OutStreamer->EmitValue(Expr, Size); +} + +//===----------------------------------------------------------------------===// + +// EmitAlignment - Emit an alignment directive to the specified power of +// two boundary. If a global value is specified, and if that global has +// an explicit alignment requested, it will override the alignment request +// if required for correctness. +void AsmPrinter::EmitAlignment(Align Alignment, const GlobalObject *GV) const { + if (GV) + Alignment = getGVAlignment(GV, GV->getParent()->getDataLayout(), Alignment); + + if (Alignment == Align::None()) + return; // 1-byte aligned: no need to emit alignment. + + if (getCurrentSection()->getKind().isText()) + OutStreamer->EmitCodeAlignment(Alignment.value()); + else + OutStreamer->EmitValueToAlignment(Alignment.value()); +} + +//===----------------------------------------------------------------------===// +// Constant emission. +//===----------------------------------------------------------------------===// + +const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) { + MCContext &Ctx = OutContext; + + if (CV->isNullValue() || isa<UndefValue>(CV)) + return MCConstantExpr::create(0, Ctx); + + if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) + return MCConstantExpr::create(CI->getZExtValue(), Ctx); + + if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) + return MCSymbolRefExpr::create(getSymbol(GV), Ctx); + + if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV)) + return MCSymbolRefExpr::create(GetBlockAddressSymbol(BA), Ctx); + + const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV); + if (!CE) { + llvm_unreachable("Unknown constant value to lower!"); + } + + switch (CE->getOpcode()) { + default: + // If the code isn't optimized, there may be outstanding folding + // opportunities. Attempt to fold the expression using DataLayout as a + // last resort before giving up. + if (Constant *C = ConstantFoldConstant(CE, getDataLayout())) + if (C != CE) + return lowerConstant(C); + + // Otherwise report the problem to the user. + { + std::string S; + raw_string_ostream OS(S); + OS << "Unsupported expression in static initializer: "; + CE->printAsOperand(OS, /*PrintType=*/false, + !MF ? nullptr : MF->getFunction().getParent()); + report_fatal_error(OS.str()); + } + case Instruction::GetElementPtr: { + // Generate a symbolic expression for the byte address + APInt OffsetAI(getDataLayout().getPointerTypeSizeInBits(CE->getType()), 0); + cast<GEPOperator>(CE)->accumulateConstantOffset(getDataLayout(), OffsetAI); + + const MCExpr *Base = lowerConstant(CE->getOperand(0)); + if (!OffsetAI) + return Base; + + int64_t Offset = OffsetAI.getSExtValue(); + return MCBinaryExpr::createAdd(Base, MCConstantExpr::create(Offset, Ctx), + Ctx); + } + + case Instruction::Trunc: + // We emit the value and depend on the assembler to truncate the generated + // expression properly. This is important for differences between + // blockaddress labels. Since the two labels are in the same function, it + // is reasonable to treat their delta as a 32-bit value. + LLVM_FALLTHROUGH; + case Instruction::BitCast: + return lowerConstant(CE->getOperand(0)); + + case Instruction::IntToPtr: { + const DataLayout &DL = getDataLayout(); + + // Handle casts to pointers by changing them into casts to the appropriate + // integer type. This promotes constant folding and simplifies this code. + Constant *Op = CE->getOperand(0); + Op = ConstantExpr::getIntegerCast(Op, DL.getIntPtrType(CV->getType()), + false/*ZExt*/); + return lowerConstant(Op); + } + + case Instruction::PtrToInt: { + const DataLayout &DL = getDataLayout(); + + // Support only foldable casts to/from pointers that can be eliminated by + // changing the pointer to the appropriately sized integer type. + Constant *Op = CE->getOperand(0); + Type *Ty = CE->getType(); + + const MCExpr *OpExpr = lowerConstant(Op); + + // We can emit the pointer value into this slot if the slot is an + // integer slot equal to the size of the pointer. + // + // If the pointer is larger than the resultant integer, then + // as with Trunc just depend on the assembler to truncate it. + if (DL.getTypeAllocSize(Ty) <= DL.getTypeAllocSize(Op->getType())) + return OpExpr; + + // Otherwise the pointer is smaller than the resultant integer, mask off + // the high bits so we are sure to get a proper truncation if the input is + // a constant expr. + unsigned InBits = DL.getTypeAllocSizeInBits(Op->getType()); + const MCExpr *MaskExpr = MCConstantExpr::create(~0ULL >> (64-InBits), Ctx); + return MCBinaryExpr::createAnd(OpExpr, MaskExpr, Ctx); + } + + case Instruction::Sub: { + GlobalValue *LHSGV; + APInt LHSOffset; + if (IsConstantOffsetFromGlobal(CE->getOperand(0), LHSGV, LHSOffset, + getDataLayout())) { + GlobalValue *RHSGV; + APInt RHSOffset; + if (IsConstantOffsetFromGlobal(CE->getOperand(1), RHSGV, RHSOffset, + getDataLayout())) { + const MCExpr *RelocExpr = + getObjFileLowering().lowerRelativeReference(LHSGV, RHSGV, TM); + if (!RelocExpr) + RelocExpr = MCBinaryExpr::createSub( + MCSymbolRefExpr::create(getSymbol(LHSGV), Ctx), + MCSymbolRefExpr::create(getSymbol(RHSGV), Ctx), Ctx); + int64_t Addend = (LHSOffset - RHSOffset).getSExtValue(); + if (Addend != 0) + RelocExpr = MCBinaryExpr::createAdd( + RelocExpr, MCConstantExpr::create(Addend, Ctx), Ctx); + return RelocExpr; + } + } + } + // else fallthrough + LLVM_FALLTHROUGH; + + // The MC library also has a right-shift operator, but it isn't consistently + // signed or unsigned between different targets. + case Instruction::Add: + case Instruction::Mul: + case Instruction::SDiv: + case Instruction::SRem: + case Instruction::Shl: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: { + const MCExpr *LHS = lowerConstant(CE->getOperand(0)); + const MCExpr *RHS = lowerConstant(CE->getOperand(1)); + switch (CE->getOpcode()) { + default: llvm_unreachable("Unknown binary operator constant cast expr"); + case Instruction::Add: return MCBinaryExpr::createAdd(LHS, RHS, Ctx); + case Instruction::Sub: return MCBinaryExpr::createSub(LHS, RHS, Ctx); + case Instruction::Mul: return MCBinaryExpr::createMul(LHS, RHS, Ctx); + case Instruction::SDiv: return MCBinaryExpr::createDiv(LHS, RHS, Ctx); + case Instruction::SRem: return MCBinaryExpr::createMod(LHS, RHS, Ctx); + case Instruction::Shl: return MCBinaryExpr::createShl(LHS, RHS, Ctx); + case Instruction::And: return MCBinaryExpr::createAnd(LHS, RHS, Ctx); + case Instruction::Or: return MCBinaryExpr::createOr (LHS, RHS, Ctx); + case Instruction::Xor: return MCBinaryExpr::createXor(LHS, RHS, Ctx); + } + } + } +} + +static void emitGlobalConstantImpl(const DataLayout &DL, const Constant *C, + AsmPrinter &AP, + const Constant *BaseCV = nullptr, + uint64_t Offset = 0); + +static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP); +static void emitGlobalConstantFP(APFloat APF, Type *ET, AsmPrinter &AP); + +/// isRepeatedByteSequence - Determine whether the given value is +/// composed of a repeated sequence of identical bytes and return the +/// byte value. If it is not a repeated sequence, return -1. +static int isRepeatedByteSequence(const ConstantDataSequential *V) { + StringRef Data = V->getRawDataValues(); + assert(!Data.empty() && "Empty aggregates should be CAZ node"); + char C = Data[0]; + for (unsigned i = 1, e = Data.size(); i != e; ++i) + if (Data[i] != C) return -1; + return static_cast<uint8_t>(C); // Ensure 255 is not returned as -1. +} + +/// isRepeatedByteSequence - Determine whether the given value is +/// composed of a repeated sequence of identical bytes and return the +/// byte value. If it is not a repeated sequence, return -1. +static int isRepeatedByteSequence(const Value *V, const DataLayout &DL) { + if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) { + uint64_t Size = DL.getTypeAllocSizeInBits(V->getType()); + assert(Size % 8 == 0); + + // Extend the element to take zero padding into account. + APInt Value = CI->getValue().zextOrSelf(Size); + if (!Value.isSplat(8)) + return -1; + + return Value.zextOrTrunc(8).getZExtValue(); + } + if (const ConstantArray *CA = dyn_cast<ConstantArray>(V)) { + // Make sure all array elements are sequences of the same repeated + // byte. + assert(CA->getNumOperands() != 0 && "Should be a CAZ"); + Constant *Op0 = CA->getOperand(0); + int Byte = isRepeatedByteSequence(Op0, DL); + if (Byte == -1) + return -1; + + // All array elements must be equal. + for (unsigned i = 1, e = CA->getNumOperands(); i != e; ++i) + if (CA->getOperand(i) != Op0) + return -1; + return Byte; + } + + if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(V)) + return isRepeatedByteSequence(CDS); + + return -1; +} + +static void emitGlobalConstantDataSequential(const DataLayout &DL, + const ConstantDataSequential *CDS, + AsmPrinter &AP) { + // See if we can aggregate this into a .fill, if so, emit it as such. + int Value = isRepeatedByteSequence(CDS, DL); + if (Value != -1) { + uint64_t Bytes = DL.getTypeAllocSize(CDS->getType()); + // Don't emit a 1-byte object as a .fill. + if (Bytes > 1) + return AP.OutStreamer->emitFill(Bytes, Value); + } + + // If this can be emitted with .ascii/.asciz, emit it as such. + if (CDS->isString()) + return AP.OutStreamer->EmitBytes(CDS->getAsString()); + + // Otherwise, emit the values in successive locations. + unsigned ElementByteSize = CDS->getElementByteSize(); + if (isa<IntegerType>(CDS->getElementType())) { + for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { + if (AP.isVerbose()) + AP.OutStreamer->GetCommentOS() << format("0x%" PRIx64 "\n", + CDS->getElementAsInteger(i)); + AP.OutStreamer->EmitIntValue(CDS->getElementAsInteger(i), + ElementByteSize); + } + } else { + Type *ET = CDS->getElementType(); + for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) + emitGlobalConstantFP(CDS->getElementAsAPFloat(I), ET, AP); + } + + unsigned Size = DL.getTypeAllocSize(CDS->getType()); + unsigned EmittedSize = DL.getTypeAllocSize(CDS->getType()->getElementType()) * + CDS->getNumElements(); + assert(EmittedSize <= Size && "Size cannot be less than EmittedSize!"); + if (unsigned Padding = Size - EmittedSize) + AP.OutStreamer->EmitZeros(Padding); +} + +static void emitGlobalConstantArray(const DataLayout &DL, + const ConstantArray *CA, AsmPrinter &AP, + const Constant *BaseCV, uint64_t Offset) { + // See if we can aggregate some values. Make sure it can be + // represented as a series of bytes of the constant value. + int Value = isRepeatedByteSequence(CA, DL); + + if (Value != -1) { + uint64_t Bytes = DL.getTypeAllocSize(CA->getType()); + AP.OutStreamer->emitFill(Bytes, Value); + } + else { + for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i) { + emitGlobalConstantImpl(DL, CA->getOperand(i), AP, BaseCV, Offset); + Offset += DL.getTypeAllocSize(CA->getOperand(i)->getType()); + } + } +} + +static void emitGlobalConstantVector(const DataLayout &DL, + const ConstantVector *CV, AsmPrinter &AP) { + for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i) + emitGlobalConstantImpl(DL, CV->getOperand(i), AP); + + unsigned Size = DL.getTypeAllocSize(CV->getType()); + unsigned EmittedSize = DL.getTypeAllocSize(CV->getType()->getElementType()) * + CV->getType()->getNumElements(); + if (unsigned Padding = Size - EmittedSize) + AP.OutStreamer->EmitZeros(Padding); +} + +static void emitGlobalConstantStruct(const DataLayout &DL, + const ConstantStruct *CS, AsmPrinter &AP, + const Constant *BaseCV, uint64_t Offset) { + // Print the fields in successive locations. Pad to align if needed! + unsigned Size = DL.getTypeAllocSize(CS->getType()); + const StructLayout *Layout = DL.getStructLayout(CS->getType()); + uint64_t SizeSoFar = 0; + for (unsigned i = 0, e = CS->getNumOperands(); i != e; ++i) { + const Constant *Field = CS->getOperand(i); + + // Print the actual field value. + emitGlobalConstantImpl(DL, Field, AP, BaseCV, Offset + SizeSoFar); + + // Check if padding is needed and insert one or more 0s. + uint64_t FieldSize = DL.getTypeAllocSize(Field->getType()); + uint64_t PadSize = ((i == e-1 ? Size : Layout->getElementOffset(i+1)) + - Layout->getElementOffset(i)) - FieldSize; + SizeSoFar += FieldSize + PadSize; + + // Insert padding - this may include padding to increase the size of the + // current field up to the ABI size (if the struct is not packed) as well + // as padding to ensure that the next field starts at the right offset. + AP.OutStreamer->EmitZeros(PadSize); + } + assert(SizeSoFar == Layout->getSizeInBytes() && + "Layout of constant struct may be incorrect!"); +} + +static void emitGlobalConstantFP(APFloat APF, Type *ET, AsmPrinter &AP) { + assert(ET && "Unknown float type"); + APInt API = APF.bitcastToAPInt(); + + // First print a comment with what we think the original floating-point value + // should have been. + if (AP.isVerbose()) { + SmallString<8> StrVal; + APF.toString(StrVal); + ET->print(AP.OutStreamer->GetCommentOS()); + AP.OutStreamer->GetCommentOS() << ' ' << StrVal << '\n'; + } + + // Now iterate through the APInt chunks, emitting them in endian-correct + // order, possibly with a smaller chunk at beginning/end (e.g. for x87 80-bit + // floats). + unsigned NumBytes = API.getBitWidth() / 8; + unsigned TrailingBytes = NumBytes % sizeof(uint64_t); + const uint64_t *p = API.getRawData(); + + // PPC's long double has odd notions of endianness compared to how LLVM + // handles it: p[0] goes first for *big* endian on PPC. + if (AP.getDataLayout().isBigEndian() && !ET->isPPC_FP128Ty()) { + int Chunk = API.getNumWords() - 1; + + if (TrailingBytes) + AP.OutStreamer->EmitIntValue(p[Chunk--], TrailingBytes); + + for (; Chunk >= 0; --Chunk) + AP.OutStreamer->EmitIntValue(p[Chunk], sizeof(uint64_t)); + } else { + unsigned Chunk; + for (Chunk = 0; Chunk < NumBytes / sizeof(uint64_t); ++Chunk) + AP.OutStreamer->EmitIntValue(p[Chunk], sizeof(uint64_t)); + + if (TrailingBytes) + AP.OutStreamer->EmitIntValue(p[Chunk], TrailingBytes); + } + + // Emit the tail padding for the long double. + const DataLayout &DL = AP.getDataLayout(); + AP.OutStreamer->EmitZeros(DL.getTypeAllocSize(ET) - DL.getTypeStoreSize(ET)); +} + +static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP) { + emitGlobalConstantFP(CFP->getValueAPF(), CFP->getType(), AP); +} + +static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) { + const DataLayout &DL = AP.getDataLayout(); + unsigned BitWidth = CI->getBitWidth(); + + // Copy the value as we may massage the layout for constants whose bit width + // is not a multiple of 64-bits. + APInt Realigned(CI->getValue()); + uint64_t ExtraBits = 0; + unsigned ExtraBitsSize = BitWidth & 63; + + if (ExtraBitsSize) { + // The bit width of the data is not a multiple of 64-bits. + // The extra bits are expected to be at the end of the chunk of the memory. + // Little endian: + // * Nothing to be done, just record the extra bits to emit. + // Big endian: + // * Record the extra bits to emit. + // * Realign the raw data to emit the chunks of 64-bits. + if (DL.isBigEndian()) { + // Basically the structure of the raw data is a chunk of 64-bits cells: + // 0 1 BitWidth / 64 + // [chunk1][chunk2] ... [chunkN]. + // The most significant chunk is chunkN and it should be emitted first. + // However, due to the alignment issue chunkN contains useless bits. + // Realign the chunks so that they contain only useless information: + // ExtraBits 0 1 (BitWidth / 64) - 1 + // chu[nk1 chu][nk2 chu] ... [nkN-1 chunkN] + ExtraBits = Realigned.getRawData()[0] & + (((uint64_t)-1) >> (64 - ExtraBitsSize)); + Realigned.lshrInPlace(ExtraBitsSize); + } else + ExtraBits = Realigned.getRawData()[BitWidth / 64]; + } + + // We don't expect assemblers to support integer data directives + // for more than 64 bits, so we emit the data in at most 64-bit + // quantities at a time. + const uint64_t *RawData = Realigned.getRawData(); + for (unsigned i = 0, e = BitWidth / 64; i != e; ++i) { + uint64_t Val = DL.isBigEndian() ? RawData[e - i - 1] : RawData[i]; + AP.OutStreamer->EmitIntValue(Val, 8); + } + + if (ExtraBitsSize) { + // Emit the extra bits after the 64-bits chunks. + + // Emit a directive that fills the expected size. + uint64_t Size = AP.getDataLayout().getTypeAllocSize(CI->getType()); + Size -= (BitWidth / 64) * 8; + assert(Size && Size * 8 >= ExtraBitsSize && + (ExtraBits & (((uint64_t)-1) >> (64 - ExtraBitsSize))) + == ExtraBits && "Directive too small for extra bits."); + AP.OutStreamer->EmitIntValue(ExtraBits, Size); + } +} + +/// Transform a not absolute MCExpr containing a reference to a GOT +/// equivalent global, by a target specific GOT pc relative access to the +/// final symbol. +static void handleIndirectSymViaGOTPCRel(AsmPrinter &AP, const MCExpr **ME, + const Constant *BaseCst, + uint64_t Offset) { + // The global @foo below illustrates a global that uses a got equivalent. + // + // @bar = global i32 42 + // @gotequiv = private unnamed_addr constant i32* @bar + // @foo = i32 trunc (i64 sub (i64 ptrtoint (i32** @gotequiv to i64), + // i64 ptrtoint (i32* @foo to i64)) + // to i32) + // + // The cstexpr in @foo is converted into the MCExpr `ME`, where we actually + // check whether @foo is suitable to use a GOTPCREL. `ME` is usually in the + // form: + // + // foo = cstexpr, where + // cstexpr := <gotequiv> - "." + <cst> + // cstexpr := <gotequiv> - (<foo> - <offset from @foo base>) + <cst> + // + // After canonicalization by evaluateAsRelocatable `ME` turns into: + // + // cstexpr := <gotequiv> - <foo> + gotpcrelcst, where + // gotpcrelcst := <offset from @foo base> + <cst> + MCValue MV; + if (!(*ME)->evaluateAsRelocatable(MV, nullptr, nullptr) || MV.isAbsolute()) + return; + const MCSymbolRefExpr *SymA = MV.getSymA(); + if (!SymA) + return; + + // Check that GOT equivalent symbol is cached. + const MCSymbol *GOTEquivSym = &SymA->getSymbol(); + if (!AP.GlobalGOTEquivs.count(GOTEquivSym)) + return; + + const GlobalValue *BaseGV = dyn_cast_or_null<GlobalValue>(BaseCst); + if (!BaseGV) + return; + + // Check for a valid base symbol + const MCSymbol *BaseSym = AP.getSymbol(BaseGV); + const MCSymbolRefExpr *SymB = MV.getSymB(); + + if (!SymB || BaseSym != &SymB->getSymbol()) + return; + + // Make sure to match: + // + // gotpcrelcst := <offset from @foo base> + <cst> + // + // If gotpcrelcst is positive it means that we can safely fold the pc rel + // displacement into the GOTPCREL. We can also can have an extra offset <cst> + // if the target knows how to encode it. + int64_t GOTPCRelCst = Offset + MV.getConstant(); + if (GOTPCRelCst < 0) + return; + if (!AP.getObjFileLowering().supportGOTPCRelWithOffset() && GOTPCRelCst != 0) + return; + + // Emit the GOT PC relative to replace the got equivalent global, i.e.: + // + // bar: + // .long 42 + // gotequiv: + // .quad bar + // foo: + // .long gotequiv - "." + <cst> + // + // is replaced by the target specific equivalent to: + // + // bar: + // .long 42 + // foo: + // .long bar@GOTPCREL+<gotpcrelcst> + AsmPrinter::GOTEquivUsePair Result = AP.GlobalGOTEquivs[GOTEquivSym]; + const GlobalVariable *GV = Result.first; + int NumUses = (int)Result.second; + const GlobalValue *FinalGV = dyn_cast<GlobalValue>(GV->getOperand(0)); + const MCSymbol *FinalSym = AP.getSymbol(FinalGV); + *ME = AP.getObjFileLowering().getIndirectSymViaGOTPCRel( + FinalGV, FinalSym, MV, Offset, AP.MMI, *AP.OutStreamer); + + // Update GOT equivalent usage information + --NumUses; + if (NumUses >= 0) + AP.GlobalGOTEquivs[GOTEquivSym] = std::make_pair(GV, NumUses); +} + +static void emitGlobalConstantImpl(const DataLayout &DL, const Constant *CV, + AsmPrinter &AP, const Constant *BaseCV, + uint64_t Offset) { + uint64_t Size = DL.getTypeAllocSize(CV->getType()); + + // Globals with sub-elements such as combinations of arrays and structs + // are handled recursively by emitGlobalConstantImpl. Keep track of the + // constant symbol base and the current position with BaseCV and Offset. + if (!BaseCV && CV->hasOneUse()) + BaseCV = dyn_cast<Constant>(CV->user_back()); + + if (isa<ConstantAggregateZero>(CV) || isa<UndefValue>(CV)) + return AP.OutStreamer->EmitZeros(Size); + + if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) { + switch (Size) { + case 1: + case 2: + case 4: + case 8: + if (AP.isVerbose()) + AP.OutStreamer->GetCommentOS() << format("0x%" PRIx64 "\n", + CI->getZExtValue()); + AP.OutStreamer->EmitIntValue(CI->getZExtValue(), Size); + return; + default: + emitGlobalConstantLargeInt(CI, AP); + return; + } + } + + if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) + return emitGlobalConstantFP(CFP, AP); + + if (isa<ConstantPointerNull>(CV)) { + AP.OutStreamer->EmitIntValue(0, Size); + return; + } + + if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(CV)) + return emitGlobalConstantDataSequential(DL, CDS, AP); + + if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV)) + return emitGlobalConstantArray(DL, CVA, AP, BaseCV, Offset); + + if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV)) + return emitGlobalConstantStruct(DL, CVS, AP, BaseCV, Offset); + + if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) { + // Look through bitcasts, which might not be able to be MCExpr'ized (e.g. of + // vectors). + if (CE->getOpcode() == Instruction::BitCast) + return emitGlobalConstantImpl(DL, CE->getOperand(0), AP); + + if (Size > 8) { + // If the constant expression's size is greater than 64-bits, then we have + // to emit the value in chunks. Try to constant fold the value and emit it + // that way. + Constant *New = ConstantFoldConstant(CE, DL); + if (New && New != CE) + return emitGlobalConstantImpl(DL, New, AP); + } + } + + if (const ConstantVector *V = dyn_cast<ConstantVector>(CV)) + return emitGlobalConstantVector(DL, V, AP); + + // Otherwise, it must be a ConstantExpr. Lower it to an MCExpr, then emit it + // thread the streamer with EmitValue. + const MCExpr *ME = AP.lowerConstant(CV); + + // Since lowerConstant already folded and got rid of all IR pointer and + // integer casts, detect GOT equivalent accesses by looking into the MCExpr + // directly. + if (AP.getObjFileLowering().supportIndirectSymViaGOTPCRel()) + handleIndirectSymViaGOTPCRel(AP, &ME, BaseCV, Offset); + + AP.OutStreamer->EmitValue(ME, Size); +} + +/// EmitGlobalConstant - Print a general LLVM constant to the .s file. +void AsmPrinter::EmitGlobalConstant(const DataLayout &DL, const Constant *CV) { + uint64_t Size = DL.getTypeAllocSize(CV->getType()); + if (Size) + emitGlobalConstantImpl(DL, CV, *this); + else if (MAI->hasSubsectionsViaSymbols()) { + // If the global has zero size, emit a single byte so that two labels don't + // look like they are at the same location. + OutStreamer->EmitIntValue(0, 1); + } +} + +void AsmPrinter::EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) { + // Target doesn't support this yet! + llvm_unreachable("Target does not support EmitMachineConstantPoolValue"); +} + +void AsmPrinter::printOffset(int64_t Offset, raw_ostream &OS) const { + if (Offset > 0) + OS << '+' << Offset; + else if (Offset < 0) + OS << Offset; +} + +//===----------------------------------------------------------------------===// +// Symbol Lowering Routines. +//===----------------------------------------------------------------------===// + +MCSymbol *AsmPrinter::createTempSymbol(const Twine &Name) const { + return OutContext.createTempSymbol(Name, true); +} + +MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BlockAddress *BA) const { + return MMI->getAddrLabelSymbol(BA->getBasicBlock()); +} + +MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BasicBlock *BB) const { + return MMI->getAddrLabelSymbol(BB); +} + +/// GetCPISymbol - Return the symbol for the specified constant pool entry. +MCSymbol *AsmPrinter::GetCPISymbol(unsigned CPID) const { + if (getSubtargetInfo().getTargetTriple().isWindowsMSVCEnvironment()) { + const MachineConstantPoolEntry &CPE = + MF->getConstantPool()->getConstants()[CPID]; + if (!CPE.isMachineConstantPoolEntry()) { + const DataLayout &DL = MF->getDataLayout(); + SectionKind Kind = CPE.getSectionKind(&DL); + const Constant *C = CPE.Val.ConstVal; + unsigned Align = CPE.Alignment; + if (const MCSectionCOFF *S = dyn_cast<MCSectionCOFF>( + getObjFileLowering().getSectionForConstant(DL, Kind, C, Align))) { + if (MCSymbol *Sym = S->getCOMDATSymbol()) { + if (Sym->isUndefined()) + OutStreamer->EmitSymbolAttribute(Sym, MCSA_Global); + return Sym; + } + } + } + } + + const DataLayout &DL = getDataLayout(); + return OutContext.getOrCreateSymbol(Twine(DL.getPrivateGlobalPrefix()) + + "CPI" + Twine(getFunctionNumber()) + "_" + + Twine(CPID)); +} + +/// GetJTISymbol - Return the symbol for the specified jump table entry. +MCSymbol *AsmPrinter::GetJTISymbol(unsigned JTID, bool isLinkerPrivate) const { + return MF->getJTISymbol(JTID, OutContext, isLinkerPrivate); +} + +/// GetJTSetSymbol - Return the symbol for the specified jump table .set +/// FIXME: privatize to AsmPrinter. +MCSymbol *AsmPrinter::GetJTSetSymbol(unsigned UID, unsigned MBBID) const { + const DataLayout &DL = getDataLayout(); + return OutContext.getOrCreateSymbol(Twine(DL.getPrivateGlobalPrefix()) + + Twine(getFunctionNumber()) + "_" + + Twine(UID) + "_set_" + Twine(MBBID)); +} + +MCSymbol *AsmPrinter::getSymbolWithGlobalValueBase(const GlobalValue *GV, + StringRef Suffix) const { + return getObjFileLowering().getSymbolWithGlobalValueBase(GV, Suffix, TM); +} + +/// Return the MCSymbol for the specified ExternalSymbol. +MCSymbol *AsmPrinter::GetExternalSymbolSymbol(StringRef Sym) const { + SmallString<60> NameStr; + Mangler::getNameWithPrefix(NameStr, Sym, getDataLayout()); + return OutContext.getOrCreateSymbol(NameStr); +} + +/// PrintParentLoopComment - Print comments about parent loops of this one. +static void PrintParentLoopComment(raw_ostream &OS, const MachineLoop *Loop, + unsigned FunctionNumber) { + if (!Loop) return; + PrintParentLoopComment(OS, Loop->getParentLoop(), FunctionNumber); + OS.indent(Loop->getLoopDepth()*2) + << "Parent Loop BB" << FunctionNumber << "_" + << Loop->getHeader()->getNumber() + << " Depth=" << Loop->getLoopDepth() << '\n'; +} + +/// PrintChildLoopComment - Print comments about child loops within +/// the loop for this basic block, with nesting. +static void PrintChildLoopComment(raw_ostream &OS, const MachineLoop *Loop, + unsigned FunctionNumber) { + // Add child loop information + for (const MachineLoop *CL : *Loop) { + OS.indent(CL->getLoopDepth()*2) + << "Child Loop BB" << FunctionNumber << "_" + << CL->getHeader()->getNumber() << " Depth " << CL->getLoopDepth() + << '\n'; + PrintChildLoopComment(OS, CL, FunctionNumber); + } +} + +/// emitBasicBlockLoopComments - Pretty-print comments for basic blocks. +static void emitBasicBlockLoopComments(const MachineBasicBlock &MBB, + const MachineLoopInfo *LI, + const AsmPrinter &AP) { + // Add loop depth information + const MachineLoop *Loop = LI->getLoopFor(&MBB); + if (!Loop) return; + + MachineBasicBlock *Header = Loop->getHeader(); + assert(Header && "No header for loop"); + + // If this block is not a loop header, just print out what is the loop header + // and return. + if (Header != &MBB) { + AP.OutStreamer->AddComment(" in Loop: Header=BB" + + Twine(AP.getFunctionNumber())+"_" + + Twine(Loop->getHeader()->getNumber())+ + " Depth="+Twine(Loop->getLoopDepth())); + return; + } + + // Otherwise, it is a loop header. Print out information about child and + // parent loops. + raw_ostream &OS = AP.OutStreamer->GetCommentOS(); + + PrintParentLoopComment(OS, Loop->getParentLoop(), AP.getFunctionNumber()); + + OS << "=>"; + OS.indent(Loop->getLoopDepth()*2-2); + + OS << "This "; + if (Loop->empty()) + OS << "Inner "; + OS << "Loop Header: Depth=" + Twine(Loop->getLoopDepth()) << '\n'; + + PrintChildLoopComment(OS, Loop, AP.getFunctionNumber()); +} + +void AsmPrinter::setupCodePaddingContext(const MachineBasicBlock &MBB, + MCCodePaddingContext &Context) const { + assert(MF != nullptr && "Machine function must be valid"); + Context.IsPaddingActive = !MF->hasInlineAsm() && + !MF->getFunction().hasOptSize() && + TM.getOptLevel() != CodeGenOpt::None; + Context.IsBasicBlockReachableViaFallthrough = + std::find(MBB.pred_begin(), MBB.pred_end(), MBB.getPrevNode()) != + MBB.pred_end(); + Context.IsBasicBlockReachableViaBranch = + MBB.pred_size() > 0 && !isBlockOnlyReachableByFallthrough(&MBB); +} + +/// EmitBasicBlockStart - This method prints the label for the specified +/// MachineBasicBlock, an alignment (if present) and a comment describing +/// it if appropriate. +void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) { + // End the previous funclet and start a new one. + if (MBB.isEHFuncletEntry()) { + for (const HandlerInfo &HI : Handlers) { + HI.Handler->endFunclet(); + HI.Handler->beginFunclet(MBB); + } + } + + // Emit an alignment directive for this block, if needed. + const Align Alignment = MBB.getAlignment(); + if (Alignment != Align::None()) + EmitAlignment(Alignment); + MCCodePaddingContext Context; + setupCodePaddingContext(MBB, Context); + OutStreamer->EmitCodePaddingBasicBlockStart(Context); + + // If the block has its address taken, emit any labels that were used to + // reference the block. It is possible that there is more than one label + // here, because multiple LLVM BB's may have been RAUW'd to this block after + // the references were generated. + if (MBB.hasAddressTaken()) { + const BasicBlock *BB = MBB.getBasicBlock(); + if (isVerbose()) + OutStreamer->AddComment("Block address taken"); + + // MBBs can have their address taken as part of CodeGen without having + // their corresponding BB's address taken in IR + if (BB->hasAddressTaken()) + for (MCSymbol *Sym : MMI->getAddrLabelSymbolToEmit(BB)) + OutStreamer->EmitLabel(Sym); + } + + // Print some verbose block comments. + if (isVerbose()) { + if (const BasicBlock *BB = MBB.getBasicBlock()) { + if (BB->hasName()) { + BB->printAsOperand(OutStreamer->GetCommentOS(), + /*PrintType=*/false, BB->getModule()); + OutStreamer->GetCommentOS() << '\n'; + } + } + + assert(MLI != nullptr && "MachineLoopInfo should has been computed"); + emitBasicBlockLoopComments(MBB, MLI, *this); + } + + // Print the main label for the block. + if (MBB.pred_empty() || + (isBlockOnlyReachableByFallthrough(&MBB) && !MBB.isEHFuncletEntry() && + !MBB.hasLabelMustBeEmitted())) { + if (isVerbose()) { + // NOTE: Want this comment at start of line, don't emit with AddComment. + OutStreamer->emitRawComment(" %bb." + Twine(MBB.getNumber()) + ":", + false); + } + } else { + if (isVerbose() && MBB.hasLabelMustBeEmitted()) + OutStreamer->AddComment("Label of block must be emitted"); + OutStreamer->EmitLabel(MBB.getSymbol()); + } +} + +void AsmPrinter::EmitBasicBlockEnd(const MachineBasicBlock &MBB) { + MCCodePaddingContext Context; + setupCodePaddingContext(MBB, Context); + OutStreamer->EmitCodePaddingBasicBlockEnd(Context); +} + +void AsmPrinter::EmitVisibility(MCSymbol *Sym, unsigned Visibility, + bool IsDefinition) const { + MCSymbolAttr Attr = MCSA_Invalid; + + switch (Visibility) { + default: break; + case GlobalValue::HiddenVisibility: + if (IsDefinition) + Attr = MAI->getHiddenVisibilityAttr(); + else + Attr = MAI->getHiddenDeclarationVisibilityAttr(); + break; + case GlobalValue::ProtectedVisibility: + Attr = MAI->getProtectedVisibilityAttr(); + break; + } + + if (Attr != MCSA_Invalid) + OutStreamer->EmitSymbolAttribute(Sym, Attr); +} + +/// isBlockOnlyReachableByFallthough - Return true if the basic block has +/// exactly one predecessor and the control transfer mechanism between +/// the predecessor and this block is a fall-through. +bool AsmPrinter:: +isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const { + // If this is a landing pad, it isn't a fall through. If it has no preds, + // then nothing falls through to it. + if (MBB->isEHPad() || MBB->pred_empty()) + return false; + + // If there isn't exactly one predecessor, it can't be a fall through. + if (MBB->pred_size() > 1) + return false; + + // The predecessor has to be immediately before this block. + MachineBasicBlock *Pred = *MBB->pred_begin(); + if (!Pred->isLayoutSuccessor(MBB)) + return false; + + // If the block is completely empty, then it definitely does fall through. + if (Pred->empty()) + return true; + + // Check the terminators in the previous blocks + for (const auto &MI : Pred->terminators()) { + // If it is not a simple branch, we are in a table somewhere. + if (!MI.isBranch() || MI.isIndirectBranch()) + return false; + + // If we are the operands of one of the branches, this is not a fall + // through. Note that targets with delay slots will usually bundle + // terminators with the delay slot instruction. + for (ConstMIBundleOperands OP(MI); OP.isValid(); ++OP) { + if (OP->isJTI()) + return false; + if (OP->isMBB() && OP->getMBB() == MBB) + return false; + } + } + + return true; +} + +GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy &S) { + if (!S.usesMetadata()) + return nullptr; + + gcp_map_type &GCMap = getGCMap(GCMetadataPrinters); + gcp_map_type::iterator GCPI = GCMap.find(&S); + if (GCPI != GCMap.end()) + return GCPI->second.get(); + + auto Name = S.getName(); + + for (GCMetadataPrinterRegistry::iterator + I = GCMetadataPrinterRegistry::begin(), + E = GCMetadataPrinterRegistry::end(); I != E; ++I) + if (Name == I->getName()) { + std::unique_ptr<GCMetadataPrinter> GMP = I->instantiate(); + GMP->S = &S; + auto IterBool = GCMap.insert(std::make_pair(&S, std::move(GMP))); + return IterBool.first->second.get(); + } + + report_fatal_error("no GCMetadataPrinter registered for GC: " + Twine(Name)); +} + +void AsmPrinter::emitStackMaps(StackMaps &SM) { + GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>(); + assert(MI && "AsmPrinter didn't require GCModuleInfo?"); + bool NeedsDefault = false; + if (MI->begin() == MI->end()) + // No GC strategy, use the default format. + NeedsDefault = true; + else + for (auto &I : *MI) { + if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(*I)) + if (MP->emitStackMaps(SM, *this)) + continue; + // The strategy doesn't have printer or doesn't emit custom stack maps. + // Use the default format. + NeedsDefault = true; + } + + if (NeedsDefault) + SM.serializeToStackMapSection(); +} + +/// Pin vtable to this file. +AsmPrinterHandler::~AsmPrinterHandler() = default; + +void AsmPrinterHandler::markFunctionEnd() {} + +// In the binary's "xray_instr_map" section, an array of these function entries +// describes each instrumentation point. When XRay patches your code, the index +// into this table will be given to your handler as a patch point identifier. +void AsmPrinter::XRayFunctionEntry::emit(int Bytes, MCStreamer *Out, + const MCSymbol *CurrentFnSym) const { + Out->EmitSymbolValue(Sled, Bytes); + Out->EmitSymbolValue(CurrentFnSym, Bytes); + auto Kind8 = static_cast<uint8_t>(Kind); + Out->EmitBinaryData(StringRef(reinterpret_cast<const char *>(&Kind8), 1)); + Out->EmitBinaryData( + StringRef(reinterpret_cast<const char *>(&AlwaysInstrument), 1)); + Out->EmitBinaryData(StringRef(reinterpret_cast<const char *>(&Version), 1)); + auto Padding = (4 * Bytes) - ((2 * Bytes) + 3); + assert(Padding >= 0 && "Instrumentation map entry > 4 * Word Size"); + Out->EmitZeros(Padding); +} + +void AsmPrinter::emitXRayTable() { + if (Sleds.empty()) + return; + + auto PrevSection = OutStreamer->getCurrentSectionOnly(); + const Function &F = MF->getFunction(); + MCSection *InstMap = nullptr; + MCSection *FnSledIndex = nullptr; + if (MF->getSubtarget().getTargetTriple().isOSBinFormatELF()) { + auto Associated = dyn_cast<MCSymbolELF>(CurrentFnSym); + assert(Associated != nullptr); + auto Flags = ELF::SHF_WRITE | ELF::SHF_ALLOC | ELF::SHF_LINK_ORDER; + std::string GroupName; + if (F.hasComdat()) { + Flags |= ELF::SHF_GROUP; + GroupName = F.getComdat()->getName(); + } + + auto UniqueID = ++XRayFnUniqueID; + InstMap = + OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS, Flags, 0, + GroupName, UniqueID, Associated); + FnSledIndex = + OutContext.getELFSection("xray_fn_idx", ELF::SHT_PROGBITS, Flags, 0, + GroupName, UniqueID, Associated); + } else if (MF->getSubtarget().getTargetTriple().isOSBinFormatMachO()) { + InstMap = OutContext.getMachOSection("__DATA", "xray_instr_map", 0, + SectionKind::getReadOnlyWithRel()); + FnSledIndex = OutContext.getMachOSection("__DATA", "xray_fn_idx", 0, + SectionKind::getReadOnlyWithRel()); + } else { + llvm_unreachable("Unsupported target"); + } + + auto WordSizeBytes = MAI->getCodePointerSize(); + + // Now we switch to the instrumentation map section. Because this is done + // per-function, we are able to create an index entry that will represent the + // range of sleds associated with a function. + MCSymbol *SledsStart = OutContext.createTempSymbol("xray_sleds_start", true); + OutStreamer->SwitchSection(InstMap); + OutStreamer->EmitLabel(SledsStart); + for (const auto &Sled : Sleds) + Sled.emit(WordSizeBytes, OutStreamer.get(), CurrentFnSym); + MCSymbol *SledsEnd = OutContext.createTempSymbol("xray_sleds_end", true); + OutStreamer->EmitLabel(SledsEnd); + + // We then emit a single entry in the index per function. We use the symbols + // that bound the instrumentation map as the range for a specific function. + // Each entry here will be 2 * word size aligned, as we're writing down two + // pointers. This should work for both 32-bit and 64-bit platforms. + OutStreamer->SwitchSection(FnSledIndex); + OutStreamer->EmitCodeAlignment(2 * WordSizeBytes); + OutStreamer->EmitSymbolValue(SledsStart, WordSizeBytes, false); + OutStreamer->EmitSymbolValue(SledsEnd, WordSizeBytes, false); + OutStreamer->SwitchSection(PrevSection); + Sleds.clear(); +} + +void AsmPrinter::recordSled(MCSymbol *Sled, const MachineInstr &MI, + SledKind Kind, uint8_t Version) { + const Function &F = MI.getMF()->getFunction(); + auto Attr = F.getFnAttribute("function-instrument"); + bool LogArgs = F.hasFnAttribute("xray-log-args"); + bool AlwaysInstrument = + Attr.isStringAttribute() && Attr.getValueAsString() == "xray-always"; + if (Kind == SledKind::FUNCTION_ENTER && LogArgs) + Kind = SledKind::LOG_ARGS_ENTER; + Sleds.emplace_back(XRayFunctionEntry{Sled, CurrentFnSym, Kind, + AlwaysInstrument, &F, Version}); +} + +uint16_t AsmPrinter::getDwarfVersion() const { + return OutStreamer->getContext().getDwarfVersion(); +} + +void AsmPrinter::setDwarfVersion(uint16_t Version) { + OutStreamer->getContext().setDwarfVersion(Version); +} diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp new file mode 100644 index 000000000000..992e44d95306 --- /dev/null +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -0,0 +1,293 @@ +//===-- AsmPrinterDwarf.cpp - AsmPrinter Dwarf Support --------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the Dwarf emissions parts of AsmPrinter. +// +//===----------------------------------------------------------------------===// + +#include "ByteStreamer.h" +#include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/DIE.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCDwarf.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MachineLocation.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" +using namespace llvm; + +#define DEBUG_TYPE "asm-printer" + +//===----------------------------------------------------------------------===// +// Dwarf Emission Helper Routines +//===----------------------------------------------------------------------===// + +/// EmitSLEB128 - emit the specified signed leb128 value. +void AsmPrinter::EmitSLEB128(int64_t Value, const char *Desc) const { + if (isVerbose() && Desc) + OutStreamer->AddComment(Desc); + + OutStreamer->EmitSLEB128IntValue(Value); +} + +void AsmPrinter::EmitULEB128(uint64_t Value, const char *Desc, unsigned PadTo) const { + if (isVerbose() && Desc) + OutStreamer->AddComment(Desc); + + OutStreamer->EmitULEB128IntValue(Value, PadTo); +} + +/// Emit something like ".uleb128 Hi-Lo". +void AsmPrinter::EmitLabelDifferenceAsULEB128(const MCSymbol *Hi, + const MCSymbol *Lo) const { + OutStreamer->emitAbsoluteSymbolDiffAsULEB128(Hi, Lo); +} + +static const char *DecodeDWARFEncoding(unsigned Encoding) { + switch (Encoding) { + case dwarf::DW_EH_PE_absptr: + return "absptr"; + case dwarf::DW_EH_PE_omit: + return "omit"; + case dwarf::DW_EH_PE_pcrel: + return "pcrel"; + case dwarf::DW_EH_PE_uleb128: + return "uleb128"; + case dwarf::DW_EH_PE_sleb128: + return "sleb128"; + case dwarf::DW_EH_PE_udata4: + return "udata4"; + case dwarf::DW_EH_PE_udata8: + return "udata8"; + case dwarf::DW_EH_PE_sdata4: + return "sdata4"; + case dwarf::DW_EH_PE_sdata8: + return "sdata8"; + case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata4: + return "pcrel udata4"; + case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4: + return "pcrel sdata4"; + case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8: + return "pcrel udata8"; + case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8: + return "pcrel sdata8"; + case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata4 + : + return "indirect pcrel udata4"; + case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4 + : + return "indirect pcrel sdata4"; + case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8 + : + return "indirect pcrel udata8"; + case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8 + : + return "indirect pcrel sdata8"; + } + + return "<unknown encoding>"; +} + +/// EmitEncodingByte - Emit a .byte 42 directive that corresponds to an +/// encoding. If verbose assembly output is enabled, we output comments +/// describing the encoding. Desc is an optional string saying what the +/// encoding is specifying (e.g. "LSDA"). +void AsmPrinter::EmitEncodingByte(unsigned Val, const char *Desc) const { + if (isVerbose()) { + if (Desc) + OutStreamer->AddComment(Twine(Desc) + " Encoding = " + + Twine(DecodeDWARFEncoding(Val))); + else + OutStreamer->AddComment(Twine("Encoding = ") + DecodeDWARFEncoding(Val)); + } + + OutStreamer->EmitIntValue(Val, 1); +} + +/// GetSizeOfEncodedValue - Return the size of the encoding in bytes. +unsigned AsmPrinter::GetSizeOfEncodedValue(unsigned Encoding) const { + if (Encoding == dwarf::DW_EH_PE_omit) + return 0; + + switch (Encoding & 0x07) { + default: + llvm_unreachable("Invalid encoded value."); + case dwarf::DW_EH_PE_absptr: + return MF->getDataLayout().getPointerSize(); + case dwarf::DW_EH_PE_udata2: + return 2; + case dwarf::DW_EH_PE_udata4: + return 4; + case dwarf::DW_EH_PE_udata8: + return 8; + } +} + +void AsmPrinter::EmitTTypeReference(const GlobalValue *GV, + unsigned Encoding) const { + if (GV) { + const TargetLoweringObjectFile &TLOF = getObjFileLowering(); + + const MCExpr *Exp = + TLOF.getTTypeGlobalReference(GV, Encoding, TM, MMI, *OutStreamer); + OutStreamer->EmitValue(Exp, GetSizeOfEncodedValue(Encoding)); + } else + OutStreamer->EmitIntValue(0, GetSizeOfEncodedValue(Encoding)); +} + +void AsmPrinter::emitDwarfSymbolReference(const MCSymbol *Label, + bool ForceOffset) const { + if (!ForceOffset) { + // On COFF targets, we have to emit the special .secrel32 directive. + if (MAI->needsDwarfSectionOffsetDirective()) { + OutStreamer->EmitCOFFSecRel32(Label, /*Offset=*/0); + return; + } + + // If the format uses relocations with dwarf, refer to the symbol directly. + if (MAI->doesDwarfUseRelocationsAcrossSections()) { + OutStreamer->EmitSymbolValue(Label, 4); + return; + } + } + + // Otherwise, emit it as a label difference from the start of the section. + EmitLabelDifference(Label, Label->getSection().getBeginSymbol(), 4); +} + +void AsmPrinter::emitDwarfStringOffset(DwarfStringPoolEntry S) const { + if (MAI->doesDwarfUseRelocationsAcrossSections()) { + assert(S.Symbol && "No symbol available"); + emitDwarfSymbolReference(S.Symbol); + return; + } + + // Just emit the offset directly; no need for symbol math. + emitInt32(S.Offset); +} + +void AsmPrinter::EmitDwarfOffset(const MCSymbol *Label, uint64_t Offset) const { + EmitLabelPlusOffset(Label, Offset, MAI->getCodePointerSize()); +} + +void AsmPrinter::EmitCallSiteOffset(const MCSymbol *Hi, + const MCSymbol *Lo, + unsigned Encoding) const { + // The least significant 3 bits specify the width of the encoding + if ((Encoding & 0x7) == dwarf::DW_EH_PE_uleb128) + EmitLabelDifferenceAsULEB128(Hi, Lo); + else + EmitLabelDifference(Hi, Lo, GetSizeOfEncodedValue(Encoding)); +} + +void AsmPrinter::EmitCallSiteValue(uint64_t Value, + unsigned Encoding) const { + // The least significant 3 bits specify the width of the encoding + if ((Encoding & 0x7) == dwarf::DW_EH_PE_uleb128) + EmitULEB128(Value); + else + OutStreamer->EmitIntValue(Value, GetSizeOfEncodedValue(Encoding)); +} + +//===----------------------------------------------------------------------===// +// Dwarf Lowering Routines +//===----------------------------------------------------------------------===// + +void AsmPrinter::emitCFIInstruction(const MCCFIInstruction &Inst) const { + switch (Inst.getOperation()) { + default: + llvm_unreachable("Unexpected instruction"); + case MCCFIInstruction::OpDefCfaOffset: + OutStreamer->EmitCFIDefCfaOffset(Inst.getOffset()); + break; + case MCCFIInstruction::OpAdjustCfaOffset: + OutStreamer->EmitCFIAdjustCfaOffset(Inst.getOffset()); + break; + case MCCFIInstruction::OpDefCfa: + OutStreamer->EmitCFIDefCfa(Inst.getRegister(), Inst.getOffset()); + break; + case MCCFIInstruction::OpDefCfaRegister: + OutStreamer->EmitCFIDefCfaRegister(Inst.getRegister()); + break; + case MCCFIInstruction::OpOffset: + OutStreamer->EmitCFIOffset(Inst.getRegister(), Inst.getOffset()); + break; + case MCCFIInstruction::OpRegister: + OutStreamer->EmitCFIRegister(Inst.getRegister(), Inst.getRegister2()); + break; + case MCCFIInstruction::OpWindowSave: + OutStreamer->EmitCFIWindowSave(); + break; + case MCCFIInstruction::OpNegateRAState: + OutStreamer->EmitCFINegateRAState(); + break; + case MCCFIInstruction::OpSameValue: + OutStreamer->EmitCFISameValue(Inst.getRegister()); + break; + case MCCFIInstruction::OpGnuArgsSize: + OutStreamer->EmitCFIGnuArgsSize(Inst.getOffset()); + break; + case MCCFIInstruction::OpEscape: + OutStreamer->EmitCFIEscape(Inst.getValues()); + break; + case MCCFIInstruction::OpRestore: + OutStreamer->EmitCFIRestore(Inst.getRegister()); + break; + } +} + +void AsmPrinter::emitDwarfDIE(const DIE &Die) const { + // Emit the code (index) for the abbreviation. + if (isVerbose()) + OutStreamer->AddComment("Abbrev [" + Twine(Die.getAbbrevNumber()) + "] 0x" + + Twine::utohexstr(Die.getOffset()) + ":0x" + + Twine::utohexstr(Die.getSize()) + " " + + dwarf::TagString(Die.getTag())); + EmitULEB128(Die.getAbbrevNumber()); + + // Emit the DIE attribute values. + for (const auto &V : Die.values()) { + dwarf::Attribute Attr = V.getAttribute(); + assert(V.getForm() && "Too many attributes for DIE (check abbreviation)"); + + if (isVerbose()) { + OutStreamer->AddComment(dwarf::AttributeString(Attr)); + if (Attr == dwarf::DW_AT_accessibility) + OutStreamer->AddComment( + dwarf::AccessibilityString(V.getDIEInteger().getValue())); + } + + // Emit an attribute using the defined form. + V.EmitValue(this); + } + + // Emit the DIE children if any. + if (Die.hasChildren()) { + for (auto &Child : Die.children()) + emitDwarfDIE(Child); + + OutStreamer->AddComment("End Of Children Mark"); + emitInt8(0); + } +} + +void AsmPrinter::emitDwarfAbbrev(const DIEAbbrev &Abbrev) const { + // Emit the abbreviations code (base 1 index.) + EmitULEB128(Abbrev.getNumber(), "Abbreviation Code"); + + // Emit the abbreviations data. + Abbrev.Emit(this); +} diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp new file mode 100644 index 000000000000..420df26a2b8b --- /dev/null +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -0,0 +1,664 @@ +//===-- AsmPrinterInlineAsm.cpp - AsmPrinter Inline Asm Handling ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the inline assembler pieces of the AsmPrinter class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/Twine.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCParser/MCTargetAsmParser.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" +using namespace llvm; + +#define DEBUG_TYPE "asm-printer" + +/// srcMgrDiagHandler - This callback is invoked when the SourceMgr for an +/// inline asm has an error in it. diagInfo is a pointer to the SrcMgrDiagInfo +/// struct above. +static void srcMgrDiagHandler(const SMDiagnostic &Diag, void *diagInfo) { + AsmPrinter::SrcMgrDiagInfo *DiagInfo = + static_cast<AsmPrinter::SrcMgrDiagInfo *>(diagInfo); + assert(DiagInfo && "Diagnostic context not passed down?"); + + // Look up a LocInfo for the buffer this diagnostic is coming from. + unsigned BufNum = DiagInfo->SrcMgr.FindBufferContainingLoc(Diag.getLoc()); + const MDNode *LocInfo = nullptr; + if (BufNum > 0 && BufNum <= DiagInfo->LocInfos.size()) + LocInfo = DiagInfo->LocInfos[BufNum-1]; + + // If the inline asm had metadata associated with it, pull out a location + // cookie corresponding to which line the error occurred on. + unsigned LocCookie = 0; + if (LocInfo) { + unsigned ErrorLine = Diag.getLineNo()-1; + if (ErrorLine >= LocInfo->getNumOperands()) + ErrorLine = 0; + + if (LocInfo->getNumOperands() != 0) + if (const ConstantInt *CI = + mdconst::dyn_extract<ConstantInt>(LocInfo->getOperand(ErrorLine))) + LocCookie = CI->getZExtValue(); + } + + DiagInfo->DiagHandler(Diag, DiagInfo->DiagContext, LocCookie); +} + +unsigned AsmPrinter::addInlineAsmDiagBuffer(StringRef AsmStr, + const MDNode *LocMDNode) const { + if (!DiagInfo) { + DiagInfo = std::make_unique<SrcMgrDiagInfo>(); + + MCContext &Context = MMI->getContext(); + Context.setInlineSourceManager(&DiagInfo->SrcMgr); + + LLVMContext &LLVMCtx = MMI->getModule()->getContext(); + if (LLVMCtx.getInlineAsmDiagnosticHandler()) { + DiagInfo->DiagHandler = LLVMCtx.getInlineAsmDiagnosticHandler(); + DiagInfo->DiagContext = LLVMCtx.getInlineAsmDiagnosticContext(); + DiagInfo->SrcMgr.setDiagHandler(srcMgrDiagHandler, DiagInfo.get()); + } + } + + SourceMgr &SrcMgr = DiagInfo->SrcMgr; + + std::unique_ptr<MemoryBuffer> Buffer; + // The inline asm source manager will outlive AsmStr, so make a copy of the + // string for SourceMgr to own. + Buffer = MemoryBuffer::getMemBufferCopy(AsmStr, "<inline asm>"); + + // Tell SrcMgr about this buffer, it takes ownership of the buffer. + unsigned BufNum = SrcMgr.AddNewSourceBuffer(std::move(Buffer), SMLoc()); + + // Store LocMDNode in DiagInfo, using BufNum as an identifier. + if (LocMDNode) { + DiagInfo->LocInfos.resize(BufNum); + DiagInfo->LocInfos[BufNum - 1] = LocMDNode; + } + + return BufNum; +} + + +/// EmitInlineAsm - Emit a blob of inline asm to the output streamer. +void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI, + const MCTargetOptions &MCOptions, + const MDNode *LocMDNode, + InlineAsm::AsmDialect Dialect) const { + assert(!Str.empty() && "Can't emit empty inline asm block"); + + // Remember if the buffer is nul terminated or not so we can avoid a copy. + bool isNullTerminated = Str.back() == 0; + if (isNullTerminated) + Str = Str.substr(0, Str.size()-1); + + // If the output streamer does not have mature MC support or the integrated + // assembler has been disabled, just emit the blob textually. + // Otherwise parse the asm and emit it via MC support. + // This is useful in case the asm parser doesn't handle something but the + // system assembler does. + const MCAsmInfo *MCAI = TM.getMCAsmInfo(); + assert(MCAI && "No MCAsmInfo"); + if (!MCAI->useIntegratedAssembler() && + !OutStreamer->isIntegratedAssemblerRequired()) { + emitInlineAsmStart(); + OutStreamer->EmitRawText(Str); + emitInlineAsmEnd(STI, nullptr); + return; + } + + unsigned BufNum = addInlineAsmDiagBuffer(Str, LocMDNode); + DiagInfo->SrcMgr.setIncludeDirs(MCOptions.IASSearchPaths); + + std::unique_ptr<MCAsmParser> Parser(createMCAsmParser( + DiagInfo->SrcMgr, OutContext, *OutStreamer, *MAI, BufNum)); + + // Do not use assembler-level information for parsing inline assembly. + OutStreamer->setUseAssemblerInfoForParsing(false); + + // We create a new MCInstrInfo here since we might be at the module level + // and not have a MachineFunction to initialize the TargetInstrInfo from and + // we only need MCInstrInfo for asm parsing. We create one unconditionally + // because it's not subtarget dependent. + std::unique_ptr<MCInstrInfo> MII(TM.getTarget().createMCInstrInfo()); + std::unique_ptr<MCTargetAsmParser> TAP(TM.getTarget().createMCAsmParser( + STI, *Parser, *MII, MCOptions)); + if (!TAP) + report_fatal_error("Inline asm not supported by this streamer because" + " we don't have an asm parser for this target\n"); + Parser->setAssemblerDialect(Dialect); + Parser->setTargetParser(*TAP.get()); + // Enable lexing Masm binary and hex integer literals in intel inline + // assembly. + if (Dialect == InlineAsm::AD_Intel) + Parser->getLexer().setLexMasmIntegers(true); + + emitInlineAsmStart(); + // Don't implicitly switch to the text section before the asm. + int Res = Parser->Run(/*NoInitialTextSection*/ true, + /*NoFinalize*/ true); + emitInlineAsmEnd(STI, &TAP->getSTI()); + + if (Res && !DiagInfo->DiagHandler) + report_fatal_error("Error parsing inline asm\n"); +} + +static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI, + MachineModuleInfo *MMI, AsmPrinter *AP, + unsigned LocCookie, raw_ostream &OS) { + // Switch to the inline assembly variant. + OS << "\t.intel_syntax\n\t"; + + const char *LastEmitted = AsmStr; // One past the last character emitted. + unsigned NumOperands = MI->getNumOperands(); + + while (*LastEmitted) { + switch (*LastEmitted) { + default: { + // Not a special case, emit the string section literally. + const char *LiteralEnd = LastEmitted+1; + while (*LiteralEnd && *LiteralEnd != '{' && *LiteralEnd != '|' && + *LiteralEnd != '}' && *LiteralEnd != '$' && *LiteralEnd != '\n') + ++LiteralEnd; + + OS.write(LastEmitted, LiteralEnd-LastEmitted); + LastEmitted = LiteralEnd; + break; + } + case '\n': + ++LastEmitted; // Consume newline character. + OS << '\n'; // Indent code with newline. + break; + case '$': { + ++LastEmitted; // Consume '$' character. + bool Done = true; + + // Handle escapes. + switch (*LastEmitted) { + default: Done = false; break; + case '$': + ++LastEmitted; // Consume second '$' character. + break; + } + if (Done) break; + + // If we have ${:foo}, then this is not a real operand reference, it is a + // "magic" string reference, just like in .td files. Arrange to call + // PrintSpecial. + if (LastEmitted[0] == '{' && LastEmitted[1] == ':') { + LastEmitted += 2; + const char *StrStart = LastEmitted; + const char *StrEnd = strchr(StrStart, '}'); + if (!StrEnd) + report_fatal_error("Unterminated ${:foo} operand in inline asm" + " string: '" + Twine(AsmStr) + "'"); + + std::string Val(StrStart, StrEnd); + AP->PrintSpecial(MI, OS, Val.c_str()); + LastEmitted = StrEnd+1; + break; + } + + const char *IDStart = LastEmitted; + const char *IDEnd = IDStart; + while (*IDEnd >= '0' && *IDEnd <= '9') ++IDEnd; + + unsigned Val; + if (StringRef(IDStart, IDEnd-IDStart).getAsInteger(10, Val)) + report_fatal_error("Bad $ operand number in inline asm string: '" + + Twine(AsmStr) + "'"); + LastEmitted = IDEnd; + + if (Val >= NumOperands-1) + report_fatal_error("Invalid $ operand number in inline asm string: '" + + Twine(AsmStr) + "'"); + + // Okay, we finally have a value number. Ask the target to print this + // operand! + unsigned OpNo = InlineAsm::MIOp_FirstOperand; + + bool Error = false; + + // Scan to find the machine operand number for the operand. + for (; Val; --Val) { + if (OpNo >= MI->getNumOperands()) break; + unsigned OpFlags = MI->getOperand(OpNo).getImm(); + OpNo += InlineAsm::getNumOperandRegisters(OpFlags) + 1; + } + + // We may have a location metadata attached to the end of the + // instruction, and at no point should see metadata at any + // other point while processing. It's an error if so. + if (OpNo >= MI->getNumOperands() || + MI->getOperand(OpNo).isMetadata()) { + Error = true; + } else { + unsigned OpFlags = MI->getOperand(OpNo).getImm(); + ++OpNo; // Skip over the ID number. + + if (InlineAsm::isMemKind(OpFlags)) { + Error = AP->PrintAsmMemoryOperand(MI, OpNo, /*Modifier*/ nullptr, OS); + } else { + Error = AP->PrintAsmOperand(MI, OpNo, /*Modifier*/ nullptr, OS); + } + } + if (Error) { + std::string msg; + raw_string_ostream Msg(msg); + Msg << "invalid operand in inline asm: '" << AsmStr << "'"; + MMI->getModule()->getContext().emitError(LocCookie, Msg.str()); + } + break; + } + } + } + OS << "\n\t.att_syntax\n" << (char)0; // null terminate string. +} + +static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI, + MachineModuleInfo *MMI, int AsmPrinterVariant, + AsmPrinter *AP, unsigned LocCookie, + raw_ostream &OS) { + int CurVariant = -1; // The number of the {.|.|.} region we are in. + const char *LastEmitted = AsmStr; // One past the last character emitted. + unsigned NumOperands = MI->getNumOperands(); + + OS << '\t'; + + while (*LastEmitted) { + switch (*LastEmitted) { + default: { + // Not a special case, emit the string section literally. + const char *LiteralEnd = LastEmitted+1; + while (*LiteralEnd && *LiteralEnd != '{' && *LiteralEnd != '|' && + *LiteralEnd != '}' && *LiteralEnd != '$' && *LiteralEnd != '\n') + ++LiteralEnd; + if (CurVariant == -1 || CurVariant == AsmPrinterVariant) + OS.write(LastEmitted, LiteralEnd-LastEmitted); + LastEmitted = LiteralEnd; + break; + } + case '\n': + ++LastEmitted; // Consume newline character. + OS << '\n'; // Indent code with newline. + break; + case '$': { + ++LastEmitted; // Consume '$' character. + bool Done = true; + + // Handle escapes. + switch (*LastEmitted) { + default: Done = false; break; + case '$': // $$ -> $ + if (CurVariant == -1 || CurVariant == AsmPrinterVariant) + OS << '$'; + ++LastEmitted; // Consume second '$' character. + break; + case '(': // $( -> same as GCC's { character. + ++LastEmitted; // Consume '(' character. + if (CurVariant != -1) + report_fatal_error("Nested variants found in inline asm string: '" + + Twine(AsmStr) + "'"); + CurVariant = 0; // We're in the first variant now. + break; + case '|': + ++LastEmitted; // consume '|' character. + if (CurVariant == -1) + OS << '|'; // this is gcc's behavior for | outside a variant + else + ++CurVariant; // We're in the next variant. + break; + case ')': // $) -> same as GCC's } char. + ++LastEmitted; // consume ')' character. + if (CurVariant == -1) + OS << '}'; // this is gcc's behavior for } outside a variant + else + CurVariant = -1; + break; + } + if (Done) break; + + bool HasCurlyBraces = false; + if (*LastEmitted == '{') { // ${variable} + ++LastEmitted; // Consume '{' character. + HasCurlyBraces = true; + } + + // If we have ${:foo}, then this is not a real operand reference, it is a + // "magic" string reference, just like in .td files. Arrange to call + // PrintSpecial. + if (HasCurlyBraces && *LastEmitted == ':') { + ++LastEmitted; + const char *StrStart = LastEmitted; + const char *StrEnd = strchr(StrStart, '}'); + if (!StrEnd) + report_fatal_error("Unterminated ${:foo} operand in inline asm" + " string: '" + Twine(AsmStr) + "'"); + + std::string Val(StrStart, StrEnd); + AP->PrintSpecial(MI, OS, Val.c_str()); + LastEmitted = StrEnd+1; + break; + } + + const char *IDStart = LastEmitted; + const char *IDEnd = IDStart; + while (*IDEnd >= '0' && *IDEnd <= '9') ++IDEnd; + + unsigned Val; + if (StringRef(IDStart, IDEnd-IDStart).getAsInteger(10, Val)) + report_fatal_error("Bad $ operand number in inline asm string: '" + + Twine(AsmStr) + "'"); + LastEmitted = IDEnd; + + char Modifier[2] = { 0, 0 }; + + if (HasCurlyBraces) { + // If we have curly braces, check for a modifier character. This + // supports syntax like ${0:u}, which correspond to "%u0" in GCC asm. + if (*LastEmitted == ':') { + ++LastEmitted; // Consume ':' character. + if (*LastEmitted == 0) + report_fatal_error("Bad ${:} expression in inline asm string: '" + + Twine(AsmStr) + "'"); + + Modifier[0] = *LastEmitted; + ++LastEmitted; // Consume modifier character. + } + + if (*LastEmitted != '}') + report_fatal_error("Bad ${} expression in inline asm string: '" + + Twine(AsmStr) + "'"); + ++LastEmitted; // Consume '}' character. + } + + if (Val >= NumOperands-1) + report_fatal_error("Invalid $ operand number in inline asm string: '" + + Twine(AsmStr) + "'"); + + // Okay, we finally have a value number. Ask the target to print this + // operand! + if (CurVariant == -1 || CurVariant == AsmPrinterVariant) { + unsigned OpNo = InlineAsm::MIOp_FirstOperand; + + bool Error = false; + + // Scan to find the machine operand number for the operand. + for (; Val; --Val) { + if (OpNo >= MI->getNumOperands()) break; + unsigned OpFlags = MI->getOperand(OpNo).getImm(); + OpNo += InlineAsm::getNumOperandRegisters(OpFlags) + 1; + } + + // We may have a location metadata attached to the end of the + // instruction, and at no point should see metadata at any + // other point while processing. It's an error if so. + if (OpNo >= MI->getNumOperands() || + MI->getOperand(OpNo).isMetadata()) { + Error = true; + } else { + unsigned OpFlags = MI->getOperand(OpNo).getImm(); + ++OpNo; // Skip over the ID number. + + // FIXME: Shouldn't arch-independent output template handling go into + // PrintAsmOperand? + if (Modifier[0] == 'l') { // Labels are target independent. + if (MI->getOperand(OpNo).isBlockAddress()) { + const BlockAddress *BA = MI->getOperand(OpNo).getBlockAddress(); + MCSymbol *Sym = AP->GetBlockAddressSymbol(BA); + Sym->print(OS, AP->MAI); + MMI->getContext().registerInlineAsmLabel(Sym); + } else if (MI->getOperand(OpNo).isMBB()) { + const MCSymbol *Sym = MI->getOperand(OpNo).getMBB()->getSymbol(); + Sym->print(OS, AP->MAI); + } else { + Error = true; + } + } else { + if (InlineAsm::isMemKind(OpFlags)) { + Error = AP->PrintAsmMemoryOperand( + MI, OpNo, Modifier[0] ? Modifier : nullptr, OS); + } else { + Error = AP->PrintAsmOperand(MI, OpNo, + Modifier[0] ? Modifier : nullptr, OS); + } + } + } + if (Error) { + std::string msg; + raw_string_ostream Msg(msg); + Msg << "invalid operand in inline asm: '" << AsmStr << "'"; + MMI->getModule()->getContext().emitError(LocCookie, Msg.str()); + } + } + break; + } + } + } + OS << '\n' << (char)0; // null terminate string. +} + +/// EmitInlineAsm - This method formats and emits the specified machine +/// instruction that is an inline asm. +void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { + assert(MI->isInlineAsm() && "printInlineAsm only works on inline asms"); + + // Count the number of register definitions to find the asm string. + unsigned NumDefs = 0; + for (; MI->getOperand(NumDefs).isReg() && MI->getOperand(NumDefs).isDef(); + ++NumDefs) + assert(NumDefs != MI->getNumOperands()-2 && "No asm string?"); + + assert(MI->getOperand(NumDefs).isSymbol() && "No asm string?"); + + // Disassemble the AsmStr, printing out the literal pieces, the operands, etc. + const char *AsmStr = MI->getOperand(NumDefs).getSymbolName(); + + // If this asmstr is empty, just print the #APP/#NOAPP markers. + // These are useful to see where empty asm's wound up. + if (AsmStr[0] == 0) { + OutStreamer->emitRawComment(MAI->getInlineAsmStart()); + OutStreamer->emitRawComment(MAI->getInlineAsmEnd()); + return; + } + + // Emit the #APP start marker. This has to happen even if verbose-asm isn't + // enabled, so we use emitRawComment. + OutStreamer->emitRawComment(MAI->getInlineAsmStart()); + + // Get the !srcloc metadata node if we have it, and decode the loc cookie from + // it. + unsigned LocCookie = 0; + const MDNode *LocMD = nullptr; + for (unsigned i = MI->getNumOperands(); i != 0; --i) { + if (MI->getOperand(i-1).isMetadata() && + (LocMD = MI->getOperand(i-1).getMetadata()) && + LocMD->getNumOperands() != 0) { + if (const ConstantInt *CI = + mdconst::dyn_extract<ConstantInt>(LocMD->getOperand(0))) { + LocCookie = CI->getZExtValue(); + break; + } + } + } + + // Emit the inline asm to a temporary string so we can emit it through + // EmitInlineAsm. + SmallString<256> StringData; + raw_svector_ostream OS(StringData); + + // The variant of the current asmprinter. + int AsmPrinterVariant = MAI->getAssemblerDialect(); + AsmPrinter *AP = const_cast<AsmPrinter*>(this); + if (MI->getInlineAsmDialect() == InlineAsm::AD_ATT) + EmitGCCInlineAsmStr(AsmStr, MI, MMI, AsmPrinterVariant, AP, LocCookie, OS); + else + EmitMSInlineAsmStr(AsmStr, MI, MMI, AP, LocCookie, OS); + + // Emit warnings if we use reserved registers on the clobber list, as + // that might give surprising results. + std::vector<std::string> RestrRegs; + // Start with the first operand descriptor, and iterate over them. + for (unsigned I = InlineAsm::MIOp_FirstOperand, NumOps = MI->getNumOperands(); + I < NumOps; ++I) { + const MachineOperand &MO = MI->getOperand(I); + if (MO.isImm()) { + unsigned Flags = MO.getImm(); + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); + if (InlineAsm::getKind(Flags) == InlineAsm::Kind_Clobber && + !TRI->isAsmClobberable(*MF, MI->getOperand(I + 1).getReg())) { + RestrRegs.push_back(TRI->getName(MI->getOperand(I + 1).getReg())); + } + // Skip to one before the next operand descriptor, if it exists. + I += InlineAsm::getNumOperandRegisters(Flags); + } + } + + if (!RestrRegs.empty()) { + unsigned BufNum = addInlineAsmDiagBuffer(OS.str(), LocMD); + auto &SrcMgr = DiagInfo->SrcMgr; + SMLoc Loc = SMLoc::getFromPointer( + SrcMgr.getMemoryBuffer(BufNum)->getBuffer().begin()); + + std::string Msg = "inline asm clobber list contains reserved registers: "; + for (auto I = RestrRegs.begin(), E = RestrRegs.end(); I != E; I++) { + if(I != RestrRegs.begin()) + Msg += ", "; + Msg += *I; + } + std::string Note = "Reserved registers on the clobber list may not be " + "preserved across the asm statement, and clobbering them may " + "lead to undefined behaviour."; + SrcMgr.PrintMessage(Loc, SourceMgr::DK_Warning, Msg); + SrcMgr.PrintMessage(Loc, SourceMgr::DK_Note, Note); + } + + EmitInlineAsm(OS.str(), getSubtargetInfo(), TM.Options.MCOptions, LocMD, + MI->getInlineAsmDialect()); + + // Emit the #NOAPP end marker. This has to happen even if verbose-asm isn't + // enabled, so we use emitRawComment. + OutStreamer->emitRawComment(MAI->getInlineAsmEnd()); +} + + +/// PrintSpecial - Print information related to the specified machine instr +/// that is independent of the operand, and may be independent of the instr +/// itself. This can be useful for portably encoding the comment character +/// or other bits of target-specific knowledge into the asmstrings. The +/// syntax used is ${:comment}. Targets can override this to add support +/// for their own strange codes. +void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS, + const char *Code) const { + if (!strcmp(Code, "private")) { + const DataLayout &DL = MF->getDataLayout(); + OS << DL.getPrivateGlobalPrefix(); + } else if (!strcmp(Code, "comment")) { + OS << MAI->getCommentString(); + } else if (!strcmp(Code, "uid")) { + // Comparing the address of MI isn't sufficient, because machineinstrs may + // be allocated to the same address across functions. + + // If this is a new LastFn instruction, bump the counter. + if (LastMI != MI || LastFn != getFunctionNumber()) { + ++Counter; + LastMI = MI; + LastFn = getFunctionNumber(); + } + OS << Counter; + } else { + std::string msg; + raw_string_ostream Msg(msg); + Msg << "Unknown special formatter '" << Code + << "' for machine instr: " << *MI; + report_fatal_error(Msg.str()); + } +} + +void AsmPrinter::PrintSymbolOperand(const MachineOperand &MO, raw_ostream &OS) { + assert(MO.isGlobal() && "caller should check MO.isGlobal"); + getSymbol(MO.getGlobal())->print(OS, MAI); + printOffset(MO.getOffset(), OS); +} + +/// PrintAsmOperand - Print the specified operand of MI, an INLINEASM +/// instruction, using the specified assembler variant. Targets should +/// override this to format as appropriate for machine specific ExtraCodes +/// or when the arch-independent handling would be too complex otherwise. +bool AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + const char *ExtraCode, raw_ostream &O) { + // Does this asm operand have a single letter operand modifier? + if (ExtraCode && ExtraCode[0]) { + if (ExtraCode[1] != 0) return true; // Unknown modifier. + + // https://gcc.gnu.org/onlinedocs/gccint/Output-Template.html + const MachineOperand &MO = MI->getOperand(OpNo); + switch (ExtraCode[0]) { + default: + return true; // Unknown modifier. + case 'a': // Print as memory address. + if (MO.isReg()) { + PrintAsmMemoryOperand(MI, OpNo, nullptr, O); + return false; + } + LLVM_FALLTHROUGH; // GCC allows '%a' to behave like '%c' with immediates. + case 'c': // Substitute immediate value without immediate syntax + if (MO.isImm()) { + O << MO.getImm(); + return false; + } + if (MO.isGlobal()) { + PrintSymbolOperand(MO, O); + return false; + } + return true; + case 'n': // Negate the immediate constant. + if (!MO.isImm()) + return true; + O << -MO.getImm(); + return false; + case 's': // The GCC deprecated s modifier + if (!MO.isImm()) + return true; + O << ((32 - MO.getImm()) & 31); + return false; + } + } + return true; +} + +bool AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, + const char *ExtraCode, raw_ostream &O) { + // Target doesn't support this yet! + return true; +} + +void AsmPrinter::emitInlineAsmStart() const {} + +void AsmPrinter::emitInlineAsmEnd(const MCSubtargetInfo &StartInfo, + const MCSubtargetInfo *EndInfo) const {} diff --git a/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h b/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h new file mode 100644 index 000000000000..09f7496cd4ef --- /dev/null +++ b/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h @@ -0,0 +1,121 @@ +//===-- llvm/CodeGen/ByteStreamer.h - ByteStreamer class --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains a class that can take bytes that would normally be +// streamed via the AsmPrinter. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_BYTESTREAMER_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_BYTESTREAMER_H + +#include "DIEHash.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/Support/LEB128.h" +#include <string> + +namespace llvm { +class ByteStreamer { + protected: + ~ByteStreamer() = default; + ByteStreamer(const ByteStreamer&) = default; + ByteStreamer() = default; + + public: + // For now we're just handling the calls we need for dwarf emission/hashing. + virtual void EmitInt8(uint8_t Byte, const Twine &Comment = "") = 0; + virtual void EmitSLEB128(uint64_t DWord, const Twine &Comment = "") = 0; + virtual void EmitULEB128(uint64_t DWord, const Twine &Comment = "", unsigned PadTo = 0) = 0; +}; + +class APByteStreamer final : public ByteStreamer { +private: + AsmPrinter &AP; + +public: + APByteStreamer(AsmPrinter &Asm) : AP(Asm) {} + void EmitInt8(uint8_t Byte, const Twine &Comment) override { + AP.OutStreamer->AddComment(Comment); + AP.emitInt8(Byte); + } + void EmitSLEB128(uint64_t DWord, const Twine &Comment) override { + AP.OutStreamer->AddComment(Comment); + AP.EmitSLEB128(DWord); + } + void EmitULEB128(uint64_t DWord, const Twine &Comment, unsigned PadTo) override { + AP.OutStreamer->AddComment(Comment); + AP.EmitULEB128(DWord); + } +}; + +class HashingByteStreamer final : public ByteStreamer { + private: + DIEHash &Hash; + public: + HashingByteStreamer(DIEHash &H) : Hash(H) {} + void EmitInt8(uint8_t Byte, const Twine &Comment) override { + Hash.update(Byte); + } + void EmitSLEB128(uint64_t DWord, const Twine &Comment) override { + Hash.addSLEB128(DWord); + } + void EmitULEB128(uint64_t DWord, const Twine &Comment, unsigned PadTo) override { + Hash.addULEB128(DWord); + } +}; + +class BufferByteStreamer final : public ByteStreamer { +private: + SmallVectorImpl<char> &Buffer; + std::vector<std::string> &Comments; + +public: + /// Only verbose textual output needs comments. This will be set to + /// true for that case, and false otherwise. If false, comments passed in to + /// the emit methods will be ignored. + const bool GenerateComments; + + BufferByteStreamer(SmallVectorImpl<char> &Buffer, + std::vector<std::string> &Comments, bool GenerateComments) + : Buffer(Buffer), Comments(Comments), GenerateComments(GenerateComments) { + } + void EmitInt8(uint8_t Byte, const Twine &Comment) override { + Buffer.push_back(Byte); + if (GenerateComments) + Comments.push_back(Comment.str()); + } + void EmitSLEB128(uint64_t DWord, const Twine &Comment) override { + raw_svector_ostream OSE(Buffer); + unsigned Length = encodeSLEB128(DWord, OSE); + if (GenerateComments) { + Comments.push_back(Comment.str()); + // Add some empty comments to keep the Buffer and Comments vectors aligned + // with each other. + for (size_t i = 1; i < Length; ++i) + Comments.push_back(""); + + } + } + void EmitULEB128(uint64_t DWord, const Twine &Comment, unsigned PadTo) override { + raw_svector_ostream OSE(Buffer); + unsigned Length = encodeULEB128(DWord, OSE, PadTo); + if (GenerateComments) { + Comments.push_back(Comment.str()); + // Add some empty comments to keep the Buffer and Comments vectors aligned + // with each other. + for (size_t i = 1; i < Length; ++i) + Comments.push_back(""); + + } + } +}; + +} + +#endif diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp new file mode 100644 index 000000000000..c6457f3626d1 --- /dev/null +++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp @@ -0,0 +1,3116 @@ +//===- llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp ----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing Microsoft CodeView debug info. +// +//===----------------------------------------------------------------------===// + +#include "CodeViewDebug.h" +#include "DwarfExpression.h" +#include "llvm/ADT/APSInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/TinyPtrVector.h" +#include "llvm/ADT/Triple.h" +#include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/LexicalScopes.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/Config/llvm-config.h" +#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h" +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/CodeViewRecordIO.h" +#include "llvm/DebugInfo/CodeView/ContinuationRecordBuilder.h" +#include "llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h" +#include "llvm/DebugInfo/CodeView/EnumTables.h" +#include "llvm/DebugInfo/CodeView/Line.h" +#include "llvm/DebugInfo/CodeView/SymbolRecord.h" +#include "llvm/DebugInfo/CodeView/TypeDumpVisitor.h" +#include "llvm/DebugInfo/CodeView/TypeIndex.h" +#include "llvm/DebugInfo/CodeView/TypeRecord.h" +#include "llvm/DebugInfo/CodeView/TypeTableCollection.h" +#include "llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSectionCOFF.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/BinaryByteStream.h" +#include "llvm/Support/BinaryStreamReader.h" +#include "llvm/Support/BinaryStreamWriter.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/SMLoc.h" +#include "llvm/Support/ScopedPrinter.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" +#include <algorithm> +#include <cassert> +#include <cctype> +#include <cstddef> +#include <cstdint> +#include <iterator> +#include <limits> +#include <string> +#include <utility> +#include <vector> + +using namespace llvm; +using namespace llvm::codeview; + +namespace { +class CVMCAdapter : public CodeViewRecordStreamer { +public: + CVMCAdapter(MCStreamer &OS, TypeCollection &TypeTable) + : OS(&OS), TypeTable(TypeTable) {} + + void EmitBytes(StringRef Data) { OS->EmitBytes(Data); } + + void EmitIntValue(uint64_t Value, unsigned Size) { + OS->EmitIntValueInHex(Value, Size); + } + + void EmitBinaryData(StringRef Data) { OS->EmitBinaryData(Data); } + + void AddComment(const Twine &T) { OS->AddComment(T); } + + void AddRawComment(const Twine &T) { OS->emitRawComment(T); } + + bool isVerboseAsm() { return OS->isVerboseAsm(); } + + std::string getTypeName(TypeIndex TI) { + std::string TypeName; + if (!TI.isNoneType()) { + if (TI.isSimple()) + TypeName = TypeIndex::simpleTypeName(TI); + else + TypeName = TypeTable.getTypeName(TI); + } + return TypeName; + } + +private: + MCStreamer *OS = nullptr; + TypeCollection &TypeTable; +}; +} // namespace + +static CPUType mapArchToCVCPUType(Triple::ArchType Type) { + switch (Type) { + case Triple::ArchType::x86: + return CPUType::Pentium3; + case Triple::ArchType::x86_64: + return CPUType::X64; + case Triple::ArchType::thumb: + return CPUType::Thumb; + case Triple::ArchType::aarch64: + return CPUType::ARM64; + default: + report_fatal_error("target architecture doesn't map to a CodeView CPUType"); + } +} + +CodeViewDebug::CodeViewDebug(AsmPrinter *AP) + : DebugHandlerBase(AP), OS(*Asm->OutStreamer), TypeTable(Allocator) { + // If module doesn't have named metadata anchors or COFF debug section + // is not available, skip any debug info related stuff. + if (!MMI->getModule()->getNamedMetadata("llvm.dbg.cu") || + !AP->getObjFileLowering().getCOFFDebugSymbolsSection()) { + Asm = nullptr; + MMI->setDebugInfoAvailability(false); + return; + } + // Tell MMI that we have debug info. + MMI->setDebugInfoAvailability(true); + + TheCPU = + mapArchToCVCPUType(Triple(MMI->getModule()->getTargetTriple()).getArch()); + + collectGlobalVariableInfo(); + + // Check if we should emit type record hashes. + ConstantInt *GH = mdconst::extract_or_null<ConstantInt>( + MMI->getModule()->getModuleFlag("CodeViewGHash")); + EmitDebugGlobalHashes = GH && !GH->isZero(); +} + +StringRef CodeViewDebug::getFullFilepath(const DIFile *File) { + std::string &Filepath = FileToFilepathMap[File]; + if (!Filepath.empty()) + return Filepath; + + StringRef Dir = File->getDirectory(), Filename = File->getFilename(); + + // If this is a Unix-style path, just use it as is. Don't try to canonicalize + // it textually because one of the path components could be a symlink. + if (Dir.startswith("/") || Filename.startswith("/")) { + if (llvm::sys::path::is_absolute(Filename, llvm::sys::path::Style::posix)) + return Filename; + Filepath = Dir; + if (Dir.back() != '/') + Filepath += '/'; + Filepath += Filename; + return Filepath; + } + + // Clang emits directory and relative filename info into the IR, but CodeView + // operates on full paths. We could change Clang to emit full paths too, but + // that would increase the IR size and probably not needed for other users. + // For now, just concatenate and canonicalize the path here. + if (Filename.find(':') == 1) + Filepath = Filename; + else + Filepath = (Dir + "\\" + Filename).str(); + + // Canonicalize the path. We have to do it textually because we may no longer + // have access the file in the filesystem. + // First, replace all slashes with backslashes. + std::replace(Filepath.begin(), Filepath.end(), '/', '\\'); + + // Remove all "\.\" with "\". + size_t Cursor = 0; + while ((Cursor = Filepath.find("\\.\\", Cursor)) != std::string::npos) + Filepath.erase(Cursor, 2); + + // Replace all "\XXX\..\" with "\". Don't try too hard though as the original + // path should be well-formatted, e.g. start with a drive letter, etc. + Cursor = 0; + while ((Cursor = Filepath.find("\\..\\", Cursor)) != std::string::npos) { + // Something's wrong if the path starts with "\..\", abort. + if (Cursor == 0) + break; + + size_t PrevSlash = Filepath.rfind('\\', Cursor - 1); + if (PrevSlash == std::string::npos) + // Something's wrong, abort. + break; + + Filepath.erase(PrevSlash, Cursor + 3 - PrevSlash); + // The next ".." might be following the one we've just erased. + Cursor = PrevSlash; + } + + // Remove all duplicate backslashes. + Cursor = 0; + while ((Cursor = Filepath.find("\\\\", Cursor)) != std::string::npos) + Filepath.erase(Cursor, 1); + + return Filepath; +} + +unsigned CodeViewDebug::maybeRecordFile(const DIFile *F) { + StringRef FullPath = getFullFilepath(F); + unsigned NextId = FileIdMap.size() + 1; + auto Insertion = FileIdMap.insert(std::make_pair(FullPath, NextId)); + if (Insertion.second) { + // We have to compute the full filepath and emit a .cv_file directive. + ArrayRef<uint8_t> ChecksumAsBytes; + FileChecksumKind CSKind = FileChecksumKind::None; + if (F->getChecksum()) { + std::string Checksum = fromHex(F->getChecksum()->Value); + void *CKMem = OS.getContext().allocate(Checksum.size(), 1); + memcpy(CKMem, Checksum.data(), Checksum.size()); + ChecksumAsBytes = ArrayRef<uint8_t>( + reinterpret_cast<const uint8_t *>(CKMem), Checksum.size()); + switch (F->getChecksum()->Kind) { + case DIFile::CSK_MD5: CSKind = FileChecksumKind::MD5; break; + case DIFile::CSK_SHA1: CSKind = FileChecksumKind::SHA1; break; + } + } + bool Success = OS.EmitCVFileDirective(NextId, FullPath, ChecksumAsBytes, + static_cast<unsigned>(CSKind)); + (void)Success; + assert(Success && ".cv_file directive failed"); + } + return Insertion.first->second; +} + +CodeViewDebug::InlineSite & +CodeViewDebug::getInlineSite(const DILocation *InlinedAt, + const DISubprogram *Inlinee) { + auto SiteInsertion = CurFn->InlineSites.insert({InlinedAt, InlineSite()}); + InlineSite *Site = &SiteInsertion.first->second; + if (SiteInsertion.second) { + unsigned ParentFuncId = CurFn->FuncId; + if (const DILocation *OuterIA = InlinedAt->getInlinedAt()) + ParentFuncId = + getInlineSite(OuterIA, InlinedAt->getScope()->getSubprogram()) + .SiteFuncId; + + Site->SiteFuncId = NextFuncId++; + OS.EmitCVInlineSiteIdDirective( + Site->SiteFuncId, ParentFuncId, maybeRecordFile(InlinedAt->getFile()), + InlinedAt->getLine(), InlinedAt->getColumn(), SMLoc()); + Site->Inlinee = Inlinee; + InlinedSubprograms.insert(Inlinee); + getFuncIdForSubprogram(Inlinee); + } + return *Site; +} + +static StringRef getPrettyScopeName(const DIScope *Scope) { + StringRef ScopeName = Scope->getName(); + if (!ScopeName.empty()) + return ScopeName; + + switch (Scope->getTag()) { + case dwarf::DW_TAG_enumeration_type: + case dwarf::DW_TAG_class_type: + case dwarf::DW_TAG_structure_type: + case dwarf::DW_TAG_union_type: + return "<unnamed-tag>"; + case dwarf::DW_TAG_namespace: + return "`anonymous namespace'"; + } + + return StringRef(); +} + +static const DISubprogram *getQualifiedNameComponents( + const DIScope *Scope, SmallVectorImpl<StringRef> &QualifiedNameComponents) { + const DISubprogram *ClosestSubprogram = nullptr; + while (Scope != nullptr) { + if (ClosestSubprogram == nullptr) + ClosestSubprogram = dyn_cast<DISubprogram>(Scope); + StringRef ScopeName = getPrettyScopeName(Scope); + if (!ScopeName.empty()) + QualifiedNameComponents.push_back(ScopeName); + Scope = Scope->getScope(); + } + return ClosestSubprogram; +} + +static std::string getQualifiedName(ArrayRef<StringRef> QualifiedNameComponents, + StringRef TypeName) { + std::string FullyQualifiedName; + for (StringRef QualifiedNameComponent : + llvm::reverse(QualifiedNameComponents)) { + FullyQualifiedName.append(QualifiedNameComponent); + FullyQualifiedName.append("::"); + } + FullyQualifiedName.append(TypeName); + return FullyQualifiedName; +} + +static std::string getFullyQualifiedName(const DIScope *Scope, StringRef Name) { + SmallVector<StringRef, 5> QualifiedNameComponents; + getQualifiedNameComponents(Scope, QualifiedNameComponents); + return getQualifiedName(QualifiedNameComponents, Name); +} + +struct CodeViewDebug::TypeLoweringScope { + TypeLoweringScope(CodeViewDebug &CVD) : CVD(CVD) { ++CVD.TypeEmissionLevel; } + ~TypeLoweringScope() { + // Don't decrement TypeEmissionLevel until after emitting deferred types, so + // inner TypeLoweringScopes don't attempt to emit deferred types. + if (CVD.TypeEmissionLevel == 1) + CVD.emitDeferredCompleteTypes(); + --CVD.TypeEmissionLevel; + } + CodeViewDebug &CVD; +}; + +static std::string getFullyQualifiedName(const DIScope *Ty) { + const DIScope *Scope = Ty->getScope(); + return getFullyQualifiedName(Scope, getPrettyScopeName(Ty)); +} + +TypeIndex CodeViewDebug::getScopeIndex(const DIScope *Scope) { + // No scope means global scope and that uses the zero index. + if (!Scope || isa<DIFile>(Scope)) + return TypeIndex(); + + assert(!isa<DIType>(Scope) && "shouldn't make a namespace scope for a type"); + + // Check if we've already translated this scope. + auto I = TypeIndices.find({Scope, nullptr}); + if (I != TypeIndices.end()) + return I->second; + + // Build the fully qualified name of the scope. + std::string ScopeName = getFullyQualifiedName(Scope); + StringIdRecord SID(TypeIndex(), ScopeName); + auto TI = TypeTable.writeLeafType(SID); + return recordTypeIndexForDINode(Scope, TI); +} + +TypeIndex CodeViewDebug::getFuncIdForSubprogram(const DISubprogram *SP) { + assert(SP); + + // Check if we've already translated this subprogram. + auto I = TypeIndices.find({SP, nullptr}); + if (I != TypeIndices.end()) + return I->second; + + // The display name includes function template arguments. Drop them to match + // MSVC. + StringRef DisplayName = SP->getName().split('<').first; + + const DIScope *Scope = SP->getScope(); + TypeIndex TI; + if (const auto *Class = dyn_cast_or_null<DICompositeType>(Scope)) { + // If the scope is a DICompositeType, then this must be a method. Member + // function types take some special handling, and require access to the + // subprogram. + TypeIndex ClassType = getTypeIndex(Class); + MemberFuncIdRecord MFuncId(ClassType, getMemberFunctionType(SP, Class), + DisplayName); + TI = TypeTable.writeLeafType(MFuncId); + } else { + // Otherwise, this must be a free function. + TypeIndex ParentScope = getScopeIndex(Scope); + FuncIdRecord FuncId(ParentScope, getTypeIndex(SP->getType()), DisplayName); + TI = TypeTable.writeLeafType(FuncId); + } + + return recordTypeIndexForDINode(SP, TI); +} + +static bool isNonTrivial(const DICompositeType *DCTy) { + return ((DCTy->getFlags() & DINode::FlagNonTrivial) == DINode::FlagNonTrivial); +} + +static FunctionOptions +getFunctionOptions(const DISubroutineType *Ty, + const DICompositeType *ClassTy = nullptr, + StringRef SPName = StringRef("")) { + FunctionOptions FO = FunctionOptions::None; + const DIType *ReturnTy = nullptr; + if (auto TypeArray = Ty->getTypeArray()) { + if (TypeArray.size()) + ReturnTy = TypeArray[0]; + } + + if (auto *ReturnDCTy = dyn_cast_or_null<DICompositeType>(ReturnTy)) { + if (isNonTrivial(ReturnDCTy)) + FO |= FunctionOptions::CxxReturnUdt; + } + + // DISubroutineType is unnamed. Use DISubprogram's i.e. SPName in comparison. + if (ClassTy && isNonTrivial(ClassTy) && SPName == ClassTy->getName()) { + FO |= FunctionOptions::Constructor; + + // TODO: put the FunctionOptions::ConstructorWithVirtualBases flag. + + } + return FO; +} + +TypeIndex CodeViewDebug::getMemberFunctionType(const DISubprogram *SP, + const DICompositeType *Class) { + // Always use the method declaration as the key for the function type. The + // method declaration contains the this adjustment. + if (SP->getDeclaration()) + SP = SP->getDeclaration(); + assert(!SP->getDeclaration() && "should use declaration as key"); + + // Key the MemberFunctionRecord into the map as {SP, Class}. It won't collide + // with the MemberFuncIdRecord, which is keyed in as {SP, nullptr}. + auto I = TypeIndices.find({SP, Class}); + if (I != TypeIndices.end()) + return I->second; + + // Make sure complete type info for the class is emitted *after* the member + // function type, as the complete class type is likely to reference this + // member function type. + TypeLoweringScope S(*this); + const bool IsStaticMethod = (SP->getFlags() & DINode::FlagStaticMember) != 0; + + FunctionOptions FO = getFunctionOptions(SP->getType(), Class, SP->getName()); + TypeIndex TI = lowerTypeMemberFunction( + SP->getType(), Class, SP->getThisAdjustment(), IsStaticMethod, FO); + return recordTypeIndexForDINode(SP, TI, Class); +} + +TypeIndex CodeViewDebug::recordTypeIndexForDINode(const DINode *Node, + TypeIndex TI, + const DIType *ClassTy) { + auto InsertResult = TypeIndices.insert({{Node, ClassTy}, TI}); + (void)InsertResult; + assert(InsertResult.second && "DINode was already assigned a type index"); + return TI; +} + +unsigned CodeViewDebug::getPointerSizeInBytes() { + return MMI->getModule()->getDataLayout().getPointerSizeInBits() / 8; +} + +void CodeViewDebug::recordLocalVariable(LocalVariable &&Var, + const LexicalScope *LS) { + if (const DILocation *InlinedAt = LS->getInlinedAt()) { + // This variable was inlined. Associate it with the InlineSite. + const DISubprogram *Inlinee = Var.DIVar->getScope()->getSubprogram(); + InlineSite &Site = getInlineSite(InlinedAt, Inlinee); + Site.InlinedLocals.emplace_back(Var); + } else { + // This variable goes into the corresponding lexical scope. + ScopeVariables[LS].emplace_back(Var); + } +} + +static void addLocIfNotPresent(SmallVectorImpl<const DILocation *> &Locs, + const DILocation *Loc) { + auto B = Locs.begin(), E = Locs.end(); + if (std::find(B, E, Loc) == E) + Locs.push_back(Loc); +} + +void CodeViewDebug::maybeRecordLocation(const DebugLoc &DL, + const MachineFunction *MF) { + // Skip this instruction if it has the same location as the previous one. + if (!DL || DL == PrevInstLoc) + return; + + const DIScope *Scope = DL.get()->getScope(); + if (!Scope) + return; + + // Skip this line if it is longer than the maximum we can record. + LineInfo LI(DL.getLine(), DL.getLine(), /*IsStatement=*/true); + if (LI.getStartLine() != DL.getLine() || LI.isAlwaysStepInto() || + LI.isNeverStepInto()) + return; + + ColumnInfo CI(DL.getCol(), /*EndColumn=*/0); + if (CI.getStartColumn() != DL.getCol()) + return; + + if (!CurFn->HaveLineInfo) + CurFn->HaveLineInfo = true; + unsigned FileId = 0; + if (PrevInstLoc.get() && PrevInstLoc->getFile() == DL->getFile()) + FileId = CurFn->LastFileId; + else + FileId = CurFn->LastFileId = maybeRecordFile(DL->getFile()); + PrevInstLoc = DL; + + unsigned FuncId = CurFn->FuncId; + if (const DILocation *SiteLoc = DL->getInlinedAt()) { + const DILocation *Loc = DL.get(); + + // If this location was actually inlined from somewhere else, give it the ID + // of the inline call site. + FuncId = + getInlineSite(SiteLoc, Loc->getScope()->getSubprogram()).SiteFuncId; + + // Ensure we have links in the tree of inline call sites. + bool FirstLoc = true; + while ((SiteLoc = Loc->getInlinedAt())) { + InlineSite &Site = + getInlineSite(SiteLoc, Loc->getScope()->getSubprogram()); + if (!FirstLoc) + addLocIfNotPresent(Site.ChildSites, Loc); + FirstLoc = false; + Loc = SiteLoc; + } + addLocIfNotPresent(CurFn->ChildSites, Loc); + } + + OS.EmitCVLocDirective(FuncId, FileId, DL.getLine(), DL.getCol(), + /*PrologueEnd=*/false, /*IsStmt=*/false, + DL->getFilename(), SMLoc()); +} + +void CodeViewDebug::emitCodeViewMagicVersion() { + OS.EmitValueToAlignment(4); + OS.AddComment("Debug section magic"); + OS.EmitIntValue(COFF::DEBUG_SECTION_MAGIC, 4); +} + +void CodeViewDebug::endModule() { + if (!Asm || !MMI->hasDebugInfo()) + return; + + assert(Asm != nullptr); + + // The COFF .debug$S section consists of several subsections, each starting + // with a 4-byte control code (e.g. 0xF1, 0xF2, etc) and then a 4-byte length + // of the payload followed by the payload itself. The subsections are 4-byte + // aligned. + + // Use the generic .debug$S section, and make a subsection for all the inlined + // subprograms. + switchToDebugSectionForSymbol(nullptr); + + MCSymbol *CompilerInfo = beginCVSubsection(DebugSubsectionKind::Symbols); + emitCompilerInformation(); + endCVSubsection(CompilerInfo); + + emitInlineeLinesSubsection(); + + // Emit per-function debug information. + for (auto &P : FnDebugInfo) + if (!P.first->isDeclarationForLinker()) + emitDebugInfoForFunction(P.first, *P.second); + + // Emit global variable debug information. + setCurrentSubprogram(nullptr); + emitDebugInfoForGlobals(); + + // Emit retained types. + emitDebugInfoForRetainedTypes(); + + // Switch back to the generic .debug$S section after potentially processing + // comdat symbol sections. + switchToDebugSectionForSymbol(nullptr); + + // Emit UDT records for any types used by global variables. + if (!GlobalUDTs.empty()) { + MCSymbol *SymbolsEnd = beginCVSubsection(DebugSubsectionKind::Symbols); + emitDebugInfoForUDTs(GlobalUDTs); + endCVSubsection(SymbolsEnd); + } + + // This subsection holds a file index to offset in string table table. + OS.AddComment("File index to string table offset subsection"); + OS.EmitCVFileChecksumsDirective(); + + // This subsection holds the string table. + OS.AddComment("String table"); + OS.EmitCVStringTableDirective(); + + // Emit S_BUILDINFO, which points to LF_BUILDINFO. Put this in its own symbol + // subsection in the generic .debug$S section at the end. There is no + // particular reason for this ordering other than to match MSVC. + emitBuildInfo(); + + // Emit type information and hashes last, so that any types we translate while + // emitting function info are included. + emitTypeInformation(); + + if (EmitDebugGlobalHashes) + emitTypeGlobalHashes(); + + clear(); +} + +static void +emitNullTerminatedSymbolName(MCStreamer &OS, StringRef S, + unsigned MaxFixedRecordLength = 0xF00) { + // The maximum CV record length is 0xFF00. Most of the strings we emit appear + // after a fixed length portion of the record. The fixed length portion should + // always be less than 0xF00 (3840) bytes, so truncate the string so that the + // overall record size is less than the maximum allowed. + SmallString<32> NullTerminatedString( + S.take_front(MaxRecordLength - MaxFixedRecordLength - 1)); + NullTerminatedString.push_back('\0'); + OS.EmitBytes(NullTerminatedString); +} + +void CodeViewDebug::emitTypeInformation() { + if (TypeTable.empty()) + return; + + // Start the .debug$T or .debug$P section with 0x4. + OS.SwitchSection(Asm->getObjFileLowering().getCOFFDebugTypesSection()); + emitCodeViewMagicVersion(); + + TypeTableCollection Table(TypeTable.records()); + TypeVisitorCallbackPipeline Pipeline; + + // To emit type record using Codeview MCStreamer adapter + CVMCAdapter CVMCOS(OS, Table); + TypeRecordMapping typeMapping(CVMCOS); + Pipeline.addCallbackToPipeline(typeMapping); + + Optional<TypeIndex> B = Table.getFirst(); + while (B) { + // This will fail if the record data is invalid. + CVType Record = Table.getType(*B); + + Error E = codeview::visitTypeRecord(Record, *B, Pipeline); + + if (E) { + logAllUnhandledErrors(std::move(E), errs(), "error: "); + llvm_unreachable("produced malformed type record"); + } + + B = Table.getNext(*B); + } +} + +void CodeViewDebug::emitTypeGlobalHashes() { + if (TypeTable.empty()) + return; + + // Start the .debug$H section with the version and hash algorithm, currently + // hardcoded to version 0, SHA1. + OS.SwitchSection(Asm->getObjFileLowering().getCOFFGlobalTypeHashesSection()); + + OS.EmitValueToAlignment(4); + OS.AddComment("Magic"); + OS.EmitIntValue(COFF::DEBUG_HASHES_SECTION_MAGIC, 4); + OS.AddComment("Section Version"); + OS.EmitIntValue(0, 2); + OS.AddComment("Hash Algorithm"); + OS.EmitIntValue(uint16_t(GlobalTypeHashAlg::SHA1_8), 2); + + TypeIndex TI(TypeIndex::FirstNonSimpleIndex); + for (const auto &GHR : TypeTable.hashes()) { + if (OS.isVerboseAsm()) { + // Emit an EOL-comment describing which TypeIndex this hash corresponds + // to, as well as the stringified SHA1 hash. + SmallString<32> Comment; + raw_svector_ostream CommentOS(Comment); + CommentOS << formatv("{0:X+} [{1}]", TI.getIndex(), GHR); + OS.AddComment(Comment); + ++TI; + } + assert(GHR.Hash.size() == 8); + StringRef S(reinterpret_cast<const char *>(GHR.Hash.data()), + GHR.Hash.size()); + OS.EmitBinaryData(S); + } +} + +static SourceLanguage MapDWLangToCVLang(unsigned DWLang) { + switch (DWLang) { + case dwarf::DW_LANG_C: + case dwarf::DW_LANG_C89: + case dwarf::DW_LANG_C99: + case dwarf::DW_LANG_C11: + case dwarf::DW_LANG_ObjC: + return SourceLanguage::C; + case dwarf::DW_LANG_C_plus_plus: + case dwarf::DW_LANG_C_plus_plus_03: + case dwarf::DW_LANG_C_plus_plus_11: + case dwarf::DW_LANG_C_plus_plus_14: + return SourceLanguage::Cpp; + case dwarf::DW_LANG_Fortran77: + case dwarf::DW_LANG_Fortran90: + case dwarf::DW_LANG_Fortran03: + case dwarf::DW_LANG_Fortran08: + return SourceLanguage::Fortran; + case dwarf::DW_LANG_Pascal83: + return SourceLanguage::Pascal; + case dwarf::DW_LANG_Cobol74: + case dwarf::DW_LANG_Cobol85: + return SourceLanguage::Cobol; + case dwarf::DW_LANG_Java: + return SourceLanguage::Java; + case dwarf::DW_LANG_D: + return SourceLanguage::D; + case dwarf::DW_LANG_Swift: + return SourceLanguage::Swift; + default: + // There's no CodeView representation for this language, and CV doesn't + // have an "unknown" option for the language field, so we'll use MASM, + // as it's very low level. + return SourceLanguage::Masm; + } +} + +namespace { +struct Version { + int Part[4]; +}; +} // end anonymous namespace + +// Takes a StringRef like "clang 4.0.0.0 (other nonsense 123)" and parses out +// the version number. +static Version parseVersion(StringRef Name) { + Version V = {{0}}; + int N = 0; + for (const char C : Name) { + if (isdigit(C)) { + V.Part[N] *= 10; + V.Part[N] += C - '0'; + } else if (C == '.') { + ++N; + if (N >= 4) + return V; + } else if (N > 0) + return V; + } + return V; +} + +void CodeViewDebug::emitCompilerInformation() { + MCSymbol *CompilerEnd = beginSymbolRecord(SymbolKind::S_COMPILE3); + uint32_t Flags = 0; + + NamedMDNode *CUs = MMI->getModule()->getNamedMetadata("llvm.dbg.cu"); + const MDNode *Node = *CUs->operands().begin(); + const auto *CU = cast<DICompileUnit>(Node); + + // The low byte of the flags indicates the source language. + Flags = MapDWLangToCVLang(CU->getSourceLanguage()); + // TODO: Figure out which other flags need to be set. + + OS.AddComment("Flags and language"); + OS.EmitIntValue(Flags, 4); + + OS.AddComment("CPUType"); + OS.EmitIntValue(static_cast<uint64_t>(TheCPU), 2); + + StringRef CompilerVersion = CU->getProducer(); + Version FrontVer = parseVersion(CompilerVersion); + OS.AddComment("Frontend version"); + for (int N = 0; N < 4; ++N) + OS.EmitIntValue(FrontVer.Part[N], 2); + + // Some Microsoft tools, like Binscope, expect a backend version number of at + // least 8.something, so we'll coerce the LLVM version into a form that + // guarantees it'll be big enough without really lying about the version. + int Major = 1000 * LLVM_VERSION_MAJOR + + 10 * LLVM_VERSION_MINOR + + LLVM_VERSION_PATCH; + // Clamp it for builds that use unusually large version numbers. + Major = std::min<int>(Major, std::numeric_limits<uint16_t>::max()); + Version BackVer = {{ Major, 0, 0, 0 }}; + OS.AddComment("Backend version"); + for (int N = 0; N < 4; ++N) + OS.EmitIntValue(BackVer.Part[N], 2); + + OS.AddComment("Null-terminated compiler version string"); + emitNullTerminatedSymbolName(OS, CompilerVersion); + + endSymbolRecord(CompilerEnd); +} + +static TypeIndex getStringIdTypeIdx(GlobalTypeTableBuilder &TypeTable, + StringRef S) { + StringIdRecord SIR(TypeIndex(0x0), S); + return TypeTable.writeLeafType(SIR); +} + +void CodeViewDebug::emitBuildInfo() { + // First, make LF_BUILDINFO. It's a sequence of strings with various bits of + // build info. The known prefix is: + // - Absolute path of current directory + // - Compiler path + // - Main source file path, relative to CWD or absolute + // - Type server PDB file + // - Canonical compiler command line + // If frontend and backend compilation are separated (think llc or LTO), it's + // not clear if the compiler path should refer to the executable for the + // frontend or the backend. Leave it blank for now. + TypeIndex BuildInfoArgs[BuildInfoRecord::MaxArgs] = {}; + NamedMDNode *CUs = MMI->getModule()->getNamedMetadata("llvm.dbg.cu"); + const MDNode *Node = *CUs->operands().begin(); // FIXME: Multiple CUs. + const auto *CU = cast<DICompileUnit>(Node); + const DIFile *MainSourceFile = CU->getFile(); + BuildInfoArgs[BuildInfoRecord::CurrentDirectory] = + getStringIdTypeIdx(TypeTable, MainSourceFile->getDirectory()); + BuildInfoArgs[BuildInfoRecord::SourceFile] = + getStringIdTypeIdx(TypeTable, MainSourceFile->getFilename()); + // FIXME: Path to compiler and command line. PDB is intentionally blank unless + // we implement /Zi type servers. + BuildInfoRecord BIR(BuildInfoArgs); + TypeIndex BuildInfoIndex = TypeTable.writeLeafType(BIR); + + // Make a new .debug$S subsection for the S_BUILDINFO record, which points + // from the module symbols into the type stream. + MCSymbol *BISubsecEnd = beginCVSubsection(DebugSubsectionKind::Symbols); + MCSymbol *BIEnd = beginSymbolRecord(SymbolKind::S_BUILDINFO); + OS.AddComment("LF_BUILDINFO index"); + OS.EmitIntValue(BuildInfoIndex.getIndex(), 4); + endSymbolRecord(BIEnd); + endCVSubsection(BISubsecEnd); +} + +void CodeViewDebug::emitInlineeLinesSubsection() { + if (InlinedSubprograms.empty()) + return; + + OS.AddComment("Inlinee lines subsection"); + MCSymbol *InlineEnd = beginCVSubsection(DebugSubsectionKind::InlineeLines); + + // We emit the checksum info for files. This is used by debuggers to + // determine if a pdb matches the source before loading it. Visual Studio, + // for instance, will display a warning that the breakpoints are not valid if + // the pdb does not match the source. + OS.AddComment("Inlinee lines signature"); + OS.EmitIntValue(unsigned(InlineeLinesSignature::Normal), 4); + + for (const DISubprogram *SP : InlinedSubprograms) { + assert(TypeIndices.count({SP, nullptr})); + TypeIndex InlineeIdx = TypeIndices[{SP, nullptr}]; + + OS.AddBlankLine(); + unsigned FileId = maybeRecordFile(SP->getFile()); + OS.AddComment("Inlined function " + SP->getName() + " starts at " + + SP->getFilename() + Twine(':') + Twine(SP->getLine())); + OS.AddBlankLine(); + OS.AddComment("Type index of inlined function"); + OS.EmitIntValue(InlineeIdx.getIndex(), 4); + OS.AddComment("Offset into filechecksum table"); + OS.EmitCVFileChecksumOffsetDirective(FileId); + OS.AddComment("Starting line number"); + OS.EmitIntValue(SP->getLine(), 4); + } + + endCVSubsection(InlineEnd); +} + +void CodeViewDebug::emitInlinedCallSite(const FunctionInfo &FI, + const DILocation *InlinedAt, + const InlineSite &Site) { + assert(TypeIndices.count({Site.Inlinee, nullptr})); + TypeIndex InlineeIdx = TypeIndices[{Site.Inlinee, nullptr}]; + + // SymbolRecord + MCSymbol *InlineEnd = beginSymbolRecord(SymbolKind::S_INLINESITE); + + OS.AddComment("PtrParent"); + OS.EmitIntValue(0, 4); + OS.AddComment("PtrEnd"); + OS.EmitIntValue(0, 4); + OS.AddComment("Inlinee type index"); + OS.EmitIntValue(InlineeIdx.getIndex(), 4); + + unsigned FileId = maybeRecordFile(Site.Inlinee->getFile()); + unsigned StartLineNum = Site.Inlinee->getLine(); + + OS.EmitCVInlineLinetableDirective(Site.SiteFuncId, FileId, StartLineNum, + FI.Begin, FI.End); + + endSymbolRecord(InlineEnd); + + emitLocalVariableList(FI, Site.InlinedLocals); + + // Recurse on child inlined call sites before closing the scope. + for (const DILocation *ChildSite : Site.ChildSites) { + auto I = FI.InlineSites.find(ChildSite); + assert(I != FI.InlineSites.end() && + "child site not in function inline site map"); + emitInlinedCallSite(FI, ChildSite, I->second); + } + + // Close the scope. + emitEndSymbolRecord(SymbolKind::S_INLINESITE_END); +} + +void CodeViewDebug::switchToDebugSectionForSymbol(const MCSymbol *GVSym) { + // If we have a symbol, it may be in a section that is COMDAT. If so, find the + // comdat key. A section may be comdat because of -ffunction-sections or + // because it is comdat in the IR. + MCSectionCOFF *GVSec = + GVSym ? dyn_cast<MCSectionCOFF>(&GVSym->getSection()) : nullptr; + const MCSymbol *KeySym = GVSec ? GVSec->getCOMDATSymbol() : nullptr; + + MCSectionCOFF *DebugSec = cast<MCSectionCOFF>( + Asm->getObjFileLowering().getCOFFDebugSymbolsSection()); + DebugSec = OS.getContext().getAssociativeCOFFSection(DebugSec, KeySym); + + OS.SwitchSection(DebugSec); + + // Emit the magic version number if this is the first time we've switched to + // this section. + if (ComdatDebugSections.insert(DebugSec).second) + emitCodeViewMagicVersion(); +} + +// Emit an S_THUNK32/S_END symbol pair for a thunk routine. +// The only supported thunk ordinal is currently the standard type. +void CodeViewDebug::emitDebugInfoForThunk(const Function *GV, + FunctionInfo &FI, + const MCSymbol *Fn) { + std::string FuncName = GlobalValue::dropLLVMManglingEscape(GV->getName()); + const ThunkOrdinal ordinal = ThunkOrdinal::Standard; // Only supported kind. + + OS.AddComment("Symbol subsection for " + Twine(FuncName)); + MCSymbol *SymbolsEnd = beginCVSubsection(DebugSubsectionKind::Symbols); + + // Emit S_THUNK32 + MCSymbol *ThunkRecordEnd = beginSymbolRecord(SymbolKind::S_THUNK32); + OS.AddComment("PtrParent"); + OS.EmitIntValue(0, 4); + OS.AddComment("PtrEnd"); + OS.EmitIntValue(0, 4); + OS.AddComment("PtrNext"); + OS.EmitIntValue(0, 4); + OS.AddComment("Thunk section relative address"); + OS.EmitCOFFSecRel32(Fn, /*Offset=*/0); + OS.AddComment("Thunk section index"); + OS.EmitCOFFSectionIndex(Fn); + OS.AddComment("Code size"); + OS.emitAbsoluteSymbolDiff(FI.End, Fn, 2); + OS.AddComment("Ordinal"); + OS.EmitIntValue(unsigned(ordinal), 1); + OS.AddComment("Function name"); + emitNullTerminatedSymbolName(OS, FuncName); + // Additional fields specific to the thunk ordinal would go here. + endSymbolRecord(ThunkRecordEnd); + + // Local variables/inlined routines are purposely omitted here. The point of + // marking this as a thunk is so Visual Studio will NOT stop in this routine. + + // Emit S_PROC_ID_END + emitEndSymbolRecord(SymbolKind::S_PROC_ID_END); + + endCVSubsection(SymbolsEnd); +} + +void CodeViewDebug::emitDebugInfoForFunction(const Function *GV, + FunctionInfo &FI) { + // For each function there is a separate subsection which holds the PC to + // file:line table. + const MCSymbol *Fn = Asm->getSymbol(GV); + assert(Fn); + + // Switch to the to a comdat section, if appropriate. + switchToDebugSectionForSymbol(Fn); + + std::string FuncName; + auto *SP = GV->getSubprogram(); + assert(SP); + setCurrentSubprogram(SP); + + if (SP->isThunk()) { + emitDebugInfoForThunk(GV, FI, Fn); + return; + } + + // If we have a display name, build the fully qualified name by walking the + // chain of scopes. + if (!SP->getName().empty()) + FuncName = getFullyQualifiedName(SP->getScope(), SP->getName()); + + // If our DISubprogram name is empty, use the mangled name. + if (FuncName.empty()) + FuncName = GlobalValue::dropLLVMManglingEscape(GV->getName()); + + // Emit FPO data, but only on 32-bit x86. No other platforms use it. + if (Triple(MMI->getModule()->getTargetTriple()).getArch() == Triple::x86) + OS.EmitCVFPOData(Fn); + + // Emit a symbol subsection, required by VS2012+ to find function boundaries. + OS.AddComment("Symbol subsection for " + Twine(FuncName)); + MCSymbol *SymbolsEnd = beginCVSubsection(DebugSubsectionKind::Symbols); + { + SymbolKind ProcKind = GV->hasLocalLinkage() ? SymbolKind::S_LPROC32_ID + : SymbolKind::S_GPROC32_ID; + MCSymbol *ProcRecordEnd = beginSymbolRecord(ProcKind); + + // These fields are filled in by tools like CVPACK which run after the fact. + OS.AddComment("PtrParent"); + OS.EmitIntValue(0, 4); + OS.AddComment("PtrEnd"); + OS.EmitIntValue(0, 4); + OS.AddComment("PtrNext"); + OS.EmitIntValue(0, 4); + // This is the important bit that tells the debugger where the function + // code is located and what's its size: + OS.AddComment("Code size"); + OS.emitAbsoluteSymbolDiff(FI.End, Fn, 4); + OS.AddComment("Offset after prologue"); + OS.EmitIntValue(0, 4); + OS.AddComment("Offset before epilogue"); + OS.EmitIntValue(0, 4); + OS.AddComment("Function type index"); + OS.EmitIntValue(getFuncIdForSubprogram(GV->getSubprogram()).getIndex(), 4); + OS.AddComment("Function section relative address"); + OS.EmitCOFFSecRel32(Fn, /*Offset=*/0); + OS.AddComment("Function section index"); + OS.EmitCOFFSectionIndex(Fn); + OS.AddComment("Flags"); + OS.EmitIntValue(0, 1); + // Emit the function display name as a null-terminated string. + OS.AddComment("Function name"); + // Truncate the name so we won't overflow the record length field. + emitNullTerminatedSymbolName(OS, FuncName); + endSymbolRecord(ProcRecordEnd); + + MCSymbol *FrameProcEnd = beginSymbolRecord(SymbolKind::S_FRAMEPROC); + // Subtract out the CSR size since MSVC excludes that and we include it. + OS.AddComment("FrameSize"); + OS.EmitIntValue(FI.FrameSize - FI.CSRSize, 4); + OS.AddComment("Padding"); + OS.EmitIntValue(0, 4); + OS.AddComment("Offset of padding"); + OS.EmitIntValue(0, 4); + OS.AddComment("Bytes of callee saved registers"); + OS.EmitIntValue(FI.CSRSize, 4); + OS.AddComment("Exception handler offset"); + OS.EmitIntValue(0, 4); + OS.AddComment("Exception handler section"); + OS.EmitIntValue(0, 2); + OS.AddComment("Flags (defines frame register)"); + OS.EmitIntValue(uint32_t(FI.FrameProcOpts), 4); + endSymbolRecord(FrameProcEnd); + + emitLocalVariableList(FI, FI.Locals); + emitGlobalVariableList(FI.Globals); + emitLexicalBlockList(FI.ChildBlocks, FI); + + // Emit inlined call site information. Only emit functions inlined directly + // into the parent function. We'll emit the other sites recursively as part + // of their parent inline site. + for (const DILocation *InlinedAt : FI.ChildSites) { + auto I = FI.InlineSites.find(InlinedAt); + assert(I != FI.InlineSites.end() && + "child site not in function inline site map"); + emitInlinedCallSite(FI, InlinedAt, I->second); + } + + for (auto Annot : FI.Annotations) { + MCSymbol *Label = Annot.first; + MDTuple *Strs = cast<MDTuple>(Annot.second); + MCSymbol *AnnotEnd = beginSymbolRecord(SymbolKind::S_ANNOTATION); + OS.EmitCOFFSecRel32(Label, /*Offset=*/0); + // FIXME: Make sure we don't overflow the max record size. + OS.EmitCOFFSectionIndex(Label); + OS.EmitIntValue(Strs->getNumOperands(), 2); + for (Metadata *MD : Strs->operands()) { + // MDStrings are null terminated, so we can do EmitBytes and get the + // nice .asciz directive. + StringRef Str = cast<MDString>(MD)->getString(); + assert(Str.data()[Str.size()] == '\0' && "non-nullterminated MDString"); + OS.EmitBytes(StringRef(Str.data(), Str.size() + 1)); + } + endSymbolRecord(AnnotEnd); + } + + for (auto HeapAllocSite : FI.HeapAllocSites) { + MCSymbol *BeginLabel = std::get<0>(HeapAllocSite); + MCSymbol *EndLabel = std::get<1>(HeapAllocSite); + + // The labels might not be defined if the instruction was replaced + // somewhere in the codegen pipeline. + if (!BeginLabel->isDefined() || !EndLabel->isDefined()) + continue; + + const DIType *DITy = std::get<2>(HeapAllocSite); + MCSymbol *HeapAllocEnd = beginSymbolRecord(SymbolKind::S_HEAPALLOCSITE); + OS.AddComment("Call site offset"); + OS.EmitCOFFSecRel32(BeginLabel, /*Offset=*/0); + OS.AddComment("Call site section index"); + OS.EmitCOFFSectionIndex(BeginLabel); + OS.AddComment("Call instruction length"); + OS.emitAbsoluteSymbolDiff(EndLabel, BeginLabel, 2); + OS.AddComment("Type index"); + OS.EmitIntValue(getCompleteTypeIndex(DITy).getIndex(), 4); + endSymbolRecord(HeapAllocEnd); + } + + if (SP != nullptr) + emitDebugInfoForUDTs(LocalUDTs); + + // We're done with this function. + emitEndSymbolRecord(SymbolKind::S_PROC_ID_END); + } + endCVSubsection(SymbolsEnd); + + // We have an assembler directive that takes care of the whole line table. + OS.EmitCVLinetableDirective(FI.FuncId, Fn, FI.End); +} + +CodeViewDebug::LocalVarDefRange +CodeViewDebug::createDefRangeMem(uint16_t CVRegister, int Offset) { + LocalVarDefRange DR; + DR.InMemory = -1; + DR.DataOffset = Offset; + assert(DR.DataOffset == Offset && "truncation"); + DR.IsSubfield = 0; + DR.StructOffset = 0; + DR.CVRegister = CVRegister; + return DR; +} + +void CodeViewDebug::collectVariableInfoFromMFTable( + DenseSet<InlinedEntity> &Processed) { + const MachineFunction &MF = *Asm->MF; + const TargetSubtargetInfo &TSI = MF.getSubtarget(); + const TargetFrameLowering *TFI = TSI.getFrameLowering(); + const TargetRegisterInfo *TRI = TSI.getRegisterInfo(); + + for (const MachineFunction::VariableDbgInfo &VI : MF.getVariableDbgInfo()) { + if (!VI.Var) + continue; + assert(VI.Var->isValidLocationForIntrinsic(VI.Loc) && + "Expected inlined-at fields to agree"); + + Processed.insert(InlinedEntity(VI.Var, VI.Loc->getInlinedAt())); + LexicalScope *Scope = LScopes.findLexicalScope(VI.Loc); + + // If variable scope is not found then skip this variable. + if (!Scope) + continue; + + // If the variable has an attached offset expression, extract it. + // FIXME: Try to handle DW_OP_deref as well. + int64_t ExprOffset = 0; + bool Deref = false; + if (VI.Expr) { + // If there is one DW_OP_deref element, use offset of 0 and keep going. + if (VI.Expr->getNumElements() == 1 && + VI.Expr->getElement(0) == llvm::dwarf::DW_OP_deref) + Deref = true; + else if (!VI.Expr->extractIfOffset(ExprOffset)) + continue; + } + + // Get the frame register used and the offset. + unsigned FrameReg = 0; + int FrameOffset = TFI->getFrameIndexReference(*Asm->MF, VI.Slot, FrameReg); + uint16_t CVReg = TRI->getCodeViewRegNum(FrameReg); + + // Calculate the label ranges. + LocalVarDefRange DefRange = + createDefRangeMem(CVReg, FrameOffset + ExprOffset); + + for (const InsnRange &Range : Scope->getRanges()) { + const MCSymbol *Begin = getLabelBeforeInsn(Range.first); + const MCSymbol *End = getLabelAfterInsn(Range.second); + End = End ? End : Asm->getFunctionEnd(); + DefRange.Ranges.emplace_back(Begin, End); + } + + LocalVariable Var; + Var.DIVar = VI.Var; + Var.DefRanges.emplace_back(std::move(DefRange)); + if (Deref) + Var.UseReferenceType = true; + + recordLocalVariable(std::move(Var), Scope); + } +} + +static bool canUseReferenceType(const DbgVariableLocation &Loc) { + return !Loc.LoadChain.empty() && Loc.LoadChain.back() == 0; +} + +static bool needsReferenceType(const DbgVariableLocation &Loc) { + return Loc.LoadChain.size() == 2 && Loc.LoadChain.back() == 0; +} + +void CodeViewDebug::calculateRanges( + LocalVariable &Var, const DbgValueHistoryMap::Entries &Entries) { + const TargetRegisterInfo *TRI = Asm->MF->getSubtarget().getRegisterInfo(); + + // Calculate the definition ranges. + for (auto I = Entries.begin(), E = Entries.end(); I != E; ++I) { + const auto &Entry = *I; + if (!Entry.isDbgValue()) + continue; + const MachineInstr *DVInst = Entry.getInstr(); + assert(DVInst->isDebugValue() && "Invalid History entry"); + // FIXME: Find a way to represent constant variables, since they are + // relatively common. + Optional<DbgVariableLocation> Location = + DbgVariableLocation::extractFromMachineInstruction(*DVInst); + if (!Location) + continue; + + // CodeView can only express variables in register and variables in memory + // at a constant offset from a register. However, for variables passed + // indirectly by pointer, it is common for that pointer to be spilled to a + // stack location. For the special case of one offseted load followed by a + // zero offset load (a pointer spilled to the stack), we change the type of + // the local variable from a value type to a reference type. This tricks the + // debugger into doing the load for us. + if (Var.UseReferenceType) { + // We're using a reference type. Drop the last zero offset load. + if (canUseReferenceType(*Location)) + Location->LoadChain.pop_back(); + else + continue; + } else if (needsReferenceType(*Location)) { + // This location can't be expressed without switching to a reference type. + // Start over using that. + Var.UseReferenceType = true; + Var.DefRanges.clear(); + calculateRanges(Var, Entries); + return; + } + + // We can only handle a register or an offseted load of a register. + if (Location->Register == 0 || Location->LoadChain.size() > 1) + continue; + { + LocalVarDefRange DR; + DR.CVRegister = TRI->getCodeViewRegNum(Location->Register); + DR.InMemory = !Location->LoadChain.empty(); + DR.DataOffset = + !Location->LoadChain.empty() ? Location->LoadChain.back() : 0; + if (Location->FragmentInfo) { + DR.IsSubfield = true; + DR.StructOffset = Location->FragmentInfo->OffsetInBits / 8; + } else { + DR.IsSubfield = false; + DR.StructOffset = 0; + } + + if (Var.DefRanges.empty() || + Var.DefRanges.back().isDifferentLocation(DR)) { + Var.DefRanges.emplace_back(std::move(DR)); + } + } + + // Compute the label range. + const MCSymbol *Begin = getLabelBeforeInsn(Entry.getInstr()); + const MCSymbol *End; + if (Entry.getEndIndex() != DbgValueHistoryMap::NoEntry) { + auto &EndingEntry = Entries[Entry.getEndIndex()]; + End = EndingEntry.isDbgValue() + ? getLabelBeforeInsn(EndingEntry.getInstr()) + : getLabelAfterInsn(EndingEntry.getInstr()); + } else + End = Asm->getFunctionEnd(); + + // If the last range end is our begin, just extend the last range. + // Otherwise make a new range. + SmallVectorImpl<std::pair<const MCSymbol *, const MCSymbol *>> &R = + Var.DefRanges.back().Ranges; + if (!R.empty() && R.back().second == Begin) + R.back().second = End; + else + R.emplace_back(Begin, End); + + // FIXME: Do more range combining. + } +} + +void CodeViewDebug::collectVariableInfo(const DISubprogram *SP) { + DenseSet<InlinedEntity> Processed; + // Grab the variable info that was squirreled away in the MMI side-table. + collectVariableInfoFromMFTable(Processed); + + for (const auto &I : DbgValues) { + InlinedEntity IV = I.first; + if (Processed.count(IV)) + continue; + const DILocalVariable *DIVar = cast<DILocalVariable>(IV.first); + const DILocation *InlinedAt = IV.second; + + // Instruction ranges, specifying where IV is accessible. + const auto &Entries = I.second; + + LexicalScope *Scope = nullptr; + if (InlinedAt) + Scope = LScopes.findInlinedScope(DIVar->getScope(), InlinedAt); + else + Scope = LScopes.findLexicalScope(DIVar->getScope()); + // If variable scope is not found then skip this variable. + if (!Scope) + continue; + + LocalVariable Var; + Var.DIVar = DIVar; + + calculateRanges(Var, Entries); + recordLocalVariable(std::move(Var), Scope); + } +} + +void CodeViewDebug::beginFunctionImpl(const MachineFunction *MF) { + const TargetSubtargetInfo &TSI = MF->getSubtarget(); + const TargetRegisterInfo *TRI = TSI.getRegisterInfo(); + const MachineFrameInfo &MFI = MF->getFrameInfo(); + const Function &GV = MF->getFunction(); + auto Insertion = FnDebugInfo.insert({&GV, std::make_unique<FunctionInfo>()}); + assert(Insertion.second && "function already has info"); + CurFn = Insertion.first->second.get(); + CurFn->FuncId = NextFuncId++; + CurFn->Begin = Asm->getFunctionBegin(); + + // The S_FRAMEPROC record reports the stack size, and how many bytes of + // callee-saved registers were used. For targets that don't use a PUSH + // instruction (AArch64), this will be zero. + CurFn->CSRSize = MFI.getCVBytesOfCalleeSavedRegisters(); + CurFn->FrameSize = MFI.getStackSize(); + CurFn->OffsetAdjustment = MFI.getOffsetAdjustment(); + CurFn->HasStackRealignment = TRI->needsStackRealignment(*MF); + + // For this function S_FRAMEPROC record, figure out which codeview register + // will be the frame pointer. + CurFn->EncodedParamFramePtrReg = EncodedFramePtrReg::None; // None. + CurFn->EncodedLocalFramePtrReg = EncodedFramePtrReg::None; // None. + if (CurFn->FrameSize > 0) { + if (!TSI.getFrameLowering()->hasFP(*MF)) { + CurFn->EncodedLocalFramePtrReg = EncodedFramePtrReg::StackPtr; + CurFn->EncodedParamFramePtrReg = EncodedFramePtrReg::StackPtr; + } else { + // If there is an FP, parameters are always relative to it. + CurFn->EncodedParamFramePtrReg = EncodedFramePtrReg::FramePtr; + if (CurFn->HasStackRealignment) { + // If the stack needs realignment, locals are relative to SP or VFRAME. + CurFn->EncodedLocalFramePtrReg = EncodedFramePtrReg::StackPtr; + } else { + // Otherwise, locals are relative to EBP, and we probably have VLAs or + // other stack adjustments. + CurFn->EncodedLocalFramePtrReg = EncodedFramePtrReg::FramePtr; + } + } + } + + // Compute other frame procedure options. + FrameProcedureOptions FPO = FrameProcedureOptions::None; + if (MFI.hasVarSizedObjects()) + FPO |= FrameProcedureOptions::HasAlloca; + if (MF->exposesReturnsTwice()) + FPO |= FrameProcedureOptions::HasSetJmp; + // FIXME: Set HasLongJmp if we ever track that info. + if (MF->hasInlineAsm()) + FPO |= FrameProcedureOptions::HasInlineAssembly; + if (GV.hasPersonalityFn()) { + if (isAsynchronousEHPersonality( + classifyEHPersonality(GV.getPersonalityFn()))) + FPO |= FrameProcedureOptions::HasStructuredExceptionHandling; + else + FPO |= FrameProcedureOptions::HasExceptionHandling; + } + if (GV.hasFnAttribute(Attribute::InlineHint)) + FPO |= FrameProcedureOptions::MarkedInline; + if (GV.hasFnAttribute(Attribute::Naked)) + FPO |= FrameProcedureOptions::Naked; + if (MFI.hasStackProtectorIndex()) + FPO |= FrameProcedureOptions::SecurityChecks; + FPO |= FrameProcedureOptions(uint32_t(CurFn->EncodedLocalFramePtrReg) << 14U); + FPO |= FrameProcedureOptions(uint32_t(CurFn->EncodedParamFramePtrReg) << 16U); + if (Asm->TM.getOptLevel() != CodeGenOpt::None && + !GV.hasOptSize() && !GV.hasOptNone()) + FPO |= FrameProcedureOptions::OptimizedForSpeed; + // FIXME: Set GuardCfg when it is implemented. + CurFn->FrameProcOpts = FPO; + + OS.EmitCVFuncIdDirective(CurFn->FuncId); + + // Find the end of the function prolog. First known non-DBG_VALUE and + // non-frame setup location marks the beginning of the function body. + // FIXME: is there a simpler a way to do this? Can we just search + // for the first instruction of the function, not the last of the prolog? + DebugLoc PrologEndLoc; + bool EmptyPrologue = true; + for (const auto &MBB : *MF) { + for (const auto &MI : MBB) { + if (!MI.isMetaInstruction() && !MI.getFlag(MachineInstr::FrameSetup) && + MI.getDebugLoc()) { + PrologEndLoc = MI.getDebugLoc(); + break; + } else if (!MI.isMetaInstruction()) { + EmptyPrologue = false; + } + } + } + + // Record beginning of function if we have a non-empty prologue. + if (PrologEndLoc && !EmptyPrologue) { + DebugLoc FnStartDL = PrologEndLoc.getFnDebugLoc(); + maybeRecordLocation(FnStartDL, MF); + } +} + +static bool shouldEmitUdt(const DIType *T) { + if (!T) + return false; + + // MSVC does not emit UDTs for typedefs that are scoped to classes. + if (T->getTag() == dwarf::DW_TAG_typedef) { + if (DIScope *Scope = T->getScope()) { + switch (Scope->getTag()) { + case dwarf::DW_TAG_structure_type: + case dwarf::DW_TAG_class_type: + case dwarf::DW_TAG_union_type: + return false; + } + } + } + + while (true) { + if (!T || T->isForwardDecl()) + return false; + + const DIDerivedType *DT = dyn_cast<DIDerivedType>(T); + if (!DT) + return true; + T = DT->getBaseType(); + } + return true; +} + +void CodeViewDebug::addToUDTs(const DIType *Ty) { + // Don't record empty UDTs. + if (Ty->getName().empty()) + return; + if (!shouldEmitUdt(Ty)) + return; + + SmallVector<StringRef, 5> QualifiedNameComponents; + const DISubprogram *ClosestSubprogram = + getQualifiedNameComponents(Ty->getScope(), QualifiedNameComponents); + + std::string FullyQualifiedName = + getQualifiedName(QualifiedNameComponents, getPrettyScopeName(Ty)); + + if (ClosestSubprogram == nullptr) { + GlobalUDTs.emplace_back(std::move(FullyQualifiedName), Ty); + } else if (ClosestSubprogram == CurrentSubprogram) { + LocalUDTs.emplace_back(std::move(FullyQualifiedName), Ty); + } + + // TODO: What if the ClosestSubprogram is neither null or the current + // subprogram? Currently, the UDT just gets dropped on the floor. + // + // The current behavior is not desirable. To get maximal fidelity, we would + // need to perform all type translation before beginning emission of .debug$S + // and then make LocalUDTs a member of FunctionInfo +} + +TypeIndex CodeViewDebug::lowerType(const DIType *Ty, const DIType *ClassTy) { + // Generic dispatch for lowering an unknown type. + switch (Ty->getTag()) { + case dwarf::DW_TAG_array_type: + return lowerTypeArray(cast<DICompositeType>(Ty)); + case dwarf::DW_TAG_typedef: + return lowerTypeAlias(cast<DIDerivedType>(Ty)); + case dwarf::DW_TAG_base_type: + return lowerTypeBasic(cast<DIBasicType>(Ty)); + case dwarf::DW_TAG_pointer_type: + if (cast<DIDerivedType>(Ty)->getName() == "__vtbl_ptr_type") + return lowerTypeVFTableShape(cast<DIDerivedType>(Ty)); + LLVM_FALLTHROUGH; + case dwarf::DW_TAG_reference_type: + case dwarf::DW_TAG_rvalue_reference_type: + return lowerTypePointer(cast<DIDerivedType>(Ty)); + case dwarf::DW_TAG_ptr_to_member_type: + return lowerTypeMemberPointer(cast<DIDerivedType>(Ty)); + case dwarf::DW_TAG_restrict_type: + case dwarf::DW_TAG_const_type: + case dwarf::DW_TAG_volatile_type: + // TODO: add support for DW_TAG_atomic_type here + return lowerTypeModifier(cast<DIDerivedType>(Ty)); + case dwarf::DW_TAG_subroutine_type: + if (ClassTy) { + // The member function type of a member function pointer has no + // ThisAdjustment. + return lowerTypeMemberFunction(cast<DISubroutineType>(Ty), ClassTy, + /*ThisAdjustment=*/0, + /*IsStaticMethod=*/false); + } + return lowerTypeFunction(cast<DISubroutineType>(Ty)); + case dwarf::DW_TAG_enumeration_type: + return lowerTypeEnum(cast<DICompositeType>(Ty)); + case dwarf::DW_TAG_class_type: + case dwarf::DW_TAG_structure_type: + return lowerTypeClass(cast<DICompositeType>(Ty)); + case dwarf::DW_TAG_union_type: + return lowerTypeUnion(cast<DICompositeType>(Ty)); + case dwarf::DW_TAG_unspecified_type: + if (Ty->getName() == "decltype(nullptr)") + return TypeIndex::NullptrT(); + return TypeIndex::None(); + default: + // Use the null type index. + return TypeIndex(); + } +} + +TypeIndex CodeViewDebug::lowerTypeAlias(const DIDerivedType *Ty) { + TypeIndex UnderlyingTypeIndex = getTypeIndex(Ty->getBaseType()); + StringRef TypeName = Ty->getName(); + + addToUDTs(Ty); + + if (UnderlyingTypeIndex == TypeIndex(SimpleTypeKind::Int32Long) && + TypeName == "HRESULT") + return TypeIndex(SimpleTypeKind::HResult); + if (UnderlyingTypeIndex == TypeIndex(SimpleTypeKind::UInt16Short) && + TypeName == "wchar_t") + return TypeIndex(SimpleTypeKind::WideCharacter); + + return UnderlyingTypeIndex; +} + +TypeIndex CodeViewDebug::lowerTypeArray(const DICompositeType *Ty) { + const DIType *ElementType = Ty->getBaseType(); + TypeIndex ElementTypeIndex = getTypeIndex(ElementType); + // IndexType is size_t, which depends on the bitness of the target. + TypeIndex IndexType = getPointerSizeInBytes() == 8 + ? TypeIndex(SimpleTypeKind::UInt64Quad) + : TypeIndex(SimpleTypeKind::UInt32Long); + + uint64_t ElementSize = getBaseTypeSize(ElementType) / 8; + + // Add subranges to array type. + DINodeArray Elements = Ty->getElements(); + for (int i = Elements.size() - 1; i >= 0; --i) { + const DINode *Element = Elements[i]; + assert(Element->getTag() == dwarf::DW_TAG_subrange_type); + + const DISubrange *Subrange = cast<DISubrange>(Element); + assert(Subrange->getLowerBound() == 0 && + "codeview doesn't support subranges with lower bounds"); + int64_t Count = -1; + if (auto *CI = Subrange->getCount().dyn_cast<ConstantInt*>()) + Count = CI->getSExtValue(); + + // Forward declarations of arrays without a size and VLAs use a count of -1. + // Emit a count of zero in these cases to match what MSVC does for arrays + // without a size. MSVC doesn't support VLAs, so it's not clear what we + // should do for them even if we could distinguish them. + if (Count == -1) + Count = 0; + + // Update the element size and element type index for subsequent subranges. + ElementSize *= Count; + + // If this is the outermost array, use the size from the array. It will be + // more accurate if we had a VLA or an incomplete element type size. + uint64_t ArraySize = + (i == 0 && ElementSize == 0) ? Ty->getSizeInBits() / 8 : ElementSize; + + StringRef Name = (i == 0) ? Ty->getName() : ""; + ArrayRecord AR(ElementTypeIndex, IndexType, ArraySize, Name); + ElementTypeIndex = TypeTable.writeLeafType(AR); + } + + return ElementTypeIndex; +} + +TypeIndex CodeViewDebug::lowerTypeBasic(const DIBasicType *Ty) { + TypeIndex Index; + dwarf::TypeKind Kind; + uint32_t ByteSize; + + Kind = static_cast<dwarf::TypeKind>(Ty->getEncoding()); + ByteSize = Ty->getSizeInBits() / 8; + + SimpleTypeKind STK = SimpleTypeKind::None; + switch (Kind) { + case dwarf::DW_ATE_address: + // FIXME: Translate + break; + case dwarf::DW_ATE_boolean: + switch (ByteSize) { + case 1: STK = SimpleTypeKind::Boolean8; break; + case 2: STK = SimpleTypeKind::Boolean16; break; + case 4: STK = SimpleTypeKind::Boolean32; break; + case 8: STK = SimpleTypeKind::Boolean64; break; + case 16: STK = SimpleTypeKind::Boolean128; break; + } + break; + case dwarf::DW_ATE_complex_float: + switch (ByteSize) { + case 2: STK = SimpleTypeKind::Complex16; break; + case 4: STK = SimpleTypeKind::Complex32; break; + case 8: STK = SimpleTypeKind::Complex64; break; + case 10: STK = SimpleTypeKind::Complex80; break; + case 16: STK = SimpleTypeKind::Complex128; break; + } + break; + case dwarf::DW_ATE_float: + switch (ByteSize) { + case 2: STK = SimpleTypeKind::Float16; break; + case 4: STK = SimpleTypeKind::Float32; break; + case 6: STK = SimpleTypeKind::Float48; break; + case 8: STK = SimpleTypeKind::Float64; break; + case 10: STK = SimpleTypeKind::Float80; break; + case 16: STK = SimpleTypeKind::Float128; break; + } + break; + case dwarf::DW_ATE_signed: + switch (ByteSize) { + case 1: STK = SimpleTypeKind::SignedCharacter; break; + case 2: STK = SimpleTypeKind::Int16Short; break; + case 4: STK = SimpleTypeKind::Int32; break; + case 8: STK = SimpleTypeKind::Int64Quad; break; + case 16: STK = SimpleTypeKind::Int128Oct; break; + } + break; + case dwarf::DW_ATE_unsigned: + switch (ByteSize) { + case 1: STK = SimpleTypeKind::UnsignedCharacter; break; + case 2: STK = SimpleTypeKind::UInt16Short; break; + case 4: STK = SimpleTypeKind::UInt32; break; + case 8: STK = SimpleTypeKind::UInt64Quad; break; + case 16: STK = SimpleTypeKind::UInt128Oct; break; + } + break; + case dwarf::DW_ATE_UTF: + switch (ByteSize) { + case 2: STK = SimpleTypeKind::Character16; break; + case 4: STK = SimpleTypeKind::Character32; break; + } + break; + case dwarf::DW_ATE_signed_char: + if (ByteSize == 1) + STK = SimpleTypeKind::SignedCharacter; + break; + case dwarf::DW_ATE_unsigned_char: + if (ByteSize == 1) + STK = SimpleTypeKind::UnsignedCharacter; + break; + default: + break; + } + + // Apply some fixups based on the source-level type name. + if (STK == SimpleTypeKind::Int32 && Ty->getName() == "long int") + STK = SimpleTypeKind::Int32Long; + if (STK == SimpleTypeKind::UInt32 && Ty->getName() == "long unsigned int") + STK = SimpleTypeKind::UInt32Long; + if (STK == SimpleTypeKind::UInt16Short && + (Ty->getName() == "wchar_t" || Ty->getName() == "__wchar_t")) + STK = SimpleTypeKind::WideCharacter; + if ((STK == SimpleTypeKind::SignedCharacter || + STK == SimpleTypeKind::UnsignedCharacter) && + Ty->getName() == "char") + STK = SimpleTypeKind::NarrowCharacter; + + return TypeIndex(STK); +} + +TypeIndex CodeViewDebug::lowerTypePointer(const DIDerivedType *Ty, + PointerOptions PO) { + TypeIndex PointeeTI = getTypeIndex(Ty->getBaseType()); + + // Pointers to simple types without any options can use SimpleTypeMode, rather + // than having a dedicated pointer type record. + if (PointeeTI.isSimple() && PO == PointerOptions::None && + PointeeTI.getSimpleMode() == SimpleTypeMode::Direct && + Ty->getTag() == dwarf::DW_TAG_pointer_type) { + SimpleTypeMode Mode = Ty->getSizeInBits() == 64 + ? SimpleTypeMode::NearPointer64 + : SimpleTypeMode::NearPointer32; + return TypeIndex(PointeeTI.getSimpleKind(), Mode); + } + + PointerKind PK = + Ty->getSizeInBits() == 64 ? PointerKind::Near64 : PointerKind::Near32; + PointerMode PM = PointerMode::Pointer; + switch (Ty->getTag()) { + default: llvm_unreachable("not a pointer tag type"); + case dwarf::DW_TAG_pointer_type: + PM = PointerMode::Pointer; + break; + case dwarf::DW_TAG_reference_type: + PM = PointerMode::LValueReference; + break; + case dwarf::DW_TAG_rvalue_reference_type: + PM = PointerMode::RValueReference; + break; + } + + if (Ty->isObjectPointer()) + PO |= PointerOptions::Const; + + PointerRecord PR(PointeeTI, PK, PM, PO, Ty->getSizeInBits() / 8); + return TypeTable.writeLeafType(PR); +} + +static PointerToMemberRepresentation +translatePtrToMemberRep(unsigned SizeInBytes, bool IsPMF, unsigned Flags) { + // SizeInBytes being zero generally implies that the member pointer type was + // incomplete, which can happen if it is part of a function prototype. In this + // case, use the unknown model instead of the general model. + if (IsPMF) { + switch (Flags & DINode::FlagPtrToMemberRep) { + case 0: + return SizeInBytes == 0 ? PointerToMemberRepresentation::Unknown + : PointerToMemberRepresentation::GeneralFunction; + case DINode::FlagSingleInheritance: + return PointerToMemberRepresentation::SingleInheritanceFunction; + case DINode::FlagMultipleInheritance: + return PointerToMemberRepresentation::MultipleInheritanceFunction; + case DINode::FlagVirtualInheritance: + return PointerToMemberRepresentation::VirtualInheritanceFunction; + } + } else { + switch (Flags & DINode::FlagPtrToMemberRep) { + case 0: + return SizeInBytes == 0 ? PointerToMemberRepresentation::Unknown + : PointerToMemberRepresentation::GeneralData; + case DINode::FlagSingleInheritance: + return PointerToMemberRepresentation::SingleInheritanceData; + case DINode::FlagMultipleInheritance: + return PointerToMemberRepresentation::MultipleInheritanceData; + case DINode::FlagVirtualInheritance: + return PointerToMemberRepresentation::VirtualInheritanceData; + } + } + llvm_unreachable("invalid ptr to member representation"); +} + +TypeIndex CodeViewDebug::lowerTypeMemberPointer(const DIDerivedType *Ty, + PointerOptions PO) { + assert(Ty->getTag() == dwarf::DW_TAG_ptr_to_member_type); + TypeIndex ClassTI = getTypeIndex(Ty->getClassType()); + TypeIndex PointeeTI = getTypeIndex(Ty->getBaseType(), Ty->getClassType()); + PointerKind PK = getPointerSizeInBytes() == 8 ? PointerKind::Near64 + : PointerKind::Near32; + bool IsPMF = isa<DISubroutineType>(Ty->getBaseType()); + PointerMode PM = IsPMF ? PointerMode::PointerToMemberFunction + : PointerMode::PointerToDataMember; + + assert(Ty->getSizeInBits() / 8 <= 0xff && "pointer size too big"); + uint8_t SizeInBytes = Ty->getSizeInBits() / 8; + MemberPointerInfo MPI( + ClassTI, translatePtrToMemberRep(SizeInBytes, IsPMF, Ty->getFlags())); + PointerRecord PR(PointeeTI, PK, PM, PO, SizeInBytes, MPI); + return TypeTable.writeLeafType(PR); +} + +/// Given a DWARF calling convention, get the CodeView equivalent. If we don't +/// have a translation, use the NearC convention. +static CallingConvention dwarfCCToCodeView(unsigned DwarfCC) { + switch (DwarfCC) { + case dwarf::DW_CC_normal: return CallingConvention::NearC; + case dwarf::DW_CC_BORLAND_msfastcall: return CallingConvention::NearFast; + case dwarf::DW_CC_BORLAND_thiscall: return CallingConvention::ThisCall; + case dwarf::DW_CC_BORLAND_stdcall: return CallingConvention::NearStdCall; + case dwarf::DW_CC_BORLAND_pascal: return CallingConvention::NearPascal; + case dwarf::DW_CC_LLVM_vectorcall: return CallingConvention::NearVector; + } + return CallingConvention::NearC; +} + +TypeIndex CodeViewDebug::lowerTypeModifier(const DIDerivedType *Ty) { + ModifierOptions Mods = ModifierOptions::None; + PointerOptions PO = PointerOptions::None; + bool IsModifier = true; + const DIType *BaseTy = Ty; + while (IsModifier && BaseTy) { + // FIXME: Need to add DWARF tags for __unaligned and _Atomic + switch (BaseTy->getTag()) { + case dwarf::DW_TAG_const_type: + Mods |= ModifierOptions::Const; + PO |= PointerOptions::Const; + break; + case dwarf::DW_TAG_volatile_type: + Mods |= ModifierOptions::Volatile; + PO |= PointerOptions::Volatile; + break; + case dwarf::DW_TAG_restrict_type: + // Only pointer types be marked with __restrict. There is no known flag + // for __restrict in LF_MODIFIER records. + PO |= PointerOptions::Restrict; + break; + default: + IsModifier = false; + break; + } + if (IsModifier) + BaseTy = cast<DIDerivedType>(BaseTy)->getBaseType(); + } + + // Check if the inner type will use an LF_POINTER record. If so, the + // qualifiers will go in the LF_POINTER record. This comes up for types like + // 'int *const' and 'int *__restrict', not the more common cases like 'const + // char *'. + if (BaseTy) { + switch (BaseTy->getTag()) { + case dwarf::DW_TAG_pointer_type: + case dwarf::DW_TAG_reference_type: + case dwarf::DW_TAG_rvalue_reference_type: + return lowerTypePointer(cast<DIDerivedType>(BaseTy), PO); + case dwarf::DW_TAG_ptr_to_member_type: + return lowerTypeMemberPointer(cast<DIDerivedType>(BaseTy), PO); + default: + break; + } + } + + TypeIndex ModifiedTI = getTypeIndex(BaseTy); + + // Return the base type index if there aren't any modifiers. For example, the + // metadata could contain restrict wrappers around non-pointer types. + if (Mods == ModifierOptions::None) + return ModifiedTI; + + ModifierRecord MR(ModifiedTI, Mods); + return TypeTable.writeLeafType(MR); +} + +TypeIndex CodeViewDebug::lowerTypeFunction(const DISubroutineType *Ty) { + SmallVector<TypeIndex, 8> ReturnAndArgTypeIndices; + for (const DIType *ArgType : Ty->getTypeArray()) + ReturnAndArgTypeIndices.push_back(getTypeIndex(ArgType)); + + // MSVC uses type none for variadic argument. + if (ReturnAndArgTypeIndices.size() > 1 && + ReturnAndArgTypeIndices.back() == TypeIndex::Void()) { + ReturnAndArgTypeIndices.back() = TypeIndex::None(); + } + TypeIndex ReturnTypeIndex = TypeIndex::Void(); + ArrayRef<TypeIndex> ArgTypeIndices = None; + if (!ReturnAndArgTypeIndices.empty()) { + auto ReturnAndArgTypesRef = makeArrayRef(ReturnAndArgTypeIndices); + ReturnTypeIndex = ReturnAndArgTypesRef.front(); + ArgTypeIndices = ReturnAndArgTypesRef.drop_front(); + } + + ArgListRecord ArgListRec(TypeRecordKind::ArgList, ArgTypeIndices); + TypeIndex ArgListIndex = TypeTable.writeLeafType(ArgListRec); + + CallingConvention CC = dwarfCCToCodeView(Ty->getCC()); + + FunctionOptions FO = getFunctionOptions(Ty); + ProcedureRecord Procedure(ReturnTypeIndex, CC, FO, ArgTypeIndices.size(), + ArgListIndex); + return TypeTable.writeLeafType(Procedure); +} + +TypeIndex CodeViewDebug::lowerTypeMemberFunction(const DISubroutineType *Ty, + const DIType *ClassTy, + int ThisAdjustment, + bool IsStaticMethod, + FunctionOptions FO) { + // Lower the containing class type. + TypeIndex ClassType = getTypeIndex(ClassTy); + + DITypeRefArray ReturnAndArgs = Ty->getTypeArray(); + + unsigned Index = 0; + SmallVector<TypeIndex, 8> ArgTypeIndices; + TypeIndex ReturnTypeIndex = TypeIndex::Void(); + if (ReturnAndArgs.size() > Index) { + ReturnTypeIndex = getTypeIndex(ReturnAndArgs[Index++]); + } + + // If the first argument is a pointer type and this isn't a static method, + // treat it as the special 'this' parameter, which is encoded separately from + // the arguments. + TypeIndex ThisTypeIndex; + if (!IsStaticMethod && ReturnAndArgs.size() > Index) { + if (const DIDerivedType *PtrTy = + dyn_cast_or_null<DIDerivedType>(ReturnAndArgs[Index])) { + if (PtrTy->getTag() == dwarf::DW_TAG_pointer_type) { + ThisTypeIndex = getTypeIndexForThisPtr(PtrTy, Ty); + Index++; + } + } + } + + while (Index < ReturnAndArgs.size()) + ArgTypeIndices.push_back(getTypeIndex(ReturnAndArgs[Index++])); + + // MSVC uses type none for variadic argument. + if (!ArgTypeIndices.empty() && ArgTypeIndices.back() == TypeIndex::Void()) + ArgTypeIndices.back() = TypeIndex::None(); + + ArgListRecord ArgListRec(TypeRecordKind::ArgList, ArgTypeIndices); + TypeIndex ArgListIndex = TypeTable.writeLeafType(ArgListRec); + + CallingConvention CC = dwarfCCToCodeView(Ty->getCC()); + + MemberFunctionRecord MFR(ReturnTypeIndex, ClassType, ThisTypeIndex, CC, FO, + ArgTypeIndices.size(), ArgListIndex, ThisAdjustment); + return TypeTable.writeLeafType(MFR); +} + +TypeIndex CodeViewDebug::lowerTypeVFTableShape(const DIDerivedType *Ty) { + unsigned VSlotCount = + Ty->getSizeInBits() / (8 * Asm->MAI->getCodePointerSize()); + SmallVector<VFTableSlotKind, 4> Slots(VSlotCount, VFTableSlotKind::Near); + + VFTableShapeRecord VFTSR(Slots); + return TypeTable.writeLeafType(VFTSR); +} + +static MemberAccess translateAccessFlags(unsigned RecordTag, unsigned Flags) { + switch (Flags & DINode::FlagAccessibility) { + case DINode::FlagPrivate: return MemberAccess::Private; + case DINode::FlagPublic: return MemberAccess::Public; + case DINode::FlagProtected: return MemberAccess::Protected; + case 0: + // If there was no explicit access control, provide the default for the tag. + return RecordTag == dwarf::DW_TAG_class_type ? MemberAccess::Private + : MemberAccess::Public; + } + llvm_unreachable("access flags are exclusive"); +} + +static MethodOptions translateMethodOptionFlags(const DISubprogram *SP) { + if (SP->isArtificial()) + return MethodOptions::CompilerGenerated; + + // FIXME: Handle other MethodOptions. + + return MethodOptions::None; +} + +static MethodKind translateMethodKindFlags(const DISubprogram *SP, + bool Introduced) { + if (SP->getFlags() & DINode::FlagStaticMember) + return MethodKind::Static; + + switch (SP->getVirtuality()) { + case dwarf::DW_VIRTUALITY_none: + break; + case dwarf::DW_VIRTUALITY_virtual: + return Introduced ? MethodKind::IntroducingVirtual : MethodKind::Virtual; + case dwarf::DW_VIRTUALITY_pure_virtual: + return Introduced ? MethodKind::PureIntroducingVirtual + : MethodKind::PureVirtual; + default: + llvm_unreachable("unhandled virtuality case"); + } + + return MethodKind::Vanilla; +} + +static TypeRecordKind getRecordKind(const DICompositeType *Ty) { + switch (Ty->getTag()) { + case dwarf::DW_TAG_class_type: return TypeRecordKind::Class; + case dwarf::DW_TAG_structure_type: return TypeRecordKind::Struct; + } + llvm_unreachable("unexpected tag"); +} + +/// Return ClassOptions that should be present on both the forward declaration +/// and the defintion of a tag type. +static ClassOptions getCommonClassOptions(const DICompositeType *Ty) { + ClassOptions CO = ClassOptions::None; + + // MSVC always sets this flag, even for local types. Clang doesn't always + // appear to give every type a linkage name, which may be problematic for us. + // FIXME: Investigate the consequences of not following them here. + if (!Ty->getIdentifier().empty()) + CO |= ClassOptions::HasUniqueName; + + // Put the Nested flag on a type if it appears immediately inside a tag type. + // Do not walk the scope chain. Do not attempt to compute ContainsNestedClass + // here. That flag is only set on definitions, and not forward declarations. + const DIScope *ImmediateScope = Ty->getScope(); + if (ImmediateScope && isa<DICompositeType>(ImmediateScope)) + CO |= ClassOptions::Nested; + + // Put the Scoped flag on function-local types. MSVC puts this flag for enum + // type only when it has an immediate function scope. Clang never puts enums + // inside DILexicalBlock scopes. Enum types, as generated by clang, are + // always in function, class, or file scopes. + if (Ty->getTag() == dwarf::DW_TAG_enumeration_type) { + if (ImmediateScope && isa<DISubprogram>(ImmediateScope)) + CO |= ClassOptions::Scoped; + } else { + for (const DIScope *Scope = ImmediateScope; Scope != nullptr; + Scope = Scope->getScope()) { + if (isa<DISubprogram>(Scope)) { + CO |= ClassOptions::Scoped; + break; + } + } + } + + return CO; +} + +void CodeViewDebug::addUDTSrcLine(const DIType *Ty, TypeIndex TI) { + switch (Ty->getTag()) { + case dwarf::DW_TAG_class_type: + case dwarf::DW_TAG_structure_type: + case dwarf::DW_TAG_union_type: + case dwarf::DW_TAG_enumeration_type: + break; + default: + return; + } + + if (const auto *File = Ty->getFile()) { + StringIdRecord SIDR(TypeIndex(0x0), getFullFilepath(File)); + TypeIndex SIDI = TypeTable.writeLeafType(SIDR); + + UdtSourceLineRecord USLR(TI, SIDI, Ty->getLine()); + TypeTable.writeLeafType(USLR); + } +} + +TypeIndex CodeViewDebug::lowerTypeEnum(const DICompositeType *Ty) { + ClassOptions CO = getCommonClassOptions(Ty); + TypeIndex FTI; + unsigned EnumeratorCount = 0; + + if (Ty->isForwardDecl()) { + CO |= ClassOptions::ForwardReference; + } else { + ContinuationRecordBuilder ContinuationBuilder; + ContinuationBuilder.begin(ContinuationRecordKind::FieldList); + for (const DINode *Element : Ty->getElements()) { + // We assume that the frontend provides all members in source declaration + // order, which is what MSVC does. + if (auto *Enumerator = dyn_cast_or_null<DIEnumerator>(Element)) { + EnumeratorRecord ER(MemberAccess::Public, + APSInt::getUnsigned(Enumerator->getValue()), + Enumerator->getName()); + ContinuationBuilder.writeMemberType(ER); + EnumeratorCount++; + } + } + FTI = TypeTable.insertRecord(ContinuationBuilder); + } + + std::string FullName = getFullyQualifiedName(Ty); + + EnumRecord ER(EnumeratorCount, CO, FTI, FullName, Ty->getIdentifier(), + getTypeIndex(Ty->getBaseType())); + TypeIndex EnumTI = TypeTable.writeLeafType(ER); + + addUDTSrcLine(Ty, EnumTI); + + return EnumTI; +} + +//===----------------------------------------------------------------------===// +// ClassInfo +//===----------------------------------------------------------------------===// + +struct llvm::ClassInfo { + struct MemberInfo { + const DIDerivedType *MemberTypeNode; + uint64_t BaseOffset; + }; + // [MemberInfo] + using MemberList = std::vector<MemberInfo>; + + using MethodsList = TinyPtrVector<const DISubprogram *>; + // MethodName -> MethodsList + using MethodsMap = MapVector<MDString *, MethodsList>; + + /// Base classes. + std::vector<const DIDerivedType *> Inheritance; + + /// Direct members. + MemberList Members; + // Direct overloaded methods gathered by name. + MethodsMap Methods; + + TypeIndex VShapeTI; + + std::vector<const DIType *> NestedTypes; +}; + +void CodeViewDebug::clear() { + assert(CurFn == nullptr); + FileIdMap.clear(); + FnDebugInfo.clear(); + FileToFilepathMap.clear(); + LocalUDTs.clear(); + GlobalUDTs.clear(); + TypeIndices.clear(); + CompleteTypeIndices.clear(); + ScopeGlobals.clear(); +} + +void CodeViewDebug::collectMemberInfo(ClassInfo &Info, + const DIDerivedType *DDTy) { + if (!DDTy->getName().empty()) { + Info.Members.push_back({DDTy, 0}); + return; + } + + // An unnamed member may represent a nested struct or union. Attempt to + // interpret the unnamed member as a DICompositeType possibly wrapped in + // qualifier types. Add all the indirect fields to the current record if that + // succeeds, and drop the member if that fails. + assert((DDTy->getOffsetInBits() % 8) == 0 && "Unnamed bitfield member!"); + uint64_t Offset = DDTy->getOffsetInBits(); + const DIType *Ty = DDTy->getBaseType(); + bool FullyResolved = false; + while (!FullyResolved) { + switch (Ty->getTag()) { + case dwarf::DW_TAG_const_type: + case dwarf::DW_TAG_volatile_type: + // FIXME: we should apply the qualifier types to the indirect fields + // rather than dropping them. + Ty = cast<DIDerivedType>(Ty)->getBaseType(); + break; + default: + FullyResolved = true; + break; + } + } + + const DICompositeType *DCTy = dyn_cast<DICompositeType>(Ty); + if (!DCTy) + return; + + ClassInfo NestedInfo = collectClassInfo(DCTy); + for (const ClassInfo::MemberInfo &IndirectField : NestedInfo.Members) + Info.Members.push_back( + {IndirectField.MemberTypeNode, IndirectField.BaseOffset + Offset}); +} + +ClassInfo CodeViewDebug::collectClassInfo(const DICompositeType *Ty) { + ClassInfo Info; + // Add elements to structure type. + DINodeArray Elements = Ty->getElements(); + for (auto *Element : Elements) { + // We assume that the frontend provides all members in source declaration + // order, which is what MSVC does. + if (!Element) + continue; + if (auto *SP = dyn_cast<DISubprogram>(Element)) { + Info.Methods[SP->getRawName()].push_back(SP); + } else if (auto *DDTy = dyn_cast<DIDerivedType>(Element)) { + if (DDTy->getTag() == dwarf::DW_TAG_member) { + collectMemberInfo(Info, DDTy); + } else if (DDTy->getTag() == dwarf::DW_TAG_inheritance) { + Info.Inheritance.push_back(DDTy); + } else if (DDTy->getTag() == dwarf::DW_TAG_pointer_type && + DDTy->getName() == "__vtbl_ptr_type") { + Info.VShapeTI = getTypeIndex(DDTy); + } else if (DDTy->getTag() == dwarf::DW_TAG_typedef) { + Info.NestedTypes.push_back(DDTy); + } else if (DDTy->getTag() == dwarf::DW_TAG_friend) { + // Ignore friend members. It appears that MSVC emitted info about + // friends in the past, but modern versions do not. + } + } else if (auto *Composite = dyn_cast<DICompositeType>(Element)) { + Info.NestedTypes.push_back(Composite); + } + // Skip other unrecognized kinds of elements. + } + return Info; +} + +static bool shouldAlwaysEmitCompleteClassType(const DICompositeType *Ty) { + // This routine is used by lowerTypeClass and lowerTypeUnion to determine + // if a complete type should be emitted instead of a forward reference. + return Ty->getName().empty() && Ty->getIdentifier().empty() && + !Ty->isForwardDecl(); +} + +TypeIndex CodeViewDebug::lowerTypeClass(const DICompositeType *Ty) { + // Emit the complete type for unnamed structs. C++ classes with methods + // which have a circular reference back to the class type are expected to + // be named by the front-end and should not be "unnamed". C unnamed + // structs should not have circular references. + if (shouldAlwaysEmitCompleteClassType(Ty)) { + // If this unnamed complete type is already in the process of being defined + // then the description of the type is malformed and cannot be emitted + // into CodeView correctly so report a fatal error. + auto I = CompleteTypeIndices.find(Ty); + if (I != CompleteTypeIndices.end() && I->second == TypeIndex()) + report_fatal_error("cannot debug circular reference to unnamed type"); + return getCompleteTypeIndex(Ty); + } + + // First, construct the forward decl. Don't look into Ty to compute the + // forward decl options, since it might not be available in all TUs. + TypeRecordKind Kind = getRecordKind(Ty); + ClassOptions CO = + ClassOptions::ForwardReference | getCommonClassOptions(Ty); + std::string FullName = getFullyQualifiedName(Ty); + ClassRecord CR(Kind, 0, CO, TypeIndex(), TypeIndex(), TypeIndex(), 0, + FullName, Ty->getIdentifier()); + TypeIndex FwdDeclTI = TypeTable.writeLeafType(CR); + if (!Ty->isForwardDecl()) + DeferredCompleteTypes.push_back(Ty); + return FwdDeclTI; +} + +TypeIndex CodeViewDebug::lowerCompleteTypeClass(const DICompositeType *Ty) { + // Construct the field list and complete type record. + TypeRecordKind Kind = getRecordKind(Ty); + ClassOptions CO = getCommonClassOptions(Ty); + TypeIndex FieldTI; + TypeIndex VShapeTI; + unsigned FieldCount; + bool ContainsNestedClass; + std::tie(FieldTI, VShapeTI, FieldCount, ContainsNestedClass) = + lowerRecordFieldList(Ty); + + if (ContainsNestedClass) + CO |= ClassOptions::ContainsNestedClass; + + // MSVC appears to set this flag by searching any destructor or method with + // FunctionOptions::Constructor among the emitted members. Clang AST has all + // the members, however special member functions are not yet emitted into + // debug information. For now checking a class's non-triviality seems enough. + // FIXME: not true for a nested unnamed struct. + if (isNonTrivial(Ty)) + CO |= ClassOptions::HasConstructorOrDestructor; + + std::string FullName = getFullyQualifiedName(Ty); + + uint64_t SizeInBytes = Ty->getSizeInBits() / 8; + + ClassRecord CR(Kind, FieldCount, CO, FieldTI, TypeIndex(), VShapeTI, + SizeInBytes, FullName, Ty->getIdentifier()); + TypeIndex ClassTI = TypeTable.writeLeafType(CR); + + addUDTSrcLine(Ty, ClassTI); + + addToUDTs(Ty); + + return ClassTI; +} + +TypeIndex CodeViewDebug::lowerTypeUnion(const DICompositeType *Ty) { + // Emit the complete type for unnamed unions. + if (shouldAlwaysEmitCompleteClassType(Ty)) + return getCompleteTypeIndex(Ty); + + ClassOptions CO = + ClassOptions::ForwardReference | getCommonClassOptions(Ty); + std::string FullName = getFullyQualifiedName(Ty); + UnionRecord UR(0, CO, TypeIndex(), 0, FullName, Ty->getIdentifier()); + TypeIndex FwdDeclTI = TypeTable.writeLeafType(UR); + if (!Ty->isForwardDecl()) + DeferredCompleteTypes.push_back(Ty); + return FwdDeclTI; +} + +TypeIndex CodeViewDebug::lowerCompleteTypeUnion(const DICompositeType *Ty) { + ClassOptions CO = ClassOptions::Sealed | getCommonClassOptions(Ty); + TypeIndex FieldTI; + unsigned FieldCount; + bool ContainsNestedClass; + std::tie(FieldTI, std::ignore, FieldCount, ContainsNestedClass) = + lowerRecordFieldList(Ty); + + if (ContainsNestedClass) + CO |= ClassOptions::ContainsNestedClass; + + uint64_t SizeInBytes = Ty->getSizeInBits() / 8; + std::string FullName = getFullyQualifiedName(Ty); + + UnionRecord UR(FieldCount, CO, FieldTI, SizeInBytes, FullName, + Ty->getIdentifier()); + TypeIndex UnionTI = TypeTable.writeLeafType(UR); + + addUDTSrcLine(Ty, UnionTI); + + addToUDTs(Ty); + + return UnionTI; +} + +std::tuple<TypeIndex, TypeIndex, unsigned, bool> +CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) { + // Manually count members. MSVC appears to count everything that generates a + // field list record. Each individual overload in a method overload group + // contributes to this count, even though the overload group is a single field + // list record. + unsigned MemberCount = 0; + ClassInfo Info = collectClassInfo(Ty); + ContinuationRecordBuilder ContinuationBuilder; + ContinuationBuilder.begin(ContinuationRecordKind::FieldList); + + // Create base classes. + for (const DIDerivedType *I : Info.Inheritance) { + if (I->getFlags() & DINode::FlagVirtual) { + // Virtual base. + unsigned VBPtrOffset = I->getVBPtrOffset(); + // FIXME: Despite the accessor name, the offset is really in bytes. + unsigned VBTableIndex = I->getOffsetInBits() / 4; + auto RecordKind = (I->getFlags() & DINode::FlagIndirectVirtualBase) == DINode::FlagIndirectVirtualBase + ? TypeRecordKind::IndirectVirtualBaseClass + : TypeRecordKind::VirtualBaseClass; + VirtualBaseClassRecord VBCR( + RecordKind, translateAccessFlags(Ty->getTag(), I->getFlags()), + getTypeIndex(I->getBaseType()), getVBPTypeIndex(), VBPtrOffset, + VBTableIndex); + + ContinuationBuilder.writeMemberType(VBCR); + MemberCount++; + } else { + assert(I->getOffsetInBits() % 8 == 0 && + "bases must be on byte boundaries"); + BaseClassRecord BCR(translateAccessFlags(Ty->getTag(), I->getFlags()), + getTypeIndex(I->getBaseType()), + I->getOffsetInBits() / 8); + ContinuationBuilder.writeMemberType(BCR); + MemberCount++; + } + } + + // Create members. + for (ClassInfo::MemberInfo &MemberInfo : Info.Members) { + const DIDerivedType *Member = MemberInfo.MemberTypeNode; + TypeIndex MemberBaseType = getTypeIndex(Member->getBaseType()); + StringRef MemberName = Member->getName(); + MemberAccess Access = + translateAccessFlags(Ty->getTag(), Member->getFlags()); + + if (Member->isStaticMember()) { + StaticDataMemberRecord SDMR(Access, MemberBaseType, MemberName); + ContinuationBuilder.writeMemberType(SDMR); + MemberCount++; + continue; + } + + // Virtual function pointer member. + if ((Member->getFlags() & DINode::FlagArtificial) && + Member->getName().startswith("_vptr$")) { + VFPtrRecord VFPR(getTypeIndex(Member->getBaseType())); + ContinuationBuilder.writeMemberType(VFPR); + MemberCount++; + continue; + } + + // Data member. + uint64_t MemberOffsetInBits = + Member->getOffsetInBits() + MemberInfo.BaseOffset; + if (Member->isBitField()) { + uint64_t StartBitOffset = MemberOffsetInBits; + if (const auto *CI = + dyn_cast_or_null<ConstantInt>(Member->getStorageOffsetInBits())) { + MemberOffsetInBits = CI->getZExtValue() + MemberInfo.BaseOffset; + } + StartBitOffset -= MemberOffsetInBits; + BitFieldRecord BFR(MemberBaseType, Member->getSizeInBits(), + StartBitOffset); + MemberBaseType = TypeTable.writeLeafType(BFR); + } + uint64_t MemberOffsetInBytes = MemberOffsetInBits / 8; + DataMemberRecord DMR(Access, MemberBaseType, MemberOffsetInBytes, + MemberName); + ContinuationBuilder.writeMemberType(DMR); + MemberCount++; + } + + // Create methods + for (auto &MethodItr : Info.Methods) { + StringRef Name = MethodItr.first->getString(); + + std::vector<OneMethodRecord> Methods; + for (const DISubprogram *SP : MethodItr.second) { + TypeIndex MethodType = getMemberFunctionType(SP, Ty); + bool Introduced = SP->getFlags() & DINode::FlagIntroducedVirtual; + + unsigned VFTableOffset = -1; + if (Introduced) + VFTableOffset = SP->getVirtualIndex() * getPointerSizeInBytes(); + + Methods.push_back(OneMethodRecord( + MethodType, translateAccessFlags(Ty->getTag(), SP->getFlags()), + translateMethodKindFlags(SP, Introduced), + translateMethodOptionFlags(SP), VFTableOffset, Name)); + MemberCount++; + } + assert(!Methods.empty() && "Empty methods map entry"); + if (Methods.size() == 1) + ContinuationBuilder.writeMemberType(Methods[0]); + else { + // FIXME: Make this use its own ContinuationBuilder so that + // MethodOverloadList can be split correctly. + MethodOverloadListRecord MOLR(Methods); + TypeIndex MethodList = TypeTable.writeLeafType(MOLR); + + OverloadedMethodRecord OMR(Methods.size(), MethodList, Name); + ContinuationBuilder.writeMemberType(OMR); + } + } + + // Create nested classes. + for (const DIType *Nested : Info.NestedTypes) { + NestedTypeRecord R(getTypeIndex(Nested), Nested->getName()); + ContinuationBuilder.writeMemberType(R); + MemberCount++; + } + + TypeIndex FieldTI = TypeTable.insertRecord(ContinuationBuilder); + return std::make_tuple(FieldTI, Info.VShapeTI, MemberCount, + !Info.NestedTypes.empty()); +} + +TypeIndex CodeViewDebug::getVBPTypeIndex() { + if (!VBPType.getIndex()) { + // Make a 'const int *' type. + ModifierRecord MR(TypeIndex::Int32(), ModifierOptions::Const); + TypeIndex ModifiedTI = TypeTable.writeLeafType(MR); + + PointerKind PK = getPointerSizeInBytes() == 8 ? PointerKind::Near64 + : PointerKind::Near32; + PointerMode PM = PointerMode::Pointer; + PointerOptions PO = PointerOptions::None; + PointerRecord PR(ModifiedTI, PK, PM, PO, getPointerSizeInBytes()); + VBPType = TypeTable.writeLeafType(PR); + } + + return VBPType; +} + +TypeIndex CodeViewDebug::getTypeIndex(const DIType *Ty, const DIType *ClassTy) { + // The null DIType is the void type. Don't try to hash it. + if (!Ty) + return TypeIndex::Void(); + + // Check if we've already translated this type. Don't try to do a + // get-or-create style insertion that caches the hash lookup across the + // lowerType call. It will update the TypeIndices map. + auto I = TypeIndices.find({Ty, ClassTy}); + if (I != TypeIndices.end()) + return I->second; + + TypeLoweringScope S(*this); + TypeIndex TI = lowerType(Ty, ClassTy); + return recordTypeIndexForDINode(Ty, TI, ClassTy); +} + +codeview::TypeIndex +CodeViewDebug::getTypeIndexForThisPtr(const DIDerivedType *PtrTy, + const DISubroutineType *SubroutineTy) { + assert(PtrTy->getTag() == dwarf::DW_TAG_pointer_type && + "this type must be a pointer type"); + + PointerOptions Options = PointerOptions::None; + if (SubroutineTy->getFlags() & DINode::DIFlags::FlagLValueReference) + Options = PointerOptions::LValueRefThisPointer; + else if (SubroutineTy->getFlags() & DINode::DIFlags::FlagRValueReference) + Options = PointerOptions::RValueRefThisPointer; + + // Check if we've already translated this type. If there is no ref qualifier + // on the function then we look up this pointer type with no associated class + // so that the TypeIndex for the this pointer can be shared with the type + // index for other pointers to this class type. If there is a ref qualifier + // then we lookup the pointer using the subroutine as the parent type. + auto I = TypeIndices.find({PtrTy, SubroutineTy}); + if (I != TypeIndices.end()) + return I->second; + + TypeLoweringScope S(*this); + TypeIndex TI = lowerTypePointer(PtrTy, Options); + return recordTypeIndexForDINode(PtrTy, TI, SubroutineTy); +} + +TypeIndex CodeViewDebug::getTypeIndexForReferenceTo(const DIType *Ty) { + PointerRecord PR(getTypeIndex(Ty), + getPointerSizeInBytes() == 8 ? PointerKind::Near64 + : PointerKind::Near32, + PointerMode::LValueReference, PointerOptions::None, + Ty->getSizeInBits() / 8); + return TypeTable.writeLeafType(PR); +} + +TypeIndex CodeViewDebug::getCompleteTypeIndex(const DIType *Ty) { + // The null DIType is the void type. Don't try to hash it. + if (!Ty) + return TypeIndex::Void(); + + // Look through typedefs when getting the complete type index. Call + // getTypeIndex on the typdef to ensure that any UDTs are accumulated and are + // emitted only once. + if (Ty->getTag() == dwarf::DW_TAG_typedef) + (void)getTypeIndex(Ty); + while (Ty->getTag() == dwarf::DW_TAG_typedef) + Ty = cast<DIDerivedType>(Ty)->getBaseType(); + + // If this is a non-record type, the complete type index is the same as the + // normal type index. Just call getTypeIndex. + switch (Ty->getTag()) { + case dwarf::DW_TAG_class_type: + case dwarf::DW_TAG_structure_type: + case dwarf::DW_TAG_union_type: + break; + default: + return getTypeIndex(Ty); + } + + const auto *CTy = cast<DICompositeType>(Ty); + + TypeLoweringScope S(*this); + + // Make sure the forward declaration is emitted first. It's unclear if this + // is necessary, but MSVC does it, and we should follow suit until we can show + // otherwise. + // We only emit a forward declaration for named types. + if (!CTy->getName().empty() || !CTy->getIdentifier().empty()) { + TypeIndex FwdDeclTI = getTypeIndex(CTy); + + // Just use the forward decl if we don't have complete type info. This + // might happen if the frontend is using modules and expects the complete + // definition to be emitted elsewhere. + if (CTy->isForwardDecl()) + return FwdDeclTI; + } + + // Check if we've already translated the complete record type. + // Insert the type with a null TypeIndex to signify that the type is currently + // being lowered. + auto InsertResult = CompleteTypeIndices.insert({CTy, TypeIndex()}); + if (!InsertResult.second) + return InsertResult.first->second; + + TypeIndex TI; + switch (CTy->getTag()) { + case dwarf::DW_TAG_class_type: + case dwarf::DW_TAG_structure_type: + TI = lowerCompleteTypeClass(CTy); + break; + case dwarf::DW_TAG_union_type: + TI = lowerCompleteTypeUnion(CTy); + break; + default: + llvm_unreachable("not a record"); + } + + // Update the type index associated with this CompositeType. This cannot + // use the 'InsertResult' iterator above because it is potentially + // invalidated by map insertions which can occur while lowering the class + // type above. + CompleteTypeIndices[CTy] = TI; + return TI; +} + +/// Emit all the deferred complete record types. Try to do this in FIFO order, +/// and do this until fixpoint, as each complete record type typically +/// references +/// many other record types. +void CodeViewDebug::emitDeferredCompleteTypes() { + SmallVector<const DICompositeType *, 4> TypesToEmit; + while (!DeferredCompleteTypes.empty()) { + std::swap(DeferredCompleteTypes, TypesToEmit); + for (const DICompositeType *RecordTy : TypesToEmit) + getCompleteTypeIndex(RecordTy); + TypesToEmit.clear(); + } +} + +void CodeViewDebug::emitLocalVariableList(const FunctionInfo &FI, + ArrayRef<LocalVariable> Locals) { + // Get the sorted list of parameters and emit them first. + SmallVector<const LocalVariable *, 6> Params; + for (const LocalVariable &L : Locals) + if (L.DIVar->isParameter()) + Params.push_back(&L); + llvm::sort(Params, [](const LocalVariable *L, const LocalVariable *R) { + return L->DIVar->getArg() < R->DIVar->getArg(); + }); + for (const LocalVariable *L : Params) + emitLocalVariable(FI, *L); + + // Next emit all non-parameters in the order that we found them. + for (const LocalVariable &L : Locals) + if (!L.DIVar->isParameter()) + emitLocalVariable(FI, L); +} + +void CodeViewDebug::emitLocalVariable(const FunctionInfo &FI, + const LocalVariable &Var) { + // LocalSym record, see SymbolRecord.h for more info. + MCSymbol *LocalEnd = beginSymbolRecord(SymbolKind::S_LOCAL); + + LocalSymFlags Flags = LocalSymFlags::None; + if (Var.DIVar->isParameter()) + Flags |= LocalSymFlags::IsParameter; + if (Var.DefRanges.empty()) + Flags |= LocalSymFlags::IsOptimizedOut; + + OS.AddComment("TypeIndex"); + TypeIndex TI = Var.UseReferenceType + ? getTypeIndexForReferenceTo(Var.DIVar->getType()) + : getCompleteTypeIndex(Var.DIVar->getType()); + OS.EmitIntValue(TI.getIndex(), 4); + OS.AddComment("Flags"); + OS.EmitIntValue(static_cast<uint16_t>(Flags), 2); + // Truncate the name so we won't overflow the record length field. + emitNullTerminatedSymbolName(OS, Var.DIVar->getName()); + endSymbolRecord(LocalEnd); + + // Calculate the on disk prefix of the appropriate def range record. The + // records and on disk formats are described in SymbolRecords.h. BytePrefix + // should be big enough to hold all forms without memory allocation. + SmallString<20> BytePrefix; + for (const LocalVarDefRange &DefRange : Var.DefRanges) { + BytePrefix.clear(); + if (DefRange.InMemory) { + int Offset = DefRange.DataOffset; + unsigned Reg = DefRange.CVRegister; + + // 32-bit x86 call sequences often use PUSH instructions, which disrupt + // ESP-relative offsets. Use the virtual frame pointer, VFRAME or $T0, + // instead. In frames without stack realignment, $T0 will be the CFA. + if (RegisterId(Reg) == RegisterId::ESP) { + Reg = unsigned(RegisterId::VFRAME); + Offset += FI.OffsetAdjustment; + } + + // If we can use the chosen frame pointer for the frame and this isn't a + // sliced aggregate, use the smaller S_DEFRANGE_FRAMEPOINTER_REL record. + // Otherwise, use S_DEFRANGE_REGISTER_REL. + EncodedFramePtrReg EncFP = encodeFramePtrReg(RegisterId(Reg), TheCPU); + if (!DefRange.IsSubfield && EncFP != EncodedFramePtrReg::None && + (bool(Flags & LocalSymFlags::IsParameter) + ? (EncFP == FI.EncodedParamFramePtrReg) + : (EncFP == FI.EncodedLocalFramePtrReg))) { + DefRangeFramePointerRelHeader DRHdr; + DRHdr.Offset = Offset; + OS.EmitCVDefRangeDirective(DefRange.Ranges, DRHdr); + } else { + uint16_t RegRelFlags = 0; + if (DefRange.IsSubfield) { + RegRelFlags = DefRangeRegisterRelSym::IsSubfieldFlag | + (DefRange.StructOffset + << DefRangeRegisterRelSym::OffsetInParentShift); + } + DefRangeRegisterRelHeader DRHdr; + DRHdr.Register = Reg; + DRHdr.Flags = RegRelFlags; + DRHdr.BasePointerOffset = Offset; + OS.EmitCVDefRangeDirective(DefRange.Ranges, DRHdr); + } + } else { + assert(DefRange.DataOffset == 0 && "unexpected offset into register"); + if (DefRange.IsSubfield) { + DefRangeSubfieldRegisterHeader DRHdr; + DRHdr.Register = DefRange.CVRegister; + DRHdr.MayHaveNoName = 0; + DRHdr.OffsetInParent = DefRange.StructOffset; + OS.EmitCVDefRangeDirective(DefRange.Ranges, DRHdr); + } else { + DefRangeRegisterHeader DRHdr; + DRHdr.Register = DefRange.CVRegister; + DRHdr.MayHaveNoName = 0; + OS.EmitCVDefRangeDirective(DefRange.Ranges, DRHdr); + } + } + } +} + +void CodeViewDebug::emitLexicalBlockList(ArrayRef<LexicalBlock *> Blocks, + const FunctionInfo& FI) { + for (LexicalBlock *Block : Blocks) + emitLexicalBlock(*Block, FI); +} + +/// Emit an S_BLOCK32 and S_END record pair delimiting the contents of a +/// lexical block scope. +void CodeViewDebug::emitLexicalBlock(const LexicalBlock &Block, + const FunctionInfo& FI) { + MCSymbol *RecordEnd = beginSymbolRecord(SymbolKind::S_BLOCK32); + OS.AddComment("PtrParent"); + OS.EmitIntValue(0, 4); // PtrParent + OS.AddComment("PtrEnd"); + OS.EmitIntValue(0, 4); // PtrEnd + OS.AddComment("Code size"); + OS.emitAbsoluteSymbolDiff(Block.End, Block.Begin, 4); // Code Size + OS.AddComment("Function section relative address"); + OS.EmitCOFFSecRel32(Block.Begin, /*Offset=*/0); // Func Offset + OS.AddComment("Function section index"); + OS.EmitCOFFSectionIndex(FI.Begin); // Func Symbol + OS.AddComment("Lexical block name"); + emitNullTerminatedSymbolName(OS, Block.Name); // Name + endSymbolRecord(RecordEnd); + + // Emit variables local to this lexical block. + emitLocalVariableList(FI, Block.Locals); + emitGlobalVariableList(Block.Globals); + + // Emit lexical blocks contained within this block. + emitLexicalBlockList(Block.Children, FI); + + // Close the lexical block scope. + emitEndSymbolRecord(SymbolKind::S_END); +} + +/// Convenience routine for collecting lexical block information for a list +/// of lexical scopes. +void CodeViewDebug::collectLexicalBlockInfo( + SmallVectorImpl<LexicalScope *> &Scopes, + SmallVectorImpl<LexicalBlock *> &Blocks, + SmallVectorImpl<LocalVariable> &Locals, + SmallVectorImpl<CVGlobalVariable> &Globals) { + for (LexicalScope *Scope : Scopes) + collectLexicalBlockInfo(*Scope, Blocks, Locals, Globals); +} + +/// Populate the lexical blocks and local variable lists of the parent with +/// information about the specified lexical scope. +void CodeViewDebug::collectLexicalBlockInfo( + LexicalScope &Scope, + SmallVectorImpl<LexicalBlock *> &ParentBlocks, + SmallVectorImpl<LocalVariable> &ParentLocals, + SmallVectorImpl<CVGlobalVariable> &ParentGlobals) { + if (Scope.isAbstractScope()) + return; + + // Gather information about the lexical scope including local variables, + // global variables, and address ranges. + bool IgnoreScope = false; + auto LI = ScopeVariables.find(&Scope); + SmallVectorImpl<LocalVariable> *Locals = + LI != ScopeVariables.end() ? &LI->second : nullptr; + auto GI = ScopeGlobals.find(Scope.getScopeNode()); + SmallVectorImpl<CVGlobalVariable> *Globals = + GI != ScopeGlobals.end() ? GI->second.get() : nullptr; + const DILexicalBlock *DILB = dyn_cast<DILexicalBlock>(Scope.getScopeNode()); + const SmallVectorImpl<InsnRange> &Ranges = Scope.getRanges(); + + // Ignore lexical scopes which do not contain variables. + if (!Locals && !Globals) + IgnoreScope = true; + + // Ignore lexical scopes which are not lexical blocks. + if (!DILB) + IgnoreScope = true; + + // Ignore scopes which have too many address ranges to represent in the + // current CodeView format or do not have a valid address range. + // + // For lexical scopes with multiple address ranges you may be tempted to + // construct a single range covering every instruction where the block is + // live and everything in between. Unfortunately, Visual Studio only + // displays variables from the first matching lexical block scope. If the + // first lexical block contains exception handling code or cold code which + // is moved to the bottom of the routine creating a single range covering + // nearly the entire routine, then it will hide all other lexical blocks + // and the variables they contain. + if (Ranges.size() != 1 || !getLabelAfterInsn(Ranges.front().second)) + IgnoreScope = true; + + if (IgnoreScope) { + // This scope can be safely ignored and eliminating it will reduce the + // size of the debug information. Be sure to collect any variable and scope + // information from the this scope or any of its children and collapse them + // into the parent scope. + if (Locals) + ParentLocals.append(Locals->begin(), Locals->end()); + if (Globals) + ParentGlobals.append(Globals->begin(), Globals->end()); + collectLexicalBlockInfo(Scope.getChildren(), + ParentBlocks, + ParentLocals, + ParentGlobals); + return; + } + + // Create a new CodeView lexical block for this lexical scope. If we've + // seen this DILexicalBlock before then the scope tree is malformed and + // we can handle this gracefully by not processing it a second time. + auto BlockInsertion = CurFn->LexicalBlocks.insert({DILB, LexicalBlock()}); + if (!BlockInsertion.second) + return; + + // Create a lexical block containing the variables and collect the the + // lexical block information for the children. + const InsnRange &Range = Ranges.front(); + assert(Range.first && Range.second); + LexicalBlock &Block = BlockInsertion.first->second; + Block.Begin = getLabelBeforeInsn(Range.first); + Block.End = getLabelAfterInsn(Range.second); + assert(Block.Begin && "missing label for scope begin"); + assert(Block.End && "missing label for scope end"); + Block.Name = DILB->getName(); + if (Locals) + Block.Locals = std::move(*Locals); + if (Globals) + Block.Globals = std::move(*Globals); + ParentBlocks.push_back(&Block); + collectLexicalBlockInfo(Scope.getChildren(), + Block.Children, + Block.Locals, + Block.Globals); +} + +void CodeViewDebug::endFunctionImpl(const MachineFunction *MF) { + const Function &GV = MF->getFunction(); + assert(FnDebugInfo.count(&GV)); + assert(CurFn == FnDebugInfo[&GV].get()); + + collectVariableInfo(GV.getSubprogram()); + + // Build the lexical block structure to emit for this routine. + if (LexicalScope *CFS = LScopes.getCurrentFunctionScope()) + collectLexicalBlockInfo(*CFS, + CurFn->ChildBlocks, + CurFn->Locals, + CurFn->Globals); + + // Clear the scope and variable information from the map which will not be + // valid after we have finished processing this routine. This also prepares + // the map for the subsequent routine. + ScopeVariables.clear(); + + // Don't emit anything if we don't have any line tables. + // Thunks are compiler-generated and probably won't have source correlation. + if (!CurFn->HaveLineInfo && !GV.getSubprogram()->isThunk()) { + FnDebugInfo.erase(&GV); + CurFn = nullptr; + return; + } + + CurFn->Annotations = MF->getCodeViewAnnotations(); + CurFn->HeapAllocSites = MF->getCodeViewHeapAllocSites(); + + CurFn->End = Asm->getFunctionEnd(); + + CurFn = nullptr; +} + +// Usable locations are valid with non-zero line numbers. A line number of zero +// corresponds to optimized code that doesn't have a distinct source location. +// In this case, we try to use the previous or next source location depending on +// the context. +static bool isUsableDebugLoc(DebugLoc DL) { + return DL && DL.getLine() != 0; +} + +void CodeViewDebug::beginInstruction(const MachineInstr *MI) { + DebugHandlerBase::beginInstruction(MI); + + // Ignore DBG_VALUE and DBG_LABEL locations and function prologue. + if (!Asm || !CurFn || MI->isDebugInstr() || + MI->getFlag(MachineInstr::FrameSetup)) + return; + + // If the first instruction of a new MBB has no location, find the first + // instruction with a location and use that. + DebugLoc DL = MI->getDebugLoc(); + if (!isUsableDebugLoc(DL) && MI->getParent() != PrevInstBB) { + for (const auto &NextMI : *MI->getParent()) { + if (NextMI.isDebugInstr()) + continue; + DL = NextMI.getDebugLoc(); + if (isUsableDebugLoc(DL)) + break; + } + // FIXME: Handle the case where the BB has no valid locations. This would + // probably require doing a real dataflow analysis. + } + PrevInstBB = MI->getParent(); + + // If we still don't have a debug location, don't record a location. + if (!isUsableDebugLoc(DL)) + return; + + maybeRecordLocation(DL, Asm->MF); +} + +MCSymbol *CodeViewDebug::beginCVSubsection(DebugSubsectionKind Kind) { + MCSymbol *BeginLabel = MMI->getContext().createTempSymbol(), + *EndLabel = MMI->getContext().createTempSymbol(); + OS.EmitIntValue(unsigned(Kind), 4); + OS.AddComment("Subsection size"); + OS.emitAbsoluteSymbolDiff(EndLabel, BeginLabel, 4); + OS.EmitLabel(BeginLabel); + return EndLabel; +} + +void CodeViewDebug::endCVSubsection(MCSymbol *EndLabel) { + OS.EmitLabel(EndLabel); + // Every subsection must be aligned to a 4-byte boundary. + OS.EmitValueToAlignment(4); +} + +static StringRef getSymbolName(SymbolKind SymKind) { + for (const EnumEntry<SymbolKind> &EE : getSymbolTypeNames()) + if (EE.Value == SymKind) + return EE.Name; + return ""; +} + +MCSymbol *CodeViewDebug::beginSymbolRecord(SymbolKind SymKind) { + MCSymbol *BeginLabel = MMI->getContext().createTempSymbol(), + *EndLabel = MMI->getContext().createTempSymbol(); + OS.AddComment("Record length"); + OS.emitAbsoluteSymbolDiff(EndLabel, BeginLabel, 2); + OS.EmitLabel(BeginLabel); + if (OS.isVerboseAsm()) + OS.AddComment("Record kind: " + getSymbolName(SymKind)); + OS.EmitIntValue(unsigned(SymKind), 2); + return EndLabel; +} + +void CodeViewDebug::endSymbolRecord(MCSymbol *SymEnd) { + // MSVC does not pad out symbol records to four bytes, but LLVM does to avoid + // an extra copy of every symbol record in LLD. This increases object file + // size by less than 1% in the clang build, and is compatible with the Visual + // C++ linker. + OS.EmitValueToAlignment(4); + OS.EmitLabel(SymEnd); +} + +void CodeViewDebug::emitEndSymbolRecord(SymbolKind EndKind) { + OS.AddComment("Record length"); + OS.EmitIntValue(2, 2); + if (OS.isVerboseAsm()) + OS.AddComment("Record kind: " + getSymbolName(EndKind)); + OS.EmitIntValue(unsigned(EndKind), 2); // Record Kind +} + +void CodeViewDebug::emitDebugInfoForUDTs( + ArrayRef<std::pair<std::string, const DIType *>> UDTs) { + for (const auto &UDT : UDTs) { + const DIType *T = UDT.second; + assert(shouldEmitUdt(T)); + + MCSymbol *UDTRecordEnd = beginSymbolRecord(SymbolKind::S_UDT); + OS.AddComment("Type"); + OS.EmitIntValue(getCompleteTypeIndex(T).getIndex(), 4); + emitNullTerminatedSymbolName(OS, UDT.first); + endSymbolRecord(UDTRecordEnd); + } +} + +void CodeViewDebug::collectGlobalVariableInfo() { + DenseMap<const DIGlobalVariableExpression *, const GlobalVariable *> + GlobalMap; + for (const GlobalVariable &GV : MMI->getModule()->globals()) { + SmallVector<DIGlobalVariableExpression *, 1> GVEs; + GV.getDebugInfo(GVEs); + for (const auto *GVE : GVEs) + GlobalMap[GVE] = &GV; + } + + NamedMDNode *CUs = MMI->getModule()->getNamedMetadata("llvm.dbg.cu"); + for (const MDNode *Node : CUs->operands()) { + const auto *CU = cast<DICompileUnit>(Node); + for (const auto *GVE : CU->getGlobalVariables()) { + const DIGlobalVariable *DIGV = GVE->getVariable(); + const DIExpression *DIE = GVE->getExpression(); + + // Emit constant global variables in a global symbol section. + if (GlobalMap.count(GVE) == 0 && DIE->isConstant()) { + CVGlobalVariable CVGV = {DIGV, DIE}; + GlobalVariables.emplace_back(std::move(CVGV)); + } + + const auto *GV = GlobalMap.lookup(GVE); + if (!GV || GV->isDeclarationForLinker()) + continue; + + DIScope *Scope = DIGV->getScope(); + SmallVector<CVGlobalVariable, 1> *VariableList; + if (Scope && isa<DILocalScope>(Scope)) { + // Locate a global variable list for this scope, creating one if + // necessary. + auto Insertion = ScopeGlobals.insert( + {Scope, std::unique_ptr<GlobalVariableList>()}); + if (Insertion.second) + Insertion.first->second = std::make_unique<GlobalVariableList>(); + VariableList = Insertion.first->second.get(); + } else if (GV->hasComdat()) + // Emit this global variable into a COMDAT section. + VariableList = &ComdatVariables; + else + // Emit this global variable in a single global symbol section. + VariableList = &GlobalVariables; + CVGlobalVariable CVGV = {DIGV, GV}; + VariableList->emplace_back(std::move(CVGV)); + } + } +} + +void CodeViewDebug::emitDebugInfoForGlobals() { + // First, emit all globals that are not in a comdat in a single symbol + // substream. MSVC doesn't like it if the substream is empty, so only open + // it if we have at least one global to emit. + switchToDebugSectionForSymbol(nullptr); + if (!GlobalVariables.empty()) { + OS.AddComment("Symbol subsection for globals"); + MCSymbol *EndLabel = beginCVSubsection(DebugSubsectionKind::Symbols); + emitGlobalVariableList(GlobalVariables); + endCVSubsection(EndLabel); + } + + // Second, emit each global that is in a comdat into its own .debug$S + // section along with its own symbol substream. + for (const CVGlobalVariable &CVGV : ComdatVariables) { + const GlobalVariable *GV = CVGV.GVInfo.get<const GlobalVariable *>(); + MCSymbol *GVSym = Asm->getSymbol(GV); + OS.AddComment("Symbol subsection for " + + Twine(GlobalValue::dropLLVMManglingEscape(GV->getName()))); + switchToDebugSectionForSymbol(GVSym); + MCSymbol *EndLabel = beginCVSubsection(DebugSubsectionKind::Symbols); + // FIXME: emitDebugInfoForGlobal() doesn't handle DIExpressions. + emitDebugInfoForGlobal(CVGV); + endCVSubsection(EndLabel); + } +} + +void CodeViewDebug::emitDebugInfoForRetainedTypes() { + NamedMDNode *CUs = MMI->getModule()->getNamedMetadata("llvm.dbg.cu"); + for (const MDNode *Node : CUs->operands()) { + for (auto *Ty : cast<DICompileUnit>(Node)->getRetainedTypes()) { + if (DIType *RT = dyn_cast<DIType>(Ty)) { + getTypeIndex(RT); + // FIXME: Add to global/local DTU list. + } + } + } +} + +// Emit each global variable in the specified array. +void CodeViewDebug::emitGlobalVariableList(ArrayRef<CVGlobalVariable> Globals) { + for (const CVGlobalVariable &CVGV : Globals) { + // FIXME: emitDebugInfoForGlobal() doesn't handle DIExpressions. + emitDebugInfoForGlobal(CVGV); + } +} + +void CodeViewDebug::emitDebugInfoForGlobal(const CVGlobalVariable &CVGV) { + const DIGlobalVariable *DIGV = CVGV.DIGV; + if (const GlobalVariable *GV = + CVGV.GVInfo.dyn_cast<const GlobalVariable *>()) { + // DataSym record, see SymbolRecord.h for more info. Thread local data + // happens to have the same format as global data. + MCSymbol *GVSym = Asm->getSymbol(GV); + SymbolKind DataSym = GV->isThreadLocal() + ? (DIGV->isLocalToUnit() ? SymbolKind::S_LTHREAD32 + : SymbolKind::S_GTHREAD32) + : (DIGV->isLocalToUnit() ? SymbolKind::S_LDATA32 + : SymbolKind::S_GDATA32); + MCSymbol *DataEnd = beginSymbolRecord(DataSym); + OS.AddComment("Type"); + OS.EmitIntValue(getCompleteTypeIndex(DIGV->getType()).getIndex(), 4); + OS.AddComment("DataOffset"); + OS.EmitCOFFSecRel32(GVSym, /*Offset=*/0); + OS.AddComment("Segment"); + OS.EmitCOFFSectionIndex(GVSym); + OS.AddComment("Name"); + const unsigned LengthOfDataRecord = 12; + emitNullTerminatedSymbolName(OS, DIGV->getName(), LengthOfDataRecord); + endSymbolRecord(DataEnd); + } else { + // FIXME: Currently this only emits the global variables in the IR metadata. + // This should also emit enums and static data members. + const DIExpression *DIE = CVGV.GVInfo.get<const DIExpression *>(); + assert(DIE->isConstant() && + "Global constant variables must contain a constant expression."); + uint64_t Val = DIE->getElement(1); + + MCSymbol *SConstantEnd = beginSymbolRecord(SymbolKind::S_CONSTANT); + OS.AddComment("Type"); + OS.EmitIntValue(getTypeIndex(DIGV->getType()).getIndex(), 4); + OS.AddComment("Value"); + + // Encoded integers shouldn't need more than 10 bytes. + uint8_t data[10]; + BinaryStreamWriter Writer(data, llvm::support::endianness::little); + CodeViewRecordIO IO(Writer); + cantFail(IO.mapEncodedInteger(Val)); + StringRef SRef((char *)data, Writer.getOffset()); + OS.EmitBinaryData(SRef); + + OS.AddComment("Name"); + const DIScope *Scope = DIGV->getScope(); + // For static data members, get the scope from the declaration. + if (const auto *MemberDecl = dyn_cast_or_null<DIDerivedType>( + DIGV->getRawStaticDataMemberDeclaration())) + Scope = MemberDecl->getScope(); + emitNullTerminatedSymbolName(OS, + getFullyQualifiedName(Scope, DIGV->getName())); + endSymbolRecord(SConstantEnd); + } +} diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h new file mode 100644 index 000000000000..7ffd77926cf7 --- /dev/null +++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h @@ -0,0 +1,469 @@ +//===- llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h --------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing Microsoft CodeView debug info. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_CODEVIEWDEBUG_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_CODEVIEWDEBUG_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/PointerUnion.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/DbgEntityHistoryCalculator.h" +#include "llvm/CodeGen/DebugHandlerBase.h" +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h" +#include "llvm/DebugInfo/CodeView/TypeIndex.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Compiler.h" +#include <cstdint> +#include <map> +#include <string> +#include <tuple> +#include <unordered_map> +#include <utility> +#include <vector> + +namespace llvm { + +struct ClassInfo; +class StringRef; +class AsmPrinter; +class Function; +class GlobalVariable; +class MCSectionCOFF; +class MCStreamer; +class MCSymbol; +class MachineFunction; + +/// Collects and handles line tables information in a CodeView format. +class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { + MCStreamer &OS; + BumpPtrAllocator Allocator; + codeview::GlobalTypeTableBuilder TypeTable; + + /// Whether to emit type record hashes into .debug$H. + bool EmitDebugGlobalHashes = false; + + /// The codeview CPU type used by the translation unit. + codeview::CPUType TheCPU; + + /// Represents the most general definition range. + struct LocalVarDefRange { + /// Indicates that variable data is stored in memory relative to the + /// specified register. + int InMemory : 1; + + /// Offset of variable data in memory. + int DataOffset : 31; + + /// Non-zero if this is a piece of an aggregate. + uint16_t IsSubfield : 1; + + /// Offset into aggregate. + uint16_t StructOffset : 15; + + /// Register containing the data or the register base of the memory + /// location containing the data. + uint16_t CVRegister; + + /// Compares all location fields. This includes all fields except the label + /// ranges. + bool isDifferentLocation(LocalVarDefRange &O) { + return InMemory != O.InMemory || DataOffset != O.DataOffset || + IsSubfield != O.IsSubfield || StructOffset != O.StructOffset || + CVRegister != O.CVRegister; + } + + SmallVector<std::pair<const MCSymbol *, const MCSymbol *>, 1> Ranges; + }; + + static LocalVarDefRange createDefRangeMem(uint16_t CVRegister, int Offset); + + /// Similar to DbgVariable in DwarfDebug, but not dwarf-specific. + struct LocalVariable { + const DILocalVariable *DIVar = nullptr; + SmallVector<LocalVarDefRange, 1> DefRanges; + bool UseReferenceType = false; + }; + + struct CVGlobalVariable { + const DIGlobalVariable *DIGV; + PointerUnion<const GlobalVariable *, const DIExpression *> GVInfo; + }; + + struct InlineSite { + SmallVector<LocalVariable, 1> InlinedLocals; + SmallVector<const DILocation *, 1> ChildSites; + const DISubprogram *Inlinee = nullptr; + + /// The ID of the inline site or function used with .cv_loc. Not a type + /// index. + unsigned SiteFuncId = 0; + }; + + // Combines information from DILexicalBlock and LexicalScope. + struct LexicalBlock { + SmallVector<LocalVariable, 1> Locals; + SmallVector<CVGlobalVariable, 1> Globals; + SmallVector<LexicalBlock *, 1> Children; + const MCSymbol *Begin; + const MCSymbol *End; + StringRef Name; + }; + + // For each function, store a vector of labels to its instructions, as well as + // to the end of the function. + struct FunctionInfo { + FunctionInfo() = default; + + // Uncopyable. + FunctionInfo(const FunctionInfo &FI) = delete; + + /// Map from inlined call site to inlined instructions and child inlined + /// call sites. Listed in program order. + std::unordered_map<const DILocation *, InlineSite> InlineSites; + + /// Ordered list of top-level inlined call sites. + SmallVector<const DILocation *, 1> ChildSites; + + SmallVector<LocalVariable, 1> Locals; + SmallVector<CVGlobalVariable, 1> Globals; + + std::unordered_map<const DILexicalBlockBase*, LexicalBlock> LexicalBlocks; + + // Lexical blocks containing local variables. + SmallVector<LexicalBlock *, 1> ChildBlocks; + + std::vector<std::pair<MCSymbol *, MDNode *>> Annotations; + std::vector<std::tuple<MCSymbol *, MCSymbol *, const DIType *>> + HeapAllocSites; + + const MCSymbol *Begin = nullptr; + const MCSymbol *End = nullptr; + unsigned FuncId = 0; + unsigned LastFileId = 0; + + /// Number of bytes allocated in the prologue for all local stack objects. + unsigned FrameSize = 0; + + /// Number of bytes of parameters on the stack. + unsigned ParamSize = 0; + + /// Number of bytes pushed to save CSRs. + unsigned CSRSize = 0; + + /// Adjustment to apply on x86 when using the VFRAME frame pointer. + int OffsetAdjustment = 0; + + /// Two-bit value indicating which register is the designated frame pointer + /// register for local variables. Included in S_FRAMEPROC. + codeview::EncodedFramePtrReg EncodedLocalFramePtrReg = + codeview::EncodedFramePtrReg::None; + + /// Two-bit value indicating which register is the designated frame pointer + /// register for stack parameters. Included in S_FRAMEPROC. + codeview::EncodedFramePtrReg EncodedParamFramePtrReg = + codeview::EncodedFramePtrReg::None; + + codeview::FrameProcedureOptions FrameProcOpts; + + bool HasStackRealignment = false; + + bool HaveLineInfo = false; + }; + FunctionInfo *CurFn = nullptr; + + // Map used to seperate variables according to the lexical scope they belong + // in. This is populated by recordLocalVariable() before + // collectLexicalBlocks() separates the variables between the FunctionInfo + // and LexicalBlocks. + DenseMap<const LexicalScope *, SmallVector<LocalVariable, 1>> ScopeVariables; + + // Map to separate global variables according to the lexical scope they + // belong in. A null local scope represents the global scope. + typedef SmallVector<CVGlobalVariable, 1> GlobalVariableList; + DenseMap<const DIScope*, std::unique_ptr<GlobalVariableList> > ScopeGlobals; + + // Array of global variables which need to be emitted into a COMDAT section. + SmallVector<CVGlobalVariable, 1> ComdatVariables; + + // Array of non-COMDAT global variables. + SmallVector<CVGlobalVariable, 1> GlobalVariables; + + /// The set of comdat .debug$S sections that we've seen so far. Each section + /// must start with a magic version number that must only be emitted once. + /// This set tracks which sections we've already opened. + DenseSet<MCSectionCOFF *> ComdatDebugSections; + + /// Switch to the appropriate .debug$S section for GVSym. If GVSym, the symbol + /// of an emitted global value, is in a comdat COFF section, this will switch + /// to a new .debug$S section in that comdat. This method ensures that the + /// section starts with the magic version number on first use. If GVSym is + /// null, uses the main .debug$S section. + void switchToDebugSectionForSymbol(const MCSymbol *GVSym); + + /// The next available function index for use with our .cv_* directives. Not + /// to be confused with type indices for LF_FUNC_ID records. + unsigned NextFuncId = 0; + + InlineSite &getInlineSite(const DILocation *InlinedAt, + const DISubprogram *Inlinee); + + codeview::TypeIndex getFuncIdForSubprogram(const DISubprogram *SP); + + void calculateRanges(LocalVariable &Var, + const DbgValueHistoryMap::Entries &Entries); + + static void collectInlineSiteChildren(SmallVectorImpl<unsigned> &Children, + const FunctionInfo &FI, + const InlineSite &Site); + + /// Remember some debug info about each function. Keep it in a stable order to + /// emit at the end of the TU. + MapVector<const Function *, std::unique_ptr<FunctionInfo>> FnDebugInfo; + + /// Map from full file path to .cv_file id. Full paths are built from DIFiles + /// and are stored in FileToFilepathMap; + DenseMap<StringRef, unsigned> FileIdMap; + + /// All inlined subprograms in the order they should be emitted. + SmallSetVector<const DISubprogram *, 4> InlinedSubprograms; + + /// Map from a pair of DI metadata nodes and its DI type (or scope) that can + /// be nullptr, to CodeView type indices. Primarily indexed by + /// {DIType*, DIType*} and {DISubprogram*, DIType*}. + /// + /// The second entry in the key is needed for methods as DISubroutineType + /// representing static method type are shared with non-method function type. + DenseMap<std::pair<const DINode *, const DIType *>, codeview::TypeIndex> + TypeIndices; + + /// Map from DICompositeType* to complete type index. Non-record types are + /// always looked up in the normal TypeIndices map. + DenseMap<const DICompositeType *, codeview::TypeIndex> CompleteTypeIndices; + + /// Complete record types to emit after all active type lowerings are + /// finished. + SmallVector<const DICompositeType *, 4> DeferredCompleteTypes; + + /// Number of type lowering frames active on the stack. + unsigned TypeEmissionLevel = 0; + + codeview::TypeIndex VBPType; + + const DISubprogram *CurrentSubprogram = nullptr; + + // The UDTs we have seen while processing types; each entry is a pair of type + // index and type name. + std::vector<std::pair<std::string, const DIType *>> LocalUDTs; + std::vector<std::pair<std::string, const DIType *>> GlobalUDTs; + + using FileToFilepathMapTy = std::map<const DIFile *, std::string>; + FileToFilepathMapTy FileToFilepathMap; + + StringRef getFullFilepath(const DIFile *File); + + unsigned maybeRecordFile(const DIFile *F); + + void maybeRecordLocation(const DebugLoc &DL, const MachineFunction *MF); + + void clear(); + + void setCurrentSubprogram(const DISubprogram *SP) { + CurrentSubprogram = SP; + LocalUDTs.clear(); + } + + /// Emit the magic version number at the start of a CodeView type or symbol + /// section. Appears at the front of every .debug$S or .debug$T or .debug$P + /// section. + void emitCodeViewMagicVersion(); + + void emitTypeInformation(); + + void emitTypeGlobalHashes(); + + void emitCompilerInformation(); + + void emitBuildInfo(); + + void emitInlineeLinesSubsection(); + + void emitDebugInfoForThunk(const Function *GV, + FunctionInfo &FI, + const MCSymbol *Fn); + + void emitDebugInfoForFunction(const Function *GV, FunctionInfo &FI); + + void emitDebugInfoForRetainedTypes(); + + void + emitDebugInfoForUDTs(ArrayRef<std::pair<std::string, const DIType *>> UDTs); + + void emitDebugInfoForGlobals(); + void emitGlobalVariableList(ArrayRef<CVGlobalVariable> Globals); + void emitDebugInfoForGlobal(const CVGlobalVariable &CVGV); + + /// Opens a subsection of the given kind in a .debug$S codeview section. + /// Returns an end label for use with endCVSubsection when the subsection is + /// finished. + MCSymbol *beginCVSubsection(codeview::DebugSubsectionKind Kind); + void endCVSubsection(MCSymbol *EndLabel); + + /// Opens a symbol record of the given kind. Returns an end label for use with + /// endSymbolRecord. + MCSymbol *beginSymbolRecord(codeview::SymbolKind Kind); + void endSymbolRecord(MCSymbol *SymEnd); + + /// Emits an S_END, S_INLINESITE_END, or S_PROC_ID_END record. These records + /// are empty, so we emit them with a simpler assembly sequence that doesn't + /// involve labels. + void emitEndSymbolRecord(codeview::SymbolKind EndKind); + + void emitInlinedCallSite(const FunctionInfo &FI, const DILocation *InlinedAt, + const InlineSite &Site); + + using InlinedEntity = DbgValueHistoryMap::InlinedEntity; + + void collectGlobalVariableInfo(); + void collectVariableInfo(const DISubprogram *SP); + + void collectVariableInfoFromMFTable(DenseSet<InlinedEntity> &Processed); + + // Construct the lexical block tree for a routine, pruning emptpy lexical + // scopes, and populate it with local variables. + void collectLexicalBlockInfo(SmallVectorImpl<LexicalScope *> &Scopes, + SmallVectorImpl<LexicalBlock *> &Blocks, + SmallVectorImpl<LocalVariable> &Locals, + SmallVectorImpl<CVGlobalVariable> &Globals); + void collectLexicalBlockInfo(LexicalScope &Scope, + SmallVectorImpl<LexicalBlock *> &ParentBlocks, + SmallVectorImpl<LocalVariable> &ParentLocals, + SmallVectorImpl<CVGlobalVariable> &ParentGlobals); + + /// Records information about a local variable in the appropriate scope. In + /// particular, locals from inlined code live inside the inlining site. + void recordLocalVariable(LocalVariable &&Var, const LexicalScope *LS); + + /// Emits local variables in the appropriate order. + void emitLocalVariableList(const FunctionInfo &FI, + ArrayRef<LocalVariable> Locals); + + /// Emits an S_LOCAL record and its associated defined ranges. + void emitLocalVariable(const FunctionInfo &FI, const LocalVariable &Var); + + /// Emits a sequence of lexical block scopes and their children. + void emitLexicalBlockList(ArrayRef<LexicalBlock *> Blocks, + const FunctionInfo& FI); + + /// Emit a lexical block scope and its children. + void emitLexicalBlock(const LexicalBlock &Block, const FunctionInfo& FI); + + /// Translates the DIType to codeview if necessary and returns a type index + /// for it. + codeview::TypeIndex getTypeIndex(const DIType *Ty, + const DIType *ClassTy = nullptr); + + codeview::TypeIndex + getTypeIndexForThisPtr(const DIDerivedType *PtrTy, + const DISubroutineType *SubroutineTy); + + codeview::TypeIndex getTypeIndexForReferenceTo(const DIType *Ty); + + codeview::TypeIndex getMemberFunctionType(const DISubprogram *SP, + const DICompositeType *Class); + + codeview::TypeIndex getScopeIndex(const DIScope *Scope); + + codeview::TypeIndex getVBPTypeIndex(); + + void addToUDTs(const DIType *Ty); + + void addUDTSrcLine(const DIType *Ty, codeview::TypeIndex TI); + + codeview::TypeIndex lowerType(const DIType *Ty, const DIType *ClassTy); + codeview::TypeIndex lowerTypeAlias(const DIDerivedType *Ty); + codeview::TypeIndex lowerTypeArray(const DICompositeType *Ty); + codeview::TypeIndex lowerTypeBasic(const DIBasicType *Ty); + codeview::TypeIndex lowerTypePointer( + const DIDerivedType *Ty, + codeview::PointerOptions PO = codeview::PointerOptions::None); + codeview::TypeIndex lowerTypeMemberPointer( + const DIDerivedType *Ty, + codeview::PointerOptions PO = codeview::PointerOptions::None); + codeview::TypeIndex lowerTypeModifier(const DIDerivedType *Ty); + codeview::TypeIndex lowerTypeFunction(const DISubroutineType *Ty); + codeview::TypeIndex lowerTypeVFTableShape(const DIDerivedType *Ty); + codeview::TypeIndex lowerTypeMemberFunction( + const DISubroutineType *Ty, const DIType *ClassTy, int ThisAdjustment, + bool IsStaticMethod, + codeview::FunctionOptions FO = codeview::FunctionOptions::None); + codeview::TypeIndex lowerTypeEnum(const DICompositeType *Ty); + codeview::TypeIndex lowerTypeClass(const DICompositeType *Ty); + codeview::TypeIndex lowerTypeUnion(const DICompositeType *Ty); + + /// Symbol records should point to complete types, but type records should + /// always point to incomplete types to avoid cycles in the type graph. Only + /// use this entry point when generating symbol records. The complete and + /// incomplete type indices only differ for record types. All other types use + /// the same index. + codeview::TypeIndex getCompleteTypeIndex(const DIType *Ty); + + codeview::TypeIndex lowerCompleteTypeClass(const DICompositeType *Ty); + codeview::TypeIndex lowerCompleteTypeUnion(const DICompositeType *Ty); + + struct TypeLoweringScope; + + void emitDeferredCompleteTypes(); + + void collectMemberInfo(ClassInfo &Info, const DIDerivedType *DDTy); + ClassInfo collectClassInfo(const DICompositeType *Ty); + + /// Common record member lowering functionality for record types, which are + /// structs, classes, and unions. Returns the field list index and the member + /// count. + std::tuple<codeview::TypeIndex, codeview::TypeIndex, unsigned, bool> + lowerRecordFieldList(const DICompositeType *Ty); + + /// Inserts {{Node, ClassTy}, TI} into TypeIndices and checks for duplicates. + codeview::TypeIndex recordTypeIndexForDINode(const DINode *Node, + codeview::TypeIndex TI, + const DIType *ClassTy = nullptr); + + unsigned getPointerSizeInBytes(); + +protected: + /// Gather pre-function debug information. + void beginFunctionImpl(const MachineFunction *MF) override; + + /// Gather post-function debug information. + void endFunctionImpl(const MachineFunction *) override; + +public: + CodeViewDebug(AsmPrinter *AP); + + void setSymbolSize(const MCSymbol *, uint64_t) override {} + + /// Emit the COFF section that holds the line table information. + void endModule() override; + + /// Process beginning of an instruction. + void beginInstruction(const MachineInstr *MI) override; +}; + +} // end namespace llvm + +#endif // LLVM_LIB_CODEGEN_ASMPRINTER_CODEVIEWDEBUG_H diff --git a/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/llvm/lib/CodeGen/AsmPrinter/DIE.cpp new file mode 100644 index 000000000000..f4134da48caa --- /dev/null +++ b/llvm/lib/CodeGen/AsmPrinter/DIE.cpp @@ -0,0 +1,817 @@ +//===--- lib/CodeGen/DIE.cpp - DWARF Info Entries -------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Data structures for DWARF info entries. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/DIE.h" +#include "DwarfCompileUnit.h" +#include "DwarfDebug.h" +#include "DwarfUnit.h" +#include "llvm/ADT/Twine.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/Config/llvm-config.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Support/LEB128.h" +#include "llvm/Support/MD5.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "dwarfdebug" + +//===----------------------------------------------------------------------===// +// DIEAbbrevData Implementation +//===----------------------------------------------------------------------===// + +/// Profile - Used to gather unique data for the abbreviation folding set. +/// +void DIEAbbrevData::Profile(FoldingSetNodeID &ID) const { + // Explicitly cast to an integer type for which FoldingSetNodeID has + // overloads. Otherwise MSVC 2010 thinks this call is ambiguous. + ID.AddInteger(unsigned(Attribute)); + ID.AddInteger(unsigned(Form)); + if (Form == dwarf::DW_FORM_implicit_const) + ID.AddInteger(Value); +} + +//===----------------------------------------------------------------------===// +// DIEAbbrev Implementation +//===----------------------------------------------------------------------===// + +/// Profile - Used to gather unique data for the abbreviation folding set. +/// +void DIEAbbrev::Profile(FoldingSetNodeID &ID) const { + ID.AddInteger(unsigned(Tag)); + ID.AddInteger(unsigned(Children)); + + // For each attribute description. + for (unsigned i = 0, N = Data.size(); i < N; ++i) + Data[i].Profile(ID); +} + +/// Emit - Print the abbreviation using the specified asm printer. +/// +void DIEAbbrev::Emit(const AsmPrinter *AP) const { + // Emit its Dwarf tag type. + AP->EmitULEB128(Tag, dwarf::TagString(Tag).data()); + + // Emit whether it has children DIEs. + AP->EmitULEB128((unsigned)Children, dwarf::ChildrenString(Children).data()); + + // For each attribute description. + for (unsigned i = 0, N = Data.size(); i < N; ++i) { + const DIEAbbrevData &AttrData = Data[i]; + + // Emit attribute type. + AP->EmitULEB128(AttrData.getAttribute(), + dwarf::AttributeString(AttrData.getAttribute()).data()); + + // Emit form type. +#ifndef NDEBUG + // Could be an assertion, but this way we can see the failing form code + // easily, which helps track down where it came from. + if (!dwarf::isValidFormForVersion(AttrData.getForm(), + AP->getDwarfVersion())) { + LLVM_DEBUG(dbgs() << "Invalid form " << format("0x%x", AttrData.getForm()) + << " for DWARF version " << AP->getDwarfVersion() + << "\n"); + llvm_unreachable("Invalid form for specified DWARF version"); + } +#endif + AP->EmitULEB128(AttrData.getForm(), + dwarf::FormEncodingString(AttrData.getForm()).data()); + + // Emit value for DW_FORM_implicit_const. + if (AttrData.getForm() == dwarf::DW_FORM_implicit_const) + AP->EmitSLEB128(AttrData.getValue()); + } + + // Mark end of abbreviation. + AP->EmitULEB128(0, "EOM(1)"); + AP->EmitULEB128(0, "EOM(2)"); +} + +LLVM_DUMP_METHOD +void DIEAbbrev::print(raw_ostream &O) const { + O << "Abbreviation @" + << format("0x%lx", (long)(intptr_t)this) + << " " + << dwarf::TagString(Tag) + << " " + << dwarf::ChildrenString(Children) + << '\n'; + + for (unsigned i = 0, N = Data.size(); i < N; ++i) { + O << " " + << dwarf::AttributeString(Data[i].getAttribute()) + << " " + << dwarf::FormEncodingString(Data[i].getForm()); + + if (Data[i].getForm() == dwarf::DW_FORM_implicit_const) + O << " " << Data[i].getValue(); + + O << '\n'; + } +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void DIEAbbrev::dump() const { + print(dbgs()); +} +#endif + +//===----------------------------------------------------------------------===// +// DIEAbbrevSet Implementation +//===----------------------------------------------------------------------===// + +DIEAbbrevSet::~DIEAbbrevSet() { + for (DIEAbbrev *Abbrev : Abbreviations) + Abbrev->~DIEAbbrev(); +} + +DIEAbbrev &DIEAbbrevSet::uniqueAbbreviation(DIE &Die) { + + FoldingSetNodeID ID; + DIEAbbrev Abbrev = Die.generateAbbrev(); + Abbrev.Profile(ID); + + void *InsertPos; + if (DIEAbbrev *Existing = + AbbreviationsSet.FindNodeOrInsertPos(ID, InsertPos)) { + Die.setAbbrevNumber(Existing->getNumber()); + return *Existing; + } + + // Move the abbreviation to the heap and assign a number. + DIEAbbrev *New = new (Alloc) DIEAbbrev(std::move(Abbrev)); + Abbreviations.push_back(New); + New->setNumber(Abbreviations.size()); + Die.setAbbrevNumber(Abbreviations.size()); + + // Store it for lookup. + AbbreviationsSet.InsertNode(New, InsertPos); + return *New; +} + +void DIEAbbrevSet::Emit(const AsmPrinter *AP, MCSection *Section) const { + if (!Abbreviations.empty()) { + // Start the debug abbrev section. + AP->OutStreamer->SwitchSection(Section); + AP->emitDwarfAbbrevs(Abbreviations); + } +} + +//===----------------------------------------------------------------------===// +// DIE Implementation +//===----------------------------------------------------------------------===// + +DIE *DIE::getParent() const { + return Owner.dyn_cast<DIE*>(); +} + +DIEAbbrev DIE::generateAbbrev() const { + DIEAbbrev Abbrev(Tag, hasChildren()); + for (const DIEValue &V : values()) + if (V.getForm() == dwarf::DW_FORM_implicit_const) + Abbrev.AddImplicitConstAttribute(V.getAttribute(), + V.getDIEInteger().getValue()); + else + Abbrev.AddAttribute(V.getAttribute(), V.getForm()); + return Abbrev; +} + +unsigned DIE::getDebugSectionOffset() const { + const DIEUnit *Unit = getUnit(); + assert(Unit && "DIE must be owned by a DIEUnit to get its absolute offset"); + return Unit->getDebugSectionOffset() + getOffset(); +} + +const DIE *DIE::getUnitDie() const { + const DIE *p = this; + while (p) { + if (p->getTag() == dwarf::DW_TAG_compile_unit || + p->getTag() == dwarf::DW_TAG_type_unit) + return p; + p = p->getParent(); + } + return nullptr; +} + +DIEUnit *DIE::getUnit() const { + const DIE *UnitDie = getUnitDie(); + if (UnitDie) + return UnitDie->Owner.dyn_cast<DIEUnit*>(); + return nullptr; +} + +DIEValue DIE::findAttribute(dwarf::Attribute Attribute) const { + // Iterate through all the attributes until we find the one we're + // looking for, if we can't find it return NULL. + for (const auto &V : values()) + if (V.getAttribute() == Attribute) + return V; + return DIEValue(); +} + +LLVM_DUMP_METHOD +static void printValues(raw_ostream &O, const DIEValueList &Values, + StringRef Type, unsigned Size, unsigned IndentCount) { + O << Type << ": Size: " << Size << "\n"; + + unsigned I = 0; + const std::string Indent(IndentCount, ' '); + for (const auto &V : Values.values()) { + O << Indent; + O << "Blk[" << I++ << "]"; + O << " " << dwarf::FormEncodingString(V.getForm()) << " "; + V.print(O); + O << "\n"; + } +} + +LLVM_DUMP_METHOD +void DIE::print(raw_ostream &O, unsigned IndentCount) const { + const std::string Indent(IndentCount, ' '); + O << Indent << "Die: " << format("0x%lx", (long)(intptr_t) this) + << ", Offset: " << Offset << ", Size: " << Size << "\n"; + + O << Indent << dwarf::TagString(getTag()) << " " + << dwarf::ChildrenString(hasChildren()) << "\n"; + + IndentCount += 2; + for (const auto &V : values()) { + O << Indent; + O << dwarf::AttributeString(V.getAttribute()); + O << " " << dwarf::FormEncodingString(V.getForm()) << " "; + V.print(O); + O << "\n"; + } + IndentCount -= 2; + + for (const auto &Child : children()) + Child.print(O, IndentCount + 4); + + O << "\n"; +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void DIE::dump() const { + print(dbgs()); +} +#endif + +unsigned DIE::computeOffsetsAndAbbrevs(const AsmPrinter *AP, + DIEAbbrevSet &AbbrevSet, + unsigned CUOffset) { + // Unique the abbreviation and fill in the abbreviation number so this DIE + // can be emitted. + const DIEAbbrev &Abbrev = AbbrevSet.uniqueAbbreviation(*this); + + // Set compile/type unit relative offset of this DIE. + setOffset(CUOffset); + + // Add the byte size of the abbreviation code. + CUOffset += getULEB128Size(getAbbrevNumber()); + + // Add the byte size of all the DIE attribute values. + for (const auto &V : values()) + CUOffset += V.SizeOf(AP); + + // Let the children compute their offsets and abbreviation numbers. + if (hasChildren()) { + (void)Abbrev; + assert(Abbrev.hasChildren() && "Children flag not set"); + + for (auto &Child : children()) + CUOffset = Child.computeOffsetsAndAbbrevs(AP, AbbrevSet, CUOffset); + + // Each child chain is terminated with a zero byte, adjust the offset. + CUOffset += sizeof(int8_t); + } + + // Compute the byte size of this DIE and all of its children correctly. This + // is needed so that top level DIE can help the compile unit set its length + // correctly. + setSize(CUOffset - getOffset()); + return CUOffset; +} + +//===----------------------------------------------------------------------===// +// DIEUnit Implementation +//===----------------------------------------------------------------------===// +DIEUnit::DIEUnit(uint16_t V, uint8_t A, dwarf::Tag UnitTag) + : Die(UnitTag), Section(nullptr), Offset(0), Length(0), Version(V), + AddrSize(A) +{ + Die.Owner = this; + assert((UnitTag == dwarf::DW_TAG_compile_unit || + UnitTag == dwarf::DW_TAG_type_unit || + UnitTag == dwarf::DW_TAG_partial_unit) && "expected a unit TAG"); +} + +void DIEValue::EmitValue(const AsmPrinter *AP) const { + switch (Ty) { + case isNone: + llvm_unreachable("Expected valid DIEValue"); +#define HANDLE_DIEVALUE(T) \ + case is##T: \ + getDIE##T().EmitValue(AP, Form); \ + break; +#include "llvm/CodeGen/DIEValue.def" + } +} + +unsigned DIEValue::SizeOf(const AsmPrinter *AP) const { + switch (Ty) { + case isNone: + llvm_unreachable("Expected valid DIEValue"); +#define HANDLE_DIEVALUE(T) \ + case is##T: \ + return getDIE##T().SizeOf(AP, Form); +#include "llvm/CodeGen/DIEValue.def" + } + llvm_unreachable("Unknown DIE kind"); +} + +LLVM_DUMP_METHOD +void DIEValue::print(raw_ostream &O) const { + switch (Ty) { + case isNone: + llvm_unreachable("Expected valid DIEValue"); +#define HANDLE_DIEVALUE(T) \ + case is##T: \ + getDIE##T().print(O); \ + break; +#include "llvm/CodeGen/DIEValue.def" + } +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void DIEValue::dump() const { + print(dbgs()); +} +#endif + +//===----------------------------------------------------------------------===// +// DIEInteger Implementation +//===----------------------------------------------------------------------===// + +/// EmitValue - Emit integer of appropriate size. +/// +void DIEInteger::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const { + switch (Form) { + case dwarf::DW_FORM_implicit_const: + case dwarf::DW_FORM_flag_present: + // Emit something to keep the lines and comments in sync. + // FIXME: Is there a better way to do this? + Asm->OutStreamer->AddBlankLine(); + return; + case dwarf::DW_FORM_flag: + case dwarf::DW_FORM_ref1: + case dwarf::DW_FORM_data1: + case dwarf::DW_FORM_strx1: + case dwarf::DW_FORM_addrx1: + case dwarf::DW_FORM_ref2: + case dwarf::DW_FORM_data2: + case dwarf::DW_FORM_strx2: + case dwarf::DW_FORM_addrx2: + case dwarf::DW_FORM_strx3: + case dwarf::DW_FORM_strp: + case dwarf::DW_FORM_ref4: + case dwarf::DW_FORM_data4: + case dwarf::DW_FORM_ref_sup4: + case dwarf::DW_FORM_strx4: + case dwarf::DW_FORM_addrx4: + case dwarf::DW_FORM_ref8: + case dwarf::DW_FORM_ref_sig8: + case dwarf::DW_FORM_data8: + case dwarf::DW_FORM_ref_sup8: + case dwarf::DW_FORM_GNU_ref_alt: + case dwarf::DW_FORM_GNU_strp_alt: + case dwarf::DW_FORM_line_strp: + case dwarf::DW_FORM_sec_offset: + case dwarf::DW_FORM_strp_sup: + case dwarf::DW_FORM_addr: + case dwarf::DW_FORM_ref_addr: + Asm->OutStreamer->EmitIntValue(Integer, SizeOf(Asm, Form)); + return; + case dwarf::DW_FORM_GNU_str_index: + case dwarf::DW_FORM_GNU_addr_index: + case dwarf::DW_FORM_ref_udata: + case dwarf::DW_FORM_strx: + case dwarf::DW_FORM_addrx: + case dwarf::DW_FORM_rnglistx: + case dwarf::DW_FORM_udata: + Asm->EmitULEB128(Integer); + return; + case dwarf::DW_FORM_sdata: + Asm->EmitSLEB128(Integer); + return; + default: llvm_unreachable("DIE Value form not supported yet"); + } +} + +/// SizeOf - Determine size of integer value in bytes. +/// +unsigned DIEInteger::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { + dwarf::FormParams Params = {0, 0, dwarf::DWARF32}; + if (AP) + Params = {AP->getDwarfVersion(), uint8_t(AP->getPointerSize()), + AP->OutStreamer->getContext().getDwarfFormat()}; + + if (Optional<uint8_t> FixedSize = dwarf::getFixedFormByteSize(Form, Params)) + return *FixedSize; + + switch (Form) { + case dwarf::DW_FORM_GNU_str_index: + case dwarf::DW_FORM_GNU_addr_index: + case dwarf::DW_FORM_ref_udata: + case dwarf::DW_FORM_strx: + case dwarf::DW_FORM_addrx: + case dwarf::DW_FORM_rnglistx: + case dwarf::DW_FORM_udata: + return getULEB128Size(Integer); + case dwarf::DW_FORM_sdata: + return getSLEB128Size(Integer); + default: llvm_unreachable("DIE Value form not supported yet"); + } +} + +LLVM_DUMP_METHOD +void DIEInteger::print(raw_ostream &O) const { + O << "Int: " << (int64_t)Integer << " 0x"; + O.write_hex(Integer); +} + +//===----------------------------------------------------------------------===// +// DIEExpr Implementation +//===----------------------------------------------------------------------===// + +/// EmitValue - Emit expression value. +/// +void DIEExpr::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const { + AP->EmitDebugValue(Expr, SizeOf(AP, Form)); +} + +/// SizeOf - Determine size of expression value in bytes. +/// +unsigned DIEExpr::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { + if (Form == dwarf::DW_FORM_data4) return 4; + if (Form == dwarf::DW_FORM_sec_offset) return 4; + if (Form == dwarf::DW_FORM_strp) return 4; + return AP->getPointerSize(); +} + +LLVM_DUMP_METHOD +void DIEExpr::print(raw_ostream &O) const { O << "Expr: " << *Expr; } + +//===----------------------------------------------------------------------===// +// DIELabel Implementation +//===----------------------------------------------------------------------===// + +/// EmitValue - Emit label value. +/// +void DIELabel::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const { + AP->EmitLabelReference(Label, SizeOf(AP, Form), + Form == dwarf::DW_FORM_strp || + Form == dwarf::DW_FORM_sec_offset || + Form == dwarf::DW_FORM_ref_addr || + Form == dwarf::DW_FORM_data4); +} + +/// SizeOf - Determine size of label value in bytes. +/// +unsigned DIELabel::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { + if (Form == dwarf::DW_FORM_data4) return 4; + if (Form == dwarf::DW_FORM_sec_offset) return 4; + if (Form == dwarf::DW_FORM_strp) return 4; + return AP->MAI->getCodePointerSize(); +} + +LLVM_DUMP_METHOD +void DIELabel::print(raw_ostream &O) const { O << "Lbl: " << Label->getName(); } + +//===----------------------------------------------------------------------===// +// DIEBaseTypeRef Implementation +//===----------------------------------------------------------------------===// + +void DIEBaseTypeRef::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const { + uint64_t Offset = CU->ExprRefedBaseTypes[Index].Die->getOffset(); + assert(Offset < (1ULL << (ULEB128PadSize * 7)) && "Offset wont fit"); + AP->EmitULEB128(Offset, nullptr, ULEB128PadSize); +} + +unsigned DIEBaseTypeRef::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { + return ULEB128PadSize; +} + +LLVM_DUMP_METHOD +void DIEBaseTypeRef::print(raw_ostream &O) const { O << "BaseTypeRef: " << Index; } + +//===----------------------------------------------------------------------===// +// DIEDelta Implementation +//===----------------------------------------------------------------------===// + +/// EmitValue - Emit delta value. +/// +void DIEDelta::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const { + AP->EmitLabelDifference(LabelHi, LabelLo, SizeOf(AP, Form)); +} + +/// SizeOf - Determine size of delta value in bytes. +/// +unsigned DIEDelta::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { + if (Form == dwarf::DW_FORM_data4) return 4; + if (Form == dwarf::DW_FORM_sec_offset) return 4; + if (Form == dwarf::DW_FORM_strp) return 4; + return AP->MAI->getCodePointerSize(); +} + +LLVM_DUMP_METHOD +void DIEDelta::print(raw_ostream &O) const { + O << "Del: " << LabelHi->getName() << "-" << LabelLo->getName(); +} + +//===----------------------------------------------------------------------===// +// DIEString Implementation +//===----------------------------------------------------------------------===// + +/// EmitValue - Emit string value. +/// +void DIEString::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const { + // Index of string in symbol table. + switch (Form) { + case dwarf::DW_FORM_GNU_str_index: + case dwarf::DW_FORM_strx: + case dwarf::DW_FORM_strx1: + case dwarf::DW_FORM_strx2: + case dwarf::DW_FORM_strx3: + case dwarf::DW_FORM_strx4: + DIEInteger(S.getIndex()).EmitValue(AP, Form); + return; + case dwarf::DW_FORM_strp: + if (AP->MAI->doesDwarfUseRelocationsAcrossSections()) + DIELabel(S.getSymbol()).EmitValue(AP, Form); + else + DIEInteger(S.getOffset()).EmitValue(AP, Form); + return; + default: + llvm_unreachable("Expected valid string form"); + } +} + +/// SizeOf - Determine size of delta value in bytes. +/// +unsigned DIEString::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { + // Index of string in symbol table. + switch (Form) { + case dwarf::DW_FORM_GNU_str_index: + case dwarf::DW_FORM_strx: + case dwarf::DW_FORM_strx1: + case dwarf::DW_FORM_strx2: + case dwarf::DW_FORM_strx3: + case dwarf::DW_FORM_strx4: + return DIEInteger(S.getIndex()).SizeOf(AP, Form); + case dwarf::DW_FORM_strp: + if (AP->MAI->doesDwarfUseRelocationsAcrossSections()) + return DIELabel(S.getSymbol()).SizeOf(AP, Form); + return DIEInteger(S.getOffset()).SizeOf(AP, Form); + default: + llvm_unreachable("Expected valid string form"); + } +} + +LLVM_DUMP_METHOD +void DIEString::print(raw_ostream &O) const { + O << "String: " << S.getString(); +} + +//===----------------------------------------------------------------------===// +// DIEInlineString Implementation +//===----------------------------------------------------------------------===// +void DIEInlineString::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const { + if (Form == dwarf::DW_FORM_string) { + AP->OutStreamer->EmitBytes(S); + AP->emitInt8(0); + return; + } + llvm_unreachable("Expected valid string form"); +} + +unsigned DIEInlineString::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { + // Emit string bytes + NULL byte. + return S.size() + 1; +} + +LLVM_DUMP_METHOD +void DIEInlineString::print(raw_ostream &O) const { + O << "InlineString: " << S; +} + +//===----------------------------------------------------------------------===// +// DIEEntry Implementation +//===----------------------------------------------------------------------===// + +/// EmitValue - Emit debug information entry offset. +/// +void DIEEntry::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const { + + switch (Form) { + case dwarf::DW_FORM_ref1: + case dwarf::DW_FORM_ref2: + case dwarf::DW_FORM_ref4: + case dwarf::DW_FORM_ref8: + AP->OutStreamer->EmitIntValue(Entry->getOffset(), SizeOf(AP, Form)); + return; + + case dwarf::DW_FORM_ref_udata: + AP->EmitULEB128(Entry->getOffset()); + return; + + case dwarf::DW_FORM_ref_addr: { + // Get the absolute offset for this DIE within the debug info/types section. + unsigned Addr = Entry->getDebugSectionOffset(); + if (const MCSymbol *SectionSym = + Entry->getUnit()->getCrossSectionRelativeBaseAddress()) { + AP->EmitLabelPlusOffset(SectionSym, Addr, SizeOf(AP, Form), true); + return; + } + + AP->OutStreamer->EmitIntValue(Addr, SizeOf(AP, Form)); + return; + } + default: + llvm_unreachable("Improper form for DIE reference"); + } +} + +unsigned DIEEntry::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { + switch (Form) { + case dwarf::DW_FORM_ref1: + return 1; + case dwarf::DW_FORM_ref2: + return 2; + case dwarf::DW_FORM_ref4: + return 4; + case dwarf::DW_FORM_ref8: + return 8; + case dwarf::DW_FORM_ref_udata: + return getULEB128Size(Entry->getOffset()); + case dwarf::DW_FORM_ref_addr: + if (AP->getDwarfVersion() == 2) + return AP->MAI->getCodePointerSize(); + switch (AP->OutStreamer->getContext().getDwarfFormat()) { + case dwarf::DWARF32: + return 4; + case dwarf::DWARF64: + return 8; + } + llvm_unreachable("Invalid DWARF format"); + + default: + llvm_unreachable("Improper form for DIE reference"); + } +} + +LLVM_DUMP_METHOD +void DIEEntry::print(raw_ostream &O) const { + O << format("Die: 0x%lx", (long)(intptr_t)&Entry); +} + +//===----------------------------------------------------------------------===// +// DIELoc Implementation +//===----------------------------------------------------------------------===// + +/// ComputeSize - calculate the size of the location expression. +/// +unsigned DIELoc::ComputeSize(const AsmPrinter *AP) const { + if (!Size) { + for (const auto &V : values()) + Size += V.SizeOf(AP); + } + + return Size; +} + +/// EmitValue - Emit location data. +/// +void DIELoc::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const { + switch (Form) { + default: llvm_unreachable("Improper form for block"); + case dwarf::DW_FORM_block1: Asm->emitInt8(Size); break; + case dwarf::DW_FORM_block2: Asm->emitInt16(Size); break; + case dwarf::DW_FORM_block4: Asm->emitInt32(Size); break; + case dwarf::DW_FORM_block: + case dwarf::DW_FORM_exprloc: + Asm->EmitULEB128(Size); break; + } + + for (const auto &V : values()) + V.EmitValue(Asm); +} + +/// SizeOf - Determine size of location data in bytes. +/// +unsigned DIELoc::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { + switch (Form) { + case dwarf::DW_FORM_block1: return Size + sizeof(int8_t); + case dwarf::DW_FORM_block2: return Size + sizeof(int16_t); + case dwarf::DW_FORM_block4: return Size + sizeof(int32_t); + case dwarf::DW_FORM_block: + case dwarf::DW_FORM_exprloc: + return Size + getULEB128Size(Size); + default: llvm_unreachable("Improper form for block"); + } +} + +LLVM_DUMP_METHOD +void DIELoc::print(raw_ostream &O) const { + printValues(O, *this, "ExprLoc", Size, 5); +} + +//===----------------------------------------------------------------------===// +// DIEBlock Implementation +//===----------------------------------------------------------------------===// + +/// ComputeSize - calculate the size of the block. +/// +unsigned DIEBlock::ComputeSize(const AsmPrinter *AP) const { + if (!Size) { + for (const auto &V : values()) + Size += V.SizeOf(AP); + } + + return Size; +} + +/// EmitValue - Emit block data. +/// +void DIEBlock::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const { + switch (Form) { + default: llvm_unreachable("Improper form for block"); + case dwarf::DW_FORM_block1: Asm->emitInt8(Size); break; + case dwarf::DW_FORM_block2: Asm->emitInt16(Size); break; + case dwarf::DW_FORM_block4: Asm->emitInt32(Size); break; + case dwarf::DW_FORM_block: Asm->EmitULEB128(Size); break; + case dwarf::DW_FORM_string: break; + case dwarf::DW_FORM_data16: break; + } + + for (const auto &V : values()) + V.EmitValue(Asm); +} + +/// SizeOf - Determine size of block data in bytes. +/// +unsigned DIEBlock::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { + switch (Form) { + case dwarf::DW_FORM_block1: return Size + sizeof(int8_t); + case dwarf::DW_FORM_block2: return Size + sizeof(int16_t); + case dwarf::DW_FORM_block4: return Size + sizeof(int32_t); + case dwarf::DW_FORM_block: return Size + getULEB128Size(Size); + case dwarf::DW_FORM_data16: return 16; + default: llvm_unreachable("Improper form for block"); + } +} + +LLVM_DUMP_METHOD +void DIEBlock::print(raw_ostream &O) const { + printValues(O, *this, "Blk", Size, 5); +} + +//===----------------------------------------------------------------------===// +// DIELocList Implementation +//===----------------------------------------------------------------------===// + +unsigned DIELocList::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { + if (Form == dwarf::DW_FORM_data4) + return 4; + if (Form == dwarf::DW_FORM_sec_offset) + return 4; + return AP->MAI->getCodePointerSize(); +} + +/// EmitValue - Emit label value. +/// +void DIELocList::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const { + DwarfDebug *DD = AP->getDwarfDebug(); + MCSymbol *Label = DD->getDebugLocs().getList(Index).Label; + AP->emitDwarfSymbolReference(Label, /*ForceOffset*/ DD->useSplitDwarf()); +} + +LLVM_DUMP_METHOD +void DIELocList::print(raw_ostream &O) const { O << "LocList: " << Index; } diff --git a/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp b/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp new file mode 100644 index 000000000000..bfac8850a2a6 --- /dev/null +++ b/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp @@ -0,0 +1,424 @@ +//===-- llvm/CodeGen/DIEHash.cpp - Dwarf Hashing Framework ----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains support for DWARF4 hashing of DIEs. +// +//===----------------------------------------------------------------------===// + +#include "DIEHash.h" +#include "ByteStreamer.h" +#include "DwarfDebug.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/DIE.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/MD5.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +#define DEBUG_TYPE "dwarfdebug" + +/// Grabs the string in whichever attribute is passed in and returns +/// a reference to it. +static StringRef getDIEStringAttr(const DIE &Die, uint16_t Attr) { + // Iterate through all the attributes until we find the one we're + // looking for, if we can't find it return an empty string. + for (const auto &V : Die.values()) + if (V.getAttribute() == Attr) + return V.getDIEString().getString(); + + return StringRef(""); +} + +/// Adds the string in \p Str to the hash. This also hashes +/// a trailing NULL with the string. +void DIEHash::addString(StringRef Str) { + LLVM_DEBUG(dbgs() << "Adding string " << Str << " to hash.\n"); + Hash.update(Str); + Hash.update(makeArrayRef((uint8_t)'\0')); +} + +// FIXME: The LEB128 routines are copied and only slightly modified out of +// LEB128.h. + +/// Adds the unsigned in \p Value to the hash encoded as a ULEB128. +void DIEHash::addULEB128(uint64_t Value) { + LLVM_DEBUG(dbgs() << "Adding ULEB128 " << Value << " to hash.\n"); + do { + uint8_t Byte = Value & 0x7f; + Value >>= 7; + if (Value != 0) + Byte |= 0x80; // Mark this byte to show that more bytes will follow. + Hash.update(Byte); + } while (Value != 0); +} + +void DIEHash::addSLEB128(int64_t Value) { + LLVM_DEBUG(dbgs() << "Adding ULEB128 " << Value << " to hash.\n"); + bool More; + do { + uint8_t Byte = Value & 0x7f; + Value >>= 7; + More = !((((Value == 0) && ((Byte & 0x40) == 0)) || + ((Value == -1) && ((Byte & 0x40) != 0)))); + if (More) + Byte |= 0x80; // Mark this byte to show that more bytes will follow. + Hash.update(Byte); + } while (More); +} + +/// Including \p Parent adds the context of Parent to the hash.. +void DIEHash::addParentContext(const DIE &Parent) { + + LLVM_DEBUG(dbgs() << "Adding parent context to hash...\n"); + + // [7.27.2] For each surrounding type or namespace beginning with the + // outermost such construct... + SmallVector<const DIE *, 1> Parents; + const DIE *Cur = &Parent; + while (Cur->getParent()) { + Parents.push_back(Cur); + Cur = Cur->getParent(); + } + assert(Cur->getTag() == dwarf::DW_TAG_compile_unit || + Cur->getTag() == dwarf::DW_TAG_type_unit); + + // Reverse iterate over our list to go from the outermost construct to the + // innermost. + for (SmallVectorImpl<const DIE *>::reverse_iterator I = Parents.rbegin(), + E = Parents.rend(); + I != E; ++I) { + const DIE &Die = **I; + + // ... Append the letter "C" to the sequence... + addULEB128('C'); + + // ... Followed by the DWARF tag of the construct... + addULEB128(Die.getTag()); + + // ... Then the name, taken from the DW_AT_name attribute. + StringRef Name = getDIEStringAttr(Die, dwarf::DW_AT_name); + LLVM_DEBUG(dbgs() << "... adding context: " << Name << "\n"); + if (!Name.empty()) + addString(Name); + } +} + +// Collect all of the attributes for a particular DIE in single structure. +void DIEHash::collectAttributes(const DIE &Die, DIEAttrs &Attrs) { + + for (const auto &V : Die.values()) { + LLVM_DEBUG(dbgs() << "Attribute: " + << dwarf::AttributeString(V.getAttribute()) + << " added.\n"); + switch (V.getAttribute()) { +#define HANDLE_DIE_HASH_ATTR(NAME) \ + case dwarf::NAME: \ + Attrs.NAME = V; \ + break; +#include "DIEHashAttributes.def" + default: + break; + } + } +} + +void DIEHash::hashShallowTypeReference(dwarf::Attribute Attribute, + const DIE &Entry, StringRef Name) { + // append the letter 'N' + addULEB128('N'); + + // the DWARF attribute code (DW_AT_type or DW_AT_friend), + addULEB128(Attribute); + + // the context of the tag, + if (const DIE *Parent = Entry.getParent()) + addParentContext(*Parent); + + // the letter 'E', + addULEB128('E'); + + // and the name of the type. + addString(Name); + + // Currently DW_TAG_friends are not used by Clang, but if they do become so, + // here's the relevant spec text to implement: + // + // For DW_TAG_friend, if the referenced entry is the DW_TAG_subprogram, + // the context is omitted and the name to be used is the ABI-specific name + // of the subprogram (e.g., the mangled linker name). +} + +void DIEHash::hashRepeatedTypeReference(dwarf::Attribute Attribute, + unsigned DieNumber) { + // a) If T is in the list of [previously hashed types], use the letter + // 'R' as the marker + addULEB128('R'); + + addULEB128(Attribute); + + // and use the unsigned LEB128 encoding of [the index of T in the + // list] as the attribute value; + addULEB128(DieNumber); +} + +void DIEHash::hashDIEEntry(dwarf::Attribute Attribute, dwarf::Tag Tag, + const DIE &Entry) { + assert(Tag != dwarf::DW_TAG_friend && "No current LLVM clients emit friend " + "tags. Add support here when there's " + "a use case"); + // Step 5 + // If the tag in Step 3 is one of [the below tags] + if ((Tag == dwarf::DW_TAG_pointer_type || + Tag == dwarf::DW_TAG_reference_type || + Tag == dwarf::DW_TAG_rvalue_reference_type || + Tag == dwarf::DW_TAG_ptr_to_member_type) && + // and the referenced type (via the [below attributes]) + // FIXME: This seems overly restrictive, and causes hash mismatches + // there's a decl/def difference in the containing type of a + // ptr_to_member_type, but it's what DWARF says, for some reason. + Attribute == dwarf::DW_AT_type) { + // ... has a DW_AT_name attribute, + StringRef Name = getDIEStringAttr(Entry, dwarf::DW_AT_name); + if (!Name.empty()) { + hashShallowTypeReference(Attribute, Entry, Name); + return; + } + } + + unsigned &DieNumber = Numbering[&Entry]; + if (DieNumber) { + hashRepeatedTypeReference(Attribute, DieNumber); + return; + } + + // otherwise, b) use the letter 'T' as the marker, ... + addULEB128('T'); + + addULEB128(Attribute); + + // ... process the type T recursively by performing Steps 2 through 7, and + // use the result as the attribute value. + DieNumber = Numbering.size(); + computeHash(Entry); +} + +// Hash all of the values in a block like set of values. This assumes that +// all of the data is going to be added as integers. +void DIEHash::hashBlockData(const DIE::const_value_range &Values) { + for (const auto &V : Values) + Hash.update((uint64_t)V.getDIEInteger().getValue()); +} + +// Hash the contents of a loclistptr class. +void DIEHash::hashLocList(const DIELocList &LocList) { + HashingByteStreamer Streamer(*this); + DwarfDebug &DD = *AP->getDwarfDebug(); + const DebugLocStream &Locs = DD.getDebugLocs(); + for (const auto &Entry : Locs.getEntries(Locs.getList(LocList.getValue()))) + DD.emitDebugLocEntry(Streamer, Entry, nullptr); +} + +// Hash an individual attribute \param Attr based on the type of attribute and +// the form. +void DIEHash::hashAttribute(const DIEValue &Value, dwarf::Tag Tag) { + dwarf::Attribute Attribute = Value.getAttribute(); + + // Other attribute values use the letter 'A' as the marker, and the value + // consists of the form code (encoded as an unsigned LEB128 value) followed by + // the encoding of the value according to the form code. To ensure + // reproducibility of the signature, the set of forms used in the signature + // computation is limited to the following: DW_FORM_sdata, DW_FORM_flag, + // DW_FORM_string, and DW_FORM_block. + + switch (Value.getType()) { + case DIEValue::isNone: + llvm_unreachable("Expected valid DIEValue"); + + // 7.27 Step 3 + // ... An attribute that refers to another type entry T is processed as + // follows: + case DIEValue::isEntry: + hashDIEEntry(Attribute, Tag, Value.getDIEEntry().getEntry()); + break; + case DIEValue::isInteger: { + addULEB128('A'); + addULEB128(Attribute); + switch (Value.getForm()) { + case dwarf::DW_FORM_data1: + case dwarf::DW_FORM_data2: + case dwarf::DW_FORM_data4: + case dwarf::DW_FORM_data8: + case dwarf::DW_FORM_udata: + case dwarf::DW_FORM_sdata: + addULEB128(dwarf::DW_FORM_sdata); + addSLEB128((int64_t)Value.getDIEInteger().getValue()); + break; + // DW_FORM_flag_present is just flag with a value of one. We still give it a + // value so just use the value. + case dwarf::DW_FORM_flag_present: + case dwarf::DW_FORM_flag: + addULEB128(dwarf::DW_FORM_flag); + addULEB128((int64_t)Value.getDIEInteger().getValue()); + break; + default: + llvm_unreachable("Unknown integer form!"); + } + break; + } + case DIEValue::isString: + addULEB128('A'); + addULEB128(Attribute); + addULEB128(dwarf::DW_FORM_string); + addString(Value.getDIEString().getString()); + break; + case DIEValue::isInlineString: + addULEB128('A'); + addULEB128(Attribute); + addULEB128(dwarf::DW_FORM_string); + addString(Value.getDIEInlineString().getString()); + break; + case DIEValue::isBlock: + case DIEValue::isLoc: + case DIEValue::isLocList: + addULEB128('A'); + addULEB128(Attribute); + addULEB128(dwarf::DW_FORM_block); + if (Value.getType() == DIEValue::isBlock) { + addULEB128(Value.getDIEBlock().ComputeSize(AP)); + hashBlockData(Value.getDIEBlock().values()); + } else if (Value.getType() == DIEValue::isLoc) { + addULEB128(Value.getDIELoc().ComputeSize(AP)); + hashBlockData(Value.getDIELoc().values()); + } else { + // We could add the block length, but that would take + // a bit of work and not add a lot of uniqueness + // to the hash in some way we could test. + hashLocList(Value.getDIELocList()); + } + break; + // FIXME: It's uncertain whether or not we should handle this at the moment. + case DIEValue::isExpr: + case DIEValue::isLabel: + case DIEValue::isBaseTypeRef: + case DIEValue::isDelta: + llvm_unreachable("Add support for additional value types."); + } +} + +// Go through the attributes from \param Attrs in the order specified in 7.27.4 +// and hash them. +void DIEHash::hashAttributes(const DIEAttrs &Attrs, dwarf::Tag Tag) { +#define HANDLE_DIE_HASH_ATTR(NAME) \ + { \ + if (Attrs.NAME) \ + hashAttribute(Attrs.NAME, Tag); \ + } +#include "DIEHashAttributes.def" + // FIXME: Add the extended attributes. +} + +// Add all of the attributes for \param Die to the hash. +void DIEHash::addAttributes(const DIE &Die) { + DIEAttrs Attrs = {}; + collectAttributes(Die, Attrs); + hashAttributes(Attrs, Die.getTag()); +} + +void DIEHash::hashNestedType(const DIE &Die, StringRef Name) { + // 7.27 Step 7 + // ... append the letter 'S', + addULEB128('S'); + + // the tag of C, + addULEB128(Die.getTag()); + + // and the name. + addString(Name); +} + +// Compute the hash of a DIE. This is based on the type signature computation +// given in section 7.27 of the DWARF4 standard. It is the md5 hash of a +// flattened description of the DIE. +void DIEHash::computeHash(const DIE &Die) { + // Append the letter 'D', followed by the DWARF tag of the DIE. + addULEB128('D'); + addULEB128(Die.getTag()); + + // Add each of the attributes of the DIE. + addAttributes(Die); + + // Then hash each of the children of the DIE. + for (auto &C : Die.children()) { + // 7.27 Step 7 + // If C is a nested type entry or a member function entry, ... + if (isType(C.getTag()) || C.getTag() == dwarf::DW_TAG_subprogram) { + StringRef Name = getDIEStringAttr(C, dwarf::DW_AT_name); + // ... and has a DW_AT_name attribute + if (!Name.empty()) { + hashNestedType(C, Name); + continue; + } + } + computeHash(C); + } + + // Following the last (or if there are no children), append a zero byte. + Hash.update(makeArrayRef((uint8_t)'\0')); +} + +/// This is based on the type signature computation given in section 7.27 of the +/// DWARF4 standard. It is an md5 hash of the flattened description of the DIE +/// with the inclusion of the full CU and all top level CU entities. +// TODO: Initialize the type chain at 0 instead of 1 for CU signatures. +uint64_t DIEHash::computeCUSignature(StringRef DWOName, const DIE &Die) { + Numbering.clear(); + Numbering[&Die] = 1; + + if (!DWOName.empty()) + Hash.update(DWOName); + // Hash the DIE. + computeHash(Die); + + // Now return the result. + MD5::MD5Result Result; + Hash.final(Result); + + // ... take the least significant 8 bytes and return those. Our MD5 + // implementation always returns its results in little endian, so we actually + // need the "high" word. + return Result.high(); +} + +/// This is based on the type signature computation given in section 7.27 of the +/// DWARF4 standard. It is an md5 hash of the flattened description of the DIE +/// with the inclusion of additional forms not specifically called out in the +/// standard. +uint64_t DIEHash::computeTypeSignature(const DIE &Die) { + Numbering.clear(); + Numbering[&Die] = 1; + + if (const DIE *Parent = Die.getParent()) + addParentContext(*Parent); + + // Hash the DIE. + computeHash(Die); + + // Now return the result. + MD5::MD5Result Result; + Hash.final(Result); + + // ... take the least significant 8 bytes and return those. Our MD5 + // implementation always returns its results in little endian, so we actually + // need the "high" word. + return Result.high(); +} diff --git a/llvm/lib/CodeGen/AsmPrinter/DIEHash.h b/llvm/lib/CodeGen/AsmPrinter/DIEHash.h new file mode 100644 index 000000000000..2e49514c98be --- /dev/null +++ b/llvm/lib/CodeGen/AsmPrinter/DIEHash.h @@ -0,0 +1,109 @@ +//===-- llvm/CodeGen/DIEHash.h - Dwarf Hashing Framework -------*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains support for DWARF4 hashing of DIEs. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DIEHASH_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_DIEHASH_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/CodeGen/DIE.h" +#include "llvm/Support/MD5.h" + +namespace llvm { + +class AsmPrinter; +class CompileUnit; + +/// An object containing the capability of hashing and adding hash +/// attributes onto a DIE. +class DIEHash { + // Collection of all attributes used in hashing a particular DIE. + struct DIEAttrs { +#define HANDLE_DIE_HASH_ATTR(NAME) DIEValue NAME; +#include "DIEHashAttributes.def" + }; + +public: + DIEHash(AsmPrinter *A = nullptr) : AP(A) {} + + /// Computes the CU signature. + uint64_t computeCUSignature(StringRef DWOName, const DIE &Die); + + /// Computes the type signature. + uint64_t computeTypeSignature(const DIE &Die); + + // Helper routines to process parts of a DIE. +private: + /// Adds the parent context of \param Parent to the hash. + void addParentContext(const DIE &Parent); + + /// Adds the attributes of \param Die to the hash. + void addAttributes(const DIE &Die); + + /// Computes the full DWARF4 7.27 hash of the DIE. + void computeHash(const DIE &Die); + + // Routines that add DIEValues to the hash. +public: + /// Adds \param Value to the hash. + void update(uint8_t Value) { Hash.update(Value); } + + /// Encodes and adds \param Value to the hash as a ULEB128. + void addULEB128(uint64_t Value); + + /// Encodes and adds \param Value to the hash as a SLEB128. + void addSLEB128(int64_t Value); + +private: + /// Adds \param Str to the hash and includes a NULL byte. + void addString(StringRef Str); + + /// Collects the attributes of DIE \param Die into the \param Attrs + /// structure. + void collectAttributes(const DIE &Die, DIEAttrs &Attrs); + + /// Hashes the attributes in \param Attrs in order. + void hashAttributes(const DIEAttrs &Attrs, dwarf::Tag Tag); + + /// Hashes the data in a block like DIEValue, e.g. DW_FORM_block or + /// DW_FORM_exprloc. + void hashBlockData(const DIE::const_value_range &Values); + + /// Hashes the contents pointed to in the .debug_loc section. + void hashLocList(const DIELocList &LocList); + + /// Hashes an individual attribute. + void hashAttribute(const DIEValue &Value, dwarf::Tag Tag); + + /// Hashes an attribute that refers to another DIE. + void hashDIEEntry(dwarf::Attribute Attribute, dwarf::Tag Tag, + const DIE &Entry); + + /// Hashes a reference to a named type in such a way that is + /// independent of whether that type is described by a declaration or a + /// definition. + void hashShallowTypeReference(dwarf::Attribute Attribute, const DIE &Entry, + StringRef Name); + + /// Hashes a reference to a previously referenced type DIE. + void hashRepeatedTypeReference(dwarf::Attribute Attribute, + unsigned DieNumber); + + void hashNestedType(const DIE &Die, StringRef Name); + +private: + MD5 Hash; + AsmPrinter *AP; + DenseMap<const DIE *, unsigned> Numbering; +}; +} + +#endif diff --git a/llvm/lib/CodeGen/AsmPrinter/DIEHashAttributes.def b/llvm/lib/CodeGen/AsmPrinter/DIEHashAttributes.def new file mode 100644 index 000000000000..28a02390fccb --- /dev/null +++ b/llvm/lib/CodeGen/AsmPrinter/DIEHashAttributes.def @@ -0,0 +1,55 @@ +#ifndef HANDLE_DIE_HASH_ATTR +#error "Missing macro definition of HANDLE_DIE_HASH_ATTR" +#endif + +HANDLE_DIE_HASH_ATTR(DW_AT_name) +HANDLE_DIE_HASH_ATTR(DW_AT_accessibility) +HANDLE_DIE_HASH_ATTR(DW_AT_address_class) +HANDLE_DIE_HASH_ATTR(DW_AT_allocated) +HANDLE_DIE_HASH_ATTR(DW_AT_artificial) +HANDLE_DIE_HASH_ATTR(DW_AT_associated) +HANDLE_DIE_HASH_ATTR(DW_AT_binary_scale) +HANDLE_DIE_HASH_ATTR(DW_AT_bit_offset) +HANDLE_DIE_HASH_ATTR(DW_AT_bit_size) +HANDLE_DIE_HASH_ATTR(DW_AT_bit_stride) +HANDLE_DIE_HASH_ATTR(DW_AT_byte_size) +HANDLE_DIE_HASH_ATTR(DW_AT_byte_stride) +HANDLE_DIE_HASH_ATTR(DW_AT_const_expr) +HANDLE_DIE_HASH_ATTR(DW_AT_const_value) +HANDLE_DIE_HASH_ATTR(DW_AT_containing_type) +HANDLE_DIE_HASH_ATTR(DW_AT_count) +HANDLE_DIE_HASH_ATTR(DW_AT_data_bit_offset) +HANDLE_DIE_HASH_ATTR(DW_AT_data_location) +HANDLE_DIE_HASH_ATTR(DW_AT_data_member_location) +HANDLE_DIE_HASH_ATTR(DW_AT_decimal_scale) +HANDLE_DIE_HASH_ATTR(DW_AT_decimal_sign) +HANDLE_DIE_HASH_ATTR(DW_AT_default_value) +HANDLE_DIE_HASH_ATTR(DW_AT_digit_count) +HANDLE_DIE_HASH_ATTR(DW_AT_discr) +HANDLE_DIE_HASH_ATTR(DW_AT_discr_list) +HANDLE_DIE_HASH_ATTR(DW_AT_discr_value) +HANDLE_DIE_HASH_ATTR(DW_AT_encoding) +HANDLE_DIE_HASH_ATTR(DW_AT_enum_class) +HANDLE_DIE_HASH_ATTR(DW_AT_endianity) +HANDLE_DIE_HASH_ATTR(DW_AT_explicit) +HANDLE_DIE_HASH_ATTR(DW_AT_is_optional) +HANDLE_DIE_HASH_ATTR(DW_AT_location) +HANDLE_DIE_HASH_ATTR(DW_AT_lower_bound) +HANDLE_DIE_HASH_ATTR(DW_AT_mutable) +HANDLE_DIE_HASH_ATTR(DW_AT_ordering) +HANDLE_DIE_HASH_ATTR(DW_AT_picture_string) +HANDLE_DIE_HASH_ATTR(DW_AT_prototyped) +HANDLE_DIE_HASH_ATTR(DW_AT_small) +HANDLE_DIE_HASH_ATTR(DW_AT_segment) +HANDLE_DIE_HASH_ATTR(DW_AT_string_length) +HANDLE_DIE_HASH_ATTR(DW_AT_threads_scaled) +HANDLE_DIE_HASH_ATTR(DW_AT_upper_bound) +HANDLE_DIE_HASH_ATTR(DW_AT_use_location) +HANDLE_DIE_HASH_ATTR(DW_AT_use_UTF8) +HANDLE_DIE_HASH_ATTR(DW_AT_variable_parameter) +HANDLE_DIE_HASH_ATTR(DW_AT_virtuality) +HANDLE_DIE_HASH_ATTR(DW_AT_visibility) +HANDLE_DIE_HASH_ATTR(DW_AT_vtable_elem_location) +HANDLE_DIE_HASH_ATTR(DW_AT_type) + +#undef HANDLE_DIE_HASH_ATTR diff --git a/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp b/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp new file mode 100644 index 000000000000..7f9d6c618ad3 --- /dev/null +++ b/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp @@ -0,0 +1,375 @@ +//===- llvm/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/DbgEntityHistoryCalculator.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include <cassert> +#include <map> +#include <utility> + +using namespace llvm; + +#define DEBUG_TYPE "dwarfdebug" + +namespace { +using EntryIndex = DbgValueHistoryMap::EntryIndex; +} + +// If @MI is a DBG_VALUE with debug value described by a +// defined register, returns the number of this register. +// In the other case, returns 0. +static Register isDescribedByReg(const MachineInstr &MI) { + assert(MI.isDebugValue()); + assert(MI.getNumOperands() == 4); + // If the location of variable is an entry value (DW_OP_LLVM_entry_value) + // do not consider it as a register location. + if (MI.getDebugExpression()->isEntryValue()) + return 0; + // If location of variable is described using a register (directly or + // indirectly), this register is always a first operand. + return MI.getOperand(0).isReg() ? MI.getOperand(0).getReg() : Register(); +} + +bool DbgValueHistoryMap::startDbgValue(InlinedEntity Var, + const MachineInstr &MI, + EntryIndex &NewIndex) { + // Instruction range should start with a DBG_VALUE instruction for the + // variable. + assert(MI.isDebugValue() && "not a DBG_VALUE"); + auto &Entries = VarEntries[Var]; + if (!Entries.empty() && Entries.back().isDbgValue() && + !Entries.back().isClosed() && + Entries.back().getInstr()->isIdenticalTo(MI)) { + LLVM_DEBUG(dbgs() << "Coalescing identical DBG_VALUE entries:\n" + << "\t" << Entries.back().getInstr() << "\t" << MI + << "\n"); + return false; + } + Entries.emplace_back(&MI, Entry::DbgValue); + NewIndex = Entries.size() - 1; + return true; +} + +EntryIndex DbgValueHistoryMap::startClobber(InlinedEntity Var, + const MachineInstr &MI) { + auto &Entries = VarEntries[Var]; + // If an instruction clobbers multiple registers that the variable is + // described by, then we may have already created a clobbering instruction. + if (Entries.back().isClobber() && Entries.back().getInstr() == &MI) + return Entries.size() - 1; + Entries.emplace_back(&MI, Entry::Clobber); + return Entries.size() - 1; +} + +void DbgValueHistoryMap::Entry::endEntry(EntryIndex Index) { + // For now, instruction ranges are not allowed to cross basic block + // boundaries. + assert(isDbgValue() && "Setting end index for non-debug value"); + assert(!isClosed() && "End index has already been set"); + EndIndex = Index; +} + +void DbgLabelInstrMap::addInstr(InlinedEntity Label, const MachineInstr &MI) { + assert(MI.isDebugLabel() && "not a DBG_LABEL"); + LabelInstr[Label] = &MI; +} + +namespace { + +// Maps physreg numbers to the variables they describe. +using InlinedEntity = DbgValueHistoryMap::InlinedEntity; +using RegDescribedVarsMap = std::map<unsigned, SmallVector<InlinedEntity, 1>>; + +// Keeps track of the debug value entries that are currently live for each +// inlined entity. As the history map entries are stored in a SmallVector, they +// may be moved at insertion of new entries, so store indices rather than +// pointers. +using DbgValueEntriesMap = std::map<InlinedEntity, SmallSet<EntryIndex, 1>>; + +} // end anonymous namespace + +// Claim that @Var is not described by @RegNo anymore. +static void dropRegDescribedVar(RegDescribedVarsMap &RegVars, unsigned RegNo, + InlinedEntity Var) { + const auto &I = RegVars.find(RegNo); + assert(RegNo != 0U && I != RegVars.end()); + auto &VarSet = I->second; + const auto &VarPos = llvm::find(VarSet, Var); + assert(VarPos != VarSet.end()); + VarSet.erase(VarPos); + // Don't keep empty sets in a map to keep it as small as possible. + if (VarSet.empty()) + RegVars.erase(I); +} + +// Claim that @Var is now described by @RegNo. +static void addRegDescribedVar(RegDescribedVarsMap &RegVars, unsigned RegNo, + InlinedEntity Var) { + assert(RegNo != 0U); + auto &VarSet = RegVars[RegNo]; + assert(!is_contained(VarSet, Var)); + VarSet.push_back(Var); +} + +/// Create a clobbering entry and end all open debug value entries +/// for \p Var that are described by \p RegNo using that entry. +static void clobberRegEntries(InlinedEntity Var, unsigned RegNo, + const MachineInstr &ClobberingInstr, + DbgValueEntriesMap &LiveEntries, + DbgValueHistoryMap &HistMap) { + EntryIndex ClobberIndex = HistMap.startClobber(Var, ClobberingInstr); + + // Close all entries whose values are described by the register. + SmallVector<EntryIndex, 4> IndicesToErase; + for (auto Index : LiveEntries[Var]) { + auto &Entry = HistMap.getEntry(Var, Index); + assert(Entry.isDbgValue() && "Not a DBG_VALUE in LiveEntries"); + if (isDescribedByReg(*Entry.getInstr()) == RegNo) { + IndicesToErase.push_back(Index); + Entry.endEntry(ClobberIndex); + } + } + + // Drop all entries that have ended. + for (auto Index : IndicesToErase) + LiveEntries[Var].erase(Index); +} + +/// Add a new debug value for \p Var. Closes all overlapping debug values. +static void handleNewDebugValue(InlinedEntity Var, const MachineInstr &DV, + RegDescribedVarsMap &RegVars, + DbgValueEntriesMap &LiveEntries, + DbgValueHistoryMap &HistMap) { + EntryIndex NewIndex; + if (HistMap.startDbgValue(Var, DV, NewIndex)) { + SmallDenseMap<unsigned, bool, 4> TrackedRegs; + + // If we have created a new debug value entry, close all preceding + // live entries that overlap. + SmallVector<EntryIndex, 4> IndicesToErase; + const DIExpression *DIExpr = DV.getDebugExpression(); + for (auto Index : LiveEntries[Var]) { + auto &Entry = HistMap.getEntry(Var, Index); + assert(Entry.isDbgValue() && "Not a DBG_VALUE in LiveEntries"); + const MachineInstr &DV = *Entry.getInstr(); + bool Overlaps = DIExpr->fragmentsOverlap(DV.getDebugExpression()); + if (Overlaps) { + IndicesToErase.push_back(Index); + Entry.endEntry(NewIndex); + } + if (Register Reg = isDescribedByReg(DV)) + TrackedRegs[Reg] |= !Overlaps; + } + + // If the new debug value is described by a register, add tracking of + // that register if it is not already tracked. + if (Register NewReg = isDescribedByReg(DV)) { + if (!TrackedRegs.count(NewReg)) + addRegDescribedVar(RegVars, NewReg, Var); + LiveEntries[Var].insert(NewIndex); + TrackedRegs[NewReg] = true; + } + + // Drop tracking of registers that are no longer used. + for (auto I : TrackedRegs) + if (!I.second) + dropRegDescribedVar(RegVars, I.first, Var); + + // Drop all entries that have ended, and mark the new entry as live. + for (auto Index : IndicesToErase) + LiveEntries[Var].erase(Index); + LiveEntries[Var].insert(NewIndex); + } +} + +// Terminate the location range for variables described by register at +// @I by inserting @ClobberingInstr to their history. +static void clobberRegisterUses(RegDescribedVarsMap &RegVars, + RegDescribedVarsMap::iterator I, + DbgValueHistoryMap &HistMap, + DbgValueEntriesMap &LiveEntries, + const MachineInstr &ClobberingInstr) { + // Iterate over all variables described by this register and add this + // instruction to their history, clobbering it. + for (const auto &Var : I->second) + clobberRegEntries(Var, I->first, ClobberingInstr, LiveEntries, HistMap); + RegVars.erase(I); +} + +// Terminate the location range for variables described by register +// @RegNo by inserting @ClobberingInstr to their history. +static void clobberRegisterUses(RegDescribedVarsMap &RegVars, unsigned RegNo, + DbgValueHistoryMap &HistMap, + DbgValueEntriesMap &LiveEntries, + const MachineInstr &ClobberingInstr) { + const auto &I = RegVars.find(RegNo); + if (I == RegVars.end()) + return; + clobberRegisterUses(RegVars, I, HistMap, LiveEntries, ClobberingInstr); +} + +void llvm::calculateDbgEntityHistory(const MachineFunction *MF, + const TargetRegisterInfo *TRI, + DbgValueHistoryMap &DbgValues, + DbgLabelInstrMap &DbgLabels) { + const TargetLowering *TLI = MF->getSubtarget().getTargetLowering(); + unsigned SP = TLI->getStackPointerRegisterToSaveRestore(); + Register FrameReg = TRI->getFrameRegister(*MF); + RegDescribedVarsMap RegVars; + DbgValueEntriesMap LiveEntries; + for (const auto &MBB : *MF) { + for (const auto &MI : MBB) { + if (MI.isDebugValue()) { + assert(MI.getNumOperands() > 1 && "Invalid DBG_VALUE instruction!"); + // Use the base variable (without any DW_OP_piece expressions) + // as index into History. The full variables including the + // piece expressions are attached to the MI. + const DILocalVariable *RawVar = MI.getDebugVariable(); + assert(RawVar->isValidLocationForIntrinsic(MI.getDebugLoc()) && + "Expected inlined-at fields to agree"); + InlinedEntity Var(RawVar, MI.getDebugLoc()->getInlinedAt()); + + handleNewDebugValue(Var, MI, RegVars, LiveEntries, DbgValues); + } else if (MI.isDebugLabel()) { + assert(MI.getNumOperands() == 1 && "Invalid DBG_LABEL instruction!"); + const DILabel *RawLabel = MI.getDebugLabel(); + assert(RawLabel->isValidLocationForIntrinsic(MI.getDebugLoc()) && + "Expected inlined-at fields to agree"); + // When collecting debug information for labels, there is no MCSymbol + // generated for it. So, we keep MachineInstr in DbgLabels in order + // to query MCSymbol afterward. + InlinedEntity L(RawLabel, MI.getDebugLoc()->getInlinedAt()); + DbgLabels.addInstr(L, MI); + } + + if (MI.isDebugInstr()) + continue; + + // Not a DBG_VALUE instruction. It may clobber registers which describe + // some variables. + for (const MachineOperand &MO : MI.operands()) { + if (MO.isReg() && MO.isDef() && MO.getReg()) { + // Ignore call instructions that claim to clobber SP. The AArch64 + // backend does this for aggregate function arguments. + if (MI.isCall() && MO.getReg() == SP) + continue; + // If this is a virtual register, only clobber it since it doesn't + // have aliases. + if (Register::isVirtualRegister(MO.getReg())) + clobberRegisterUses(RegVars, MO.getReg(), DbgValues, LiveEntries, + MI); + // If this is a register def operand, it may end a debug value + // range. Ignore frame-register defs in the epilogue and prologue, + // we expect debuggers to understand that stack-locations are + // invalid outside of the function body. + else if (MO.getReg() != FrameReg || + (!MI.getFlag(MachineInstr::FrameDestroy) && + !MI.getFlag(MachineInstr::FrameSetup))) { + for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); + ++AI) + clobberRegisterUses(RegVars, *AI, DbgValues, LiveEntries, MI); + } + } else if (MO.isRegMask()) { + // If this is a register mask operand, clobber all debug values in + // non-CSRs. + SmallVector<unsigned, 32> RegsToClobber; + // Don't consider SP to be clobbered by register masks. + for (auto It : RegVars) { + unsigned int Reg = It.first; + if (Reg != SP && Register::isPhysicalRegister(Reg) && + MO.clobbersPhysReg(Reg)) + RegsToClobber.push_back(Reg); + } + + for (unsigned Reg : RegsToClobber) { + clobberRegisterUses(RegVars, Reg, DbgValues, LiveEntries, MI); + } + } + } // End MO loop. + } // End instr loop. + + // Make sure locations for all variables are valid only until the end of + // the basic block (unless it's the last basic block, in which case let + // their liveness run off to the end of the function). + if (!MBB.empty() && &MBB != &MF->back()) { + // Iterate over all variables that have open debug values. + for (auto &Pair : LiveEntries) { + if (Pair.second.empty()) + continue; + + // Create a clobbering entry. + EntryIndex ClobIdx = DbgValues.startClobber(Pair.first, MBB.back()); + + // End all entries. + for (EntryIndex Idx : Pair.second) { + DbgValueHistoryMap::Entry &Ent = DbgValues.getEntry(Pair.first, Idx); + assert(Ent.isDbgValue() && !Ent.isClosed()); + Ent.endEntry(ClobIdx); + } + } + + LiveEntries.clear(); + RegVars.clear(); + } + } +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void DbgValueHistoryMap::dump() const { + dbgs() << "DbgValueHistoryMap:\n"; + for (const auto &VarRangePair : *this) { + const InlinedEntity &Var = VarRangePair.first; + const Entries &Entries = VarRangePair.second; + + const DILocalVariable *LocalVar = cast<DILocalVariable>(Var.first); + const DILocation *Location = Var.second; + + dbgs() << " - " << LocalVar->getName() << " at "; + + if (Location) + dbgs() << Location->getFilename() << ":" << Location->getLine() << ":" + << Location->getColumn(); + else + dbgs() << "<unknown location>"; + + dbgs() << " --\n"; + + for (const auto &E : enumerate(Entries)) { + const auto &Entry = E.value(); + dbgs() << " Entry[" << E.index() << "]: "; + if (Entry.isDbgValue()) + dbgs() << "Debug value\n"; + else + dbgs() << "Clobber\n"; + dbgs() << " Instr: " << *Entry.getInstr(); + if (Entry.isDbgValue()) { + if (Entry.getEndIndex() == NoEntry) + dbgs() << " - Valid until end of function\n"; + else + dbgs() << " - Closed by Entry[" << Entry.getEndIndex() << "]\n"; + } + dbgs() << "\n"; + } + } +} +#endif diff --git a/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp b/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp new file mode 100644 index 000000000000..22f458e4b03e --- /dev/null +++ b/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp @@ -0,0 +1,344 @@ +//===-- llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp -------*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Common functionality for different debug information format backends. +// LLVM currently supports DWARF and CodeView. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/DebugHandlerBase.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/Twine.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/MC/MCStreamer.h" + +using namespace llvm; + +#define DEBUG_TYPE "dwarfdebug" + +Optional<DbgVariableLocation> +DbgVariableLocation::extractFromMachineInstruction( + const MachineInstr &Instruction) { + DbgVariableLocation Location; + if (!Instruction.isDebugValue()) + return None; + if (!Instruction.getOperand(0).isReg()) + return None; + Location.Register = Instruction.getOperand(0).getReg(); + Location.FragmentInfo.reset(); + // We only handle expressions generated by DIExpression::appendOffset, + // which doesn't require a full stack machine. + int64_t Offset = 0; + const DIExpression *DIExpr = Instruction.getDebugExpression(); + auto Op = DIExpr->expr_op_begin(); + while (Op != DIExpr->expr_op_end()) { + switch (Op->getOp()) { + case dwarf::DW_OP_constu: { + int Value = Op->getArg(0); + ++Op; + if (Op != DIExpr->expr_op_end()) { + switch (Op->getOp()) { + case dwarf::DW_OP_minus: + Offset -= Value; + break; + case dwarf::DW_OP_plus: + Offset += Value; + break; + default: + continue; + } + } + } break; + case dwarf::DW_OP_plus_uconst: + Offset += Op->getArg(0); + break; + case dwarf::DW_OP_LLVM_fragment: + Location.FragmentInfo = {Op->getArg(1), Op->getArg(0)}; + break; + case dwarf::DW_OP_deref: + Location.LoadChain.push_back(Offset); + Offset = 0; + break; + default: + return None; + } + ++Op; + } + + // Do one final implicit DW_OP_deref if this was an indirect DBG_VALUE + // instruction. + // FIXME: Replace these with DIExpression. + if (Instruction.isIndirectDebugValue()) + Location.LoadChain.push_back(Offset); + + return Location; +} + +DebugHandlerBase::DebugHandlerBase(AsmPrinter *A) : Asm(A), MMI(Asm->MMI) {} + +// Each LexicalScope has first instruction and last instruction to mark +// beginning and end of a scope respectively. Create an inverse map that list +// scopes starts (and ends) with an instruction. One instruction may start (or +// end) multiple scopes. Ignore scopes that are not reachable. +void DebugHandlerBase::identifyScopeMarkers() { + SmallVector<LexicalScope *, 4> WorkList; + WorkList.push_back(LScopes.getCurrentFunctionScope()); + while (!WorkList.empty()) { + LexicalScope *S = WorkList.pop_back_val(); + + const SmallVectorImpl<LexicalScope *> &Children = S->getChildren(); + if (!Children.empty()) + WorkList.append(Children.begin(), Children.end()); + + if (S->isAbstractScope()) + continue; + + for (const InsnRange &R : S->getRanges()) { + assert(R.first && "InsnRange does not have first instruction!"); + assert(R.second && "InsnRange does not have second instruction!"); + requestLabelBeforeInsn(R.first); + requestLabelAfterInsn(R.second); + } + } +} + +// Return Label preceding the instruction. +MCSymbol *DebugHandlerBase::getLabelBeforeInsn(const MachineInstr *MI) { + MCSymbol *Label = LabelsBeforeInsn.lookup(MI); + assert(Label && "Didn't insert label before instruction"); + return Label; +} + +// Return Label immediately following the instruction. +MCSymbol *DebugHandlerBase::getLabelAfterInsn(const MachineInstr *MI) { + return LabelsAfterInsn.lookup(MI); +} + +// Return the function-local offset of an instruction. +const MCExpr * +DebugHandlerBase::getFunctionLocalOffsetAfterInsn(const MachineInstr *MI) { + MCContext &MC = Asm->OutContext; + + MCSymbol *Start = Asm->getFunctionBegin(); + const auto *StartRef = MCSymbolRefExpr::create(Start, MC); + + MCSymbol *AfterInsn = getLabelAfterInsn(MI); + assert(AfterInsn && "Expected label after instruction"); + const auto *AfterRef = MCSymbolRefExpr::create(AfterInsn, MC); + + return MCBinaryExpr::createSub(AfterRef, StartRef, MC); +} + +/// If this type is derived from a base type then return base type size. +uint64_t DebugHandlerBase::getBaseTypeSize(const DIType *Ty) { + assert(Ty); + const DIDerivedType *DDTy = dyn_cast<DIDerivedType>(Ty); + if (!DDTy) + return Ty->getSizeInBits(); + + unsigned Tag = DDTy->getTag(); + + if (Tag != dwarf::DW_TAG_member && Tag != dwarf::DW_TAG_typedef && + Tag != dwarf::DW_TAG_const_type && Tag != dwarf::DW_TAG_volatile_type && + Tag != dwarf::DW_TAG_restrict_type && Tag != dwarf::DW_TAG_atomic_type) + return DDTy->getSizeInBits(); + + DIType *BaseType = DDTy->getBaseType(); + + if (!BaseType) + return 0; + + // If this is a derived type, go ahead and get the base type, unless it's a + // reference then it's just the size of the field. Pointer types have no need + // of this since they're a different type of qualification on the type. + if (BaseType->getTag() == dwarf::DW_TAG_reference_type || + BaseType->getTag() == dwarf::DW_TAG_rvalue_reference_type) + return Ty->getSizeInBits(); + + return getBaseTypeSize(BaseType); +} + +static bool hasDebugInfo(const MachineModuleInfo *MMI, + const MachineFunction *MF) { + if (!MMI->hasDebugInfo()) + return false; + auto *SP = MF->getFunction().getSubprogram(); + if (!SP) + return false; + assert(SP->getUnit()); + auto EK = SP->getUnit()->getEmissionKind(); + if (EK == DICompileUnit::NoDebug) + return false; + return true; +} + +void DebugHandlerBase::beginFunction(const MachineFunction *MF) { + PrevInstBB = nullptr; + + if (!Asm || !hasDebugInfo(MMI, MF)) { + skippedNonDebugFunction(); + return; + } + + // Grab the lexical scopes for the function, if we don't have any of those + // then we're not going to be able to do anything. + LScopes.initialize(*MF); + if (LScopes.empty()) { + beginFunctionImpl(MF); + return; + } + + // Make sure that each lexical scope will have a begin/end label. + identifyScopeMarkers(); + + // Calculate history for local variables. + assert(DbgValues.empty() && "DbgValues map wasn't cleaned!"); + assert(DbgLabels.empty() && "DbgLabels map wasn't cleaned!"); + calculateDbgEntityHistory(MF, Asm->MF->getSubtarget().getRegisterInfo(), + DbgValues, DbgLabels); + LLVM_DEBUG(DbgValues.dump()); + + // Request labels for the full history. + for (const auto &I : DbgValues) { + const auto &Entries = I.second; + if (Entries.empty()) + continue; + + auto IsDescribedByReg = [](const MachineInstr *MI) { + return MI->getOperand(0).isReg() && MI->getOperand(0).getReg(); + }; + + // The first mention of a function argument gets the CurrentFnBegin label, + // so arguments are visible when breaking at function entry. + // + // We do not change the label for values that are described by registers, + // as that could place them above their defining instructions. We should + // ideally not change the labels for constant debug values either, since + // doing that violates the ranges that are calculated in the history map. + // However, we currently do not emit debug values for constant arguments + // directly at the start of the function, so this code is still useful. + const DILocalVariable *DIVar = + Entries.front().getInstr()->getDebugVariable(); + if (DIVar->isParameter() && + getDISubprogram(DIVar->getScope())->describes(&MF->getFunction())) { + if (!IsDescribedByReg(Entries.front().getInstr())) + LabelsBeforeInsn[Entries.front().getInstr()] = Asm->getFunctionBegin(); + if (Entries.front().getInstr()->getDebugExpression()->isFragment()) { + // Mark all non-overlapping initial fragments. + for (auto I = Entries.begin(); I != Entries.end(); ++I) { + if (!I->isDbgValue()) + continue; + const DIExpression *Fragment = I->getInstr()->getDebugExpression(); + if (std::any_of(Entries.begin(), I, + [&](DbgValueHistoryMap::Entry Pred) { + return Pred.isDbgValue() && + Fragment->fragmentsOverlap( + Pred.getInstr()->getDebugExpression()); + })) + break; + // The code that generates location lists for DWARF assumes that the + // entries' start labels are monotonically increasing, and since we + // don't change the label for fragments that are described by + // registers, we must bail out when encountering such a fragment. + if (IsDescribedByReg(I->getInstr())) + break; + LabelsBeforeInsn[I->getInstr()] = Asm->getFunctionBegin(); + } + } + } + + for (const auto &Entry : Entries) { + if (Entry.isDbgValue()) + requestLabelBeforeInsn(Entry.getInstr()); + else + requestLabelAfterInsn(Entry.getInstr()); + } + } + + // Ensure there is a symbol before DBG_LABEL. + for (const auto &I : DbgLabels) { + const MachineInstr *MI = I.second; + requestLabelBeforeInsn(MI); + } + + PrevInstLoc = DebugLoc(); + PrevLabel = Asm->getFunctionBegin(); + beginFunctionImpl(MF); +} + +void DebugHandlerBase::beginInstruction(const MachineInstr *MI) { + if (!MMI->hasDebugInfo()) + return; + + assert(CurMI == nullptr); + CurMI = MI; + + // Insert labels where requested. + DenseMap<const MachineInstr *, MCSymbol *>::iterator I = + LabelsBeforeInsn.find(MI); + + // No label needed. + if (I == LabelsBeforeInsn.end()) + return; + + // Label already assigned. + if (I->second) + return; + + if (!PrevLabel) { + PrevLabel = MMI->getContext().createTempSymbol(); + Asm->OutStreamer->EmitLabel(PrevLabel); + } + I->second = PrevLabel; +} + +void DebugHandlerBase::endInstruction() { + if (!MMI->hasDebugInfo()) + return; + + assert(CurMI != nullptr); + // Don't create a new label after DBG_VALUE and other instructions that don't + // generate code. + if (!CurMI->isMetaInstruction()) { + PrevLabel = nullptr; + PrevInstBB = CurMI->getParent(); + } + + DenseMap<const MachineInstr *, MCSymbol *>::iterator I = + LabelsAfterInsn.find(CurMI); + CurMI = nullptr; + + // No label needed. + if (I == LabelsAfterInsn.end()) + return; + + // Label already assigned. + if (I->second) + return; + + // We need a label after this instruction. + if (!PrevLabel) { + PrevLabel = MMI->getContext().createTempSymbol(); + Asm->OutStreamer->EmitLabel(PrevLabel); + } + I->second = PrevLabel; +} + +void DebugHandlerBase::endFunction(const MachineFunction *MF) { + if (hasDebugInfo(MMI, MF)) + endFunctionImpl(MF); + DbgValues.clear(); + DbgLabels.clear(); + LabelsBeforeInsn.clear(); + LabelsAfterInsn.clear(); +} diff --git a/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h new file mode 100644 index 000000000000..17e39b3d3268 --- /dev/null +++ b/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h @@ -0,0 +1,184 @@ +//===-- llvm/CodeGen/DebugLocEntry.h - Entry in debug_loc list -*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCENTRY_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCENTRY_H + +#include "DebugLocStream.h" +#include "llvm/Config/llvm-config.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MachineLocation.h" +#include "llvm/Support/Debug.h" + +namespace llvm { +class AsmPrinter; + +/// A single location or constant. +class DbgValueLoc { + /// Any complex address location expression for this DbgValueLoc. + const DIExpression *Expression; + + /// Type of entry that this represents. + enum EntryType { E_Location, E_Integer, E_ConstantFP, E_ConstantInt }; + enum EntryType EntryKind; + + /// Either a constant, + union { + int64_t Int; + const ConstantFP *CFP; + const ConstantInt *CIP; + } Constant; + + /// Or a location in the machine frame. + MachineLocation Loc; + +public: + DbgValueLoc(const DIExpression *Expr, int64_t i) + : Expression(Expr), EntryKind(E_Integer) { + Constant.Int = i; + } + DbgValueLoc(const DIExpression *Expr, const ConstantFP *CFP) + : Expression(Expr), EntryKind(E_ConstantFP) { + Constant.CFP = CFP; + } + DbgValueLoc(const DIExpression *Expr, const ConstantInt *CIP) + : Expression(Expr), EntryKind(E_ConstantInt) { + Constant.CIP = CIP; + } + DbgValueLoc(const DIExpression *Expr, MachineLocation Loc) + : Expression(Expr), EntryKind(E_Location), Loc(Loc) { + assert(cast<DIExpression>(Expr)->isValid()); + } + + bool isLocation() const { return EntryKind == E_Location; } + bool isInt() const { return EntryKind == E_Integer; } + bool isConstantFP() const { return EntryKind == E_ConstantFP; } + bool isConstantInt() const { return EntryKind == E_ConstantInt; } + int64_t getInt() const { return Constant.Int; } + const ConstantFP *getConstantFP() const { return Constant.CFP; } + const ConstantInt *getConstantInt() const { return Constant.CIP; } + MachineLocation getLoc() const { return Loc; } + bool isFragment() const { return getExpression()->isFragment(); } + bool isEntryVal() const { return getExpression()->isEntryValue(); } + const DIExpression *getExpression() const { return Expression; } + friend bool operator==(const DbgValueLoc &, const DbgValueLoc &); + friend bool operator<(const DbgValueLoc &, const DbgValueLoc &); +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + LLVM_DUMP_METHOD void dump() const { + if (isLocation()) { + llvm::dbgs() << "Loc = { reg=" << Loc.getReg() << " "; + if (Loc.isIndirect()) + llvm::dbgs() << "+0"; + llvm::dbgs() << "} "; + } else if (isConstantInt()) + Constant.CIP->dump(); + else if (isConstantFP()) + Constant.CFP->dump(); + if (Expression) + Expression->dump(); + } +#endif +}; + +/// This struct describes location entries emitted in the .debug_loc +/// section. +class DebugLocEntry { + /// Begin and end symbols for the address range that this location is valid. + const MCSymbol *Begin; + const MCSymbol *End; + + /// A nonempty list of locations/constants belonging to this entry, + /// sorted by offset. + SmallVector<DbgValueLoc, 1> Values; + +public: + /// Create a location list entry for the range [\p Begin, \p End). + /// + /// \param Vals One or more values describing (parts of) the variable. + DebugLocEntry(const MCSymbol *Begin, const MCSymbol *End, + ArrayRef<DbgValueLoc> Vals) + : Begin(Begin), End(End) { + addValues(Vals); + } + + /// Attempt to merge this DebugLocEntry with Next and return + /// true if the merge was successful. Entries can be merged if they + /// share the same Loc/Constant and if Next immediately follows this + /// Entry. + bool MergeRanges(const DebugLocEntry &Next) { + // If this and Next are describing the same variable, merge them. + if ((End == Next.Begin && Values == Next.Values)) { + End = Next.End; + return true; + } + return false; + } + + const MCSymbol *getBeginSym() const { return Begin; } + const MCSymbol *getEndSym() const { return End; } + ArrayRef<DbgValueLoc> getValues() const { return Values; } + void addValues(ArrayRef<DbgValueLoc> Vals) { + Values.append(Vals.begin(), Vals.end()); + sortUniqueValues(); + assert((Values.size() == 1 || all_of(Values, [](DbgValueLoc V) { + return V.isFragment(); + })) && "must either have a single value or multiple pieces"); + } + + // Sort the pieces by offset. + // Remove any duplicate entries by dropping all but the first. + void sortUniqueValues() { + llvm::sort(Values); + Values.erase(std::unique(Values.begin(), Values.end(), + [](const DbgValueLoc &A, const DbgValueLoc &B) { + return A.getExpression() == B.getExpression(); + }), + Values.end()); + } + + /// Lower this entry into a DWARF expression. + void finalize(const AsmPrinter &AP, + DebugLocStream::ListBuilder &List, + const DIBasicType *BT, + DwarfCompileUnit &TheCU); +}; + +/// Compare two DbgValueLocs for equality. +inline bool operator==(const DbgValueLoc &A, + const DbgValueLoc &B) { + if (A.EntryKind != B.EntryKind) + return false; + + if (A.Expression != B.Expression) + return false; + + switch (A.EntryKind) { + case DbgValueLoc::E_Location: + return A.Loc == B.Loc; + case DbgValueLoc::E_Integer: + return A.Constant.Int == B.Constant.Int; + case DbgValueLoc::E_ConstantFP: + return A.Constant.CFP == B.Constant.CFP; + case DbgValueLoc::E_ConstantInt: + return A.Constant.CIP == B.Constant.CIP; + } + llvm_unreachable("unhandled EntryKind"); +} + +/// Compare two fragments based on their offset. +inline bool operator<(const DbgValueLoc &A, + const DbgValueLoc &B) { + return A.getExpression()->getFragmentInfo()->OffsetInBits < + B.getExpression()->getFragmentInfo()->OffsetInBits; +} + +} + +#endif diff --git a/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.cpp b/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.cpp new file mode 100644 index 000000000000..f483d532ff07 --- /dev/null +++ b/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.cpp @@ -0,0 +1,45 @@ +//===- DebugLocStream.cpp - DWARF debug_loc stream --------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "DebugLocStream.h" +#include "DwarfDebug.h" +#include "llvm/CodeGen/AsmPrinter.h" + +using namespace llvm; + +bool DebugLocStream::finalizeList(AsmPrinter &Asm) { + if (Lists.back().EntryOffset == Entries.size()) { + // Empty list. Delete it. + Lists.pop_back(); + return false; + } + + // Real list. Generate a label for it. + Lists.back().Label = Asm.createTempSymbol("debug_loc"); + return true; +} + +void DebugLocStream::finalizeEntry() { + if (Entries.back().ByteOffset != DWARFBytes.size()) + return; + + // The last entry was empty. Delete it. + Comments.erase(Comments.begin() + Entries.back().CommentOffset, + Comments.end()); + Entries.pop_back(); + + assert(Lists.back().EntryOffset <= Entries.size() && + "Popped off more entries than are in the list"); +} + +DebugLocStream::ListBuilder::~ListBuilder() { + if (!Locs.finalizeList(Asm)) + return; + V.initializeDbgValue(&MI); + V.setDebugLocListIndex(ListIndex); +} diff --git a/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h b/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h new file mode 100644 index 000000000000..0db86b09d19a --- /dev/null +++ b/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h @@ -0,0 +1,195 @@ +//===--- lib/CodeGen/DebugLocStream.h - DWARF debug_loc stream --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCSTREAM_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCSTREAM_H + +#include "ByteStreamer.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallVector.h" + +namespace llvm { + +class AsmPrinter; +class DbgVariable; +class DwarfCompileUnit; +class MachineInstr; +class MCSymbol; + +/// Byte stream of .debug_loc entries. +/// +/// Stores a unified stream of .debug_loc entries. There's \a List for each +/// variable/inlined-at pair, and an \a Entry for each \a DebugLocEntry. +/// +/// FIXME: Do we need all these temp symbols? +/// FIXME: Why not output directly to the output stream? +class DebugLocStream { +public: + struct List { + DwarfCompileUnit *CU; + MCSymbol *Label = nullptr; + size_t EntryOffset; + List(DwarfCompileUnit *CU, size_t EntryOffset) + : CU(CU), EntryOffset(EntryOffset) {} + }; + struct Entry { + const MCSymbol *Begin; + const MCSymbol *End; + size_t ByteOffset; + size_t CommentOffset; + }; + +private: + SmallVector<List, 4> Lists; + SmallVector<Entry, 32> Entries; + SmallString<256> DWARFBytes; + std::vector<std::string> Comments; + MCSymbol *Sym; + + /// Only verbose textual output needs comments. This will be set to + /// true for that case, and false otherwise. + bool GenerateComments; + +public: + DebugLocStream(bool GenerateComments) : GenerateComments(GenerateComments) { } + size_t getNumLists() const { return Lists.size(); } + const List &getList(size_t LI) const { return Lists[LI]; } + ArrayRef<List> getLists() const { return Lists; } + MCSymbol *getSym() const { + return Sym; + } + void setSym(MCSymbol *Sym) { + this->Sym = Sym; + } + + class ListBuilder; + class EntryBuilder; + +private: + /// Start a new .debug_loc entry list. + /// + /// Start a new .debug_loc entry list. Return the new list's index so it can + /// be retrieved later via \a getList(). + /// + /// Until the next call, \a startEntry() will add entries to this list. + size_t startList(DwarfCompileUnit *CU) { + size_t LI = Lists.size(); + Lists.emplace_back(CU, Entries.size()); + return LI; + } + + /// Finalize a .debug_loc entry list. + /// + /// If there are no entries in this list, delete it outright. Otherwise, + /// create a label with \a Asm. + /// + /// \return false iff the list is deleted. + bool finalizeList(AsmPrinter &Asm); + + /// Start a new .debug_loc entry. + /// + /// Until the next call, bytes added to the stream will be added to this + /// entry. + void startEntry(const MCSymbol *BeginSym, const MCSymbol *EndSym) { + Entries.push_back({BeginSym, EndSym, DWARFBytes.size(), Comments.size()}); + } + + /// Finalize a .debug_loc entry, deleting if it's empty. + void finalizeEntry(); + +public: + BufferByteStreamer getStreamer() { + return BufferByteStreamer(DWARFBytes, Comments, GenerateComments); + } + + ArrayRef<Entry> getEntries(const List &L) const { + size_t LI = getIndex(L); + return makeArrayRef(Entries) + .slice(Lists[LI].EntryOffset, getNumEntries(LI)); + } + + ArrayRef<char> getBytes(const Entry &E) const { + size_t EI = getIndex(E); + return makeArrayRef(DWARFBytes.begin(), DWARFBytes.end()) + .slice(Entries[EI].ByteOffset, getNumBytes(EI)); + } + ArrayRef<std::string> getComments(const Entry &E) const { + size_t EI = getIndex(E); + return makeArrayRef(Comments) + .slice(Entries[EI].CommentOffset, getNumComments(EI)); + } + +private: + size_t getIndex(const List &L) const { + assert(&Lists.front() <= &L && &L <= &Lists.back() && + "Expected valid list"); + return &L - &Lists.front(); + } + size_t getIndex(const Entry &E) const { + assert(&Entries.front() <= &E && &E <= &Entries.back() && + "Expected valid entry"); + return &E - &Entries.front(); + } + size_t getNumEntries(size_t LI) const { + if (LI + 1 == Lists.size()) + return Entries.size() - Lists[LI].EntryOffset; + return Lists[LI + 1].EntryOffset - Lists[LI].EntryOffset; + } + size_t getNumBytes(size_t EI) const { + if (EI + 1 == Entries.size()) + return DWARFBytes.size() - Entries[EI].ByteOffset; + return Entries[EI + 1].ByteOffset - Entries[EI].ByteOffset; + } + size_t getNumComments(size_t EI) const { + if (EI + 1 == Entries.size()) + return Comments.size() - Entries[EI].CommentOffset; + return Entries[EI + 1].CommentOffset - Entries[EI].CommentOffset; + } +}; + +/// Builder for DebugLocStream lists. +class DebugLocStream::ListBuilder { + DebugLocStream &Locs; + AsmPrinter &Asm; + DbgVariable &V; + const MachineInstr &MI; + size_t ListIndex; + +public: + ListBuilder(DebugLocStream &Locs, DwarfCompileUnit &CU, AsmPrinter &Asm, + DbgVariable &V, const MachineInstr &MI) + : Locs(Locs), Asm(Asm), V(V), MI(MI), ListIndex(Locs.startList(&CU)) {} + + /// Finalize the list. + /// + /// If the list is empty, delete it. Otherwise, finalize it by creating a + /// temp symbol in \a Asm and setting up the \a DbgVariable. + ~ListBuilder(); + + DebugLocStream &getLocs() { return Locs; } +}; + +/// Builder for DebugLocStream entries. +class DebugLocStream::EntryBuilder { + DebugLocStream &Locs; + +public: + EntryBuilder(ListBuilder &List, const MCSymbol *Begin, const MCSymbol *End) + : Locs(List.getLocs()) { + Locs.startEntry(Begin, End); + } + + /// Finalize the entry, deleting it if it's empty. + ~EntryBuilder() { Locs.finalizeEntry(); } + + BufferByteStreamer getStreamer() { return Locs.getStreamer(); } +}; + +} // namespace llvm + +#endif diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp new file mode 100644 index 000000000000..207a7284dafa --- /dev/null +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp @@ -0,0 +1,171 @@ +//===-- CodeGen/AsmPrinter/DwarfException.cpp - Dwarf Exception Impl ------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing DWARF exception info into asm files. +// +//===----------------------------------------------------------------------===// + +#include "DwarfException.h" +#include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Mangler.h" +#include "llvm/IR/Module.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MachineLocation.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetOptions.h" +using namespace llvm; + +DwarfCFIExceptionBase::DwarfCFIExceptionBase(AsmPrinter *A) + : EHStreamer(A), shouldEmitCFI(false), hasEmittedCFISections(false) {} + +void DwarfCFIExceptionBase::markFunctionEnd() { + endFragment(); + + // Map all labels and get rid of any dead landing pads. + if (!Asm->MF->getLandingPads().empty()) { + MachineFunction *NonConstMF = const_cast<MachineFunction*>(Asm->MF); + NonConstMF->tidyLandingPads(); + } +} + +void DwarfCFIExceptionBase::endFragment() { + if (shouldEmitCFI) + Asm->OutStreamer->EmitCFIEndProc(); +} + +DwarfCFIException::DwarfCFIException(AsmPrinter *A) + : DwarfCFIExceptionBase(A), shouldEmitPersonality(false), + forceEmitPersonality(false), shouldEmitLSDA(false), + shouldEmitMoves(false) {} + +DwarfCFIException::~DwarfCFIException() {} + +/// endModule - Emit all exception information that should come after the +/// content. +void DwarfCFIException::endModule() { + // SjLj uses this pass and it doesn't need this info. + if (!Asm->MAI->usesCFIForEH()) + return; + + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + + unsigned PerEncoding = TLOF.getPersonalityEncoding(); + + if ((PerEncoding & 0x80) != dwarf::DW_EH_PE_indirect) + return; + + // Emit references to all used personality functions + for (const Function *Personality : MMI->getPersonalities()) { + if (!Personality) + continue; + MCSymbol *Sym = Asm->getSymbol(Personality); + TLOF.emitPersonalityValue(*Asm->OutStreamer, Asm->getDataLayout(), Sym); + } +} + +static MCSymbol *getExceptionSym(AsmPrinter *Asm) { + return Asm->getCurExceptionSym(); +} + +void DwarfCFIException::beginFunction(const MachineFunction *MF) { + shouldEmitMoves = shouldEmitPersonality = shouldEmitLSDA = false; + const Function &F = MF->getFunction(); + + // If any landing pads survive, we need an EH table. + bool hasLandingPads = !MF->getLandingPads().empty(); + + // See if we need frame move info. + AsmPrinter::CFIMoveType MoveType = Asm->needsCFIMoves(); + + shouldEmitMoves = MoveType != AsmPrinter::CFI_M_None; + + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + unsigned PerEncoding = TLOF.getPersonalityEncoding(); + const Function *Per = nullptr; + if (F.hasPersonalityFn()) + Per = dyn_cast<Function>(F.getPersonalityFn()->stripPointerCasts()); + + // Emit a personality function even when there are no landing pads + forceEmitPersonality = + // ...if a personality function is explicitly specified + F.hasPersonalityFn() && + // ... and it's not known to be a noop in the absence of invokes + !isNoOpWithoutInvoke(classifyEHPersonality(Per)) && + // ... and we're not explicitly asked not to emit it + F.needsUnwindTableEntry(); + + shouldEmitPersonality = + (forceEmitPersonality || + (hasLandingPads && PerEncoding != dwarf::DW_EH_PE_omit)) && + Per; + + unsigned LSDAEncoding = TLOF.getLSDAEncoding(); + shouldEmitLSDA = shouldEmitPersonality && + LSDAEncoding != dwarf::DW_EH_PE_omit; + + shouldEmitCFI = MF->getMMI().getContext().getAsmInfo()->usesCFIForEH() && + (shouldEmitPersonality || shouldEmitMoves); + beginFragment(&*MF->begin(), getExceptionSym); +} + +void DwarfCFIException::beginFragment(const MachineBasicBlock *MBB, + ExceptionSymbolProvider ESP) { + if (!shouldEmitCFI) + return; + + if (!hasEmittedCFISections) { + if (Asm->needsOnlyDebugCFIMoves()) + Asm->OutStreamer->EmitCFISections(false, true); + hasEmittedCFISections = true; + } + + Asm->OutStreamer->EmitCFIStartProc(/*IsSimple=*/false); + + // Indicate personality routine, if any. + if (!shouldEmitPersonality) + return; + + auto &F = MBB->getParent()->getFunction(); + auto *P = dyn_cast<Function>(F.getPersonalityFn()->stripPointerCasts()); + assert(P && "Expected personality function"); + + // If we are forced to emit this personality, make sure to record + // it because it might not appear in any landingpad + if (forceEmitPersonality) + MMI->addPersonality(P); + + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + unsigned PerEncoding = TLOF.getPersonalityEncoding(); + const MCSymbol *Sym = TLOF.getCFIPersonalitySymbol(P, Asm->TM, MMI); + Asm->OutStreamer->EmitCFIPersonality(Sym, PerEncoding); + + // Provide LSDA information. + if (shouldEmitLSDA) + Asm->OutStreamer->EmitCFILsda(ESP(Asm), TLOF.getLSDAEncoding()); +} + +/// endFunction - Gather and emit post-function exception information. +/// +void DwarfCFIException::endFunction(const MachineFunction *MF) { + if (!shouldEmitPersonality) + return; + + emitExceptionTable(); +} diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp new file mode 100644 index 000000000000..a61c98ec1c18 --- /dev/null +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -0,0 +1,1318 @@ +//===- llvm/CodeGen/DwarfCompileUnit.cpp - Dwarf Compile Units ------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains support for constructing a dwarf compile unit. +// +//===----------------------------------------------------------------------===// + +#include "DwarfCompileUnit.h" +#include "AddressPool.h" +#include "DwarfDebug.h" +#include "DwarfExpression.h" +#include "DwarfUnit.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/DIE.h" +#include "llvm/CodeGen/LexicalScopes.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MachineLocation.h" +#include "llvm/Support/Casting.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <iterator> +#include <memory> +#include <string> +#include <utility> + +using namespace llvm; + +DwarfCompileUnit::DwarfCompileUnit(unsigned UID, const DICompileUnit *Node, + AsmPrinter *A, DwarfDebug *DW, + DwarfFile *DWU) + : DwarfUnit(dwarf::DW_TAG_compile_unit, Node, A, DW, DWU), UniqueID(UID) { + insertDIE(Node, &getUnitDie()); + MacroLabelBegin = Asm->createTempSymbol("cu_macro_begin"); +} + +/// addLabelAddress - Add a dwarf label attribute data and value using +/// DW_FORM_addr or DW_FORM_GNU_addr_index. +void DwarfCompileUnit::addLabelAddress(DIE &Die, dwarf::Attribute Attribute, + const MCSymbol *Label) { + // Don't use the address pool in non-fission or in the skeleton unit itself. + // FIXME: Once GDB supports this, it's probably worthwhile using the address + // pool from the skeleton - maybe even in non-fission (possibly fewer + // relocations by sharing them in the pool, but we have other ideas about how + // to reduce the number of relocations as well/instead). + if ((!DD->useSplitDwarf() || !Skeleton) && DD->getDwarfVersion() < 5) + return addLocalLabelAddress(Die, Attribute, Label); + + if (Label) + DD->addArangeLabel(SymbolCU(this, Label)); + + unsigned idx = DD->getAddressPool().getIndex(Label); + Die.addValue(DIEValueAllocator, Attribute, + DD->getDwarfVersion() >= 5 ? dwarf::DW_FORM_addrx + : dwarf::DW_FORM_GNU_addr_index, + DIEInteger(idx)); +} + +void DwarfCompileUnit::addLocalLabelAddress(DIE &Die, + dwarf::Attribute Attribute, + const MCSymbol *Label) { + if (Label) + DD->addArangeLabel(SymbolCU(this, Label)); + + if (Label) + Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_addr, + DIELabel(Label)); + else + Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_addr, + DIEInteger(0)); +} + +unsigned DwarfCompileUnit::getOrCreateSourceID(const DIFile *File) { + // If we print assembly, we can't separate .file entries according to + // compile units. Thus all files will belong to the default compile unit. + + // FIXME: add a better feature test than hasRawTextSupport. Even better, + // extend .file to support this. + unsigned CUID = Asm->OutStreamer->hasRawTextSupport() ? 0 : getUniqueID(); + if (!File) + return Asm->OutStreamer->EmitDwarfFileDirective(0, "", "", None, None, CUID); + return Asm->OutStreamer->EmitDwarfFileDirective( + 0, File->getDirectory(), File->getFilename(), getMD5AsBytes(File), + File->getSource(), CUID); +} + +DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE( + const DIGlobalVariable *GV, ArrayRef<GlobalExpr> GlobalExprs) { + // Check for pre-existence. + if (DIE *Die = getDIE(GV)) + return Die; + + assert(GV); + + auto *GVContext = GV->getScope(); + const DIType *GTy = GV->getType(); + + // Construct the context before querying for the existence of the DIE in + // case such construction creates the DIE. + auto *CB = GVContext ? dyn_cast<DICommonBlock>(GVContext) : nullptr; + DIE *ContextDIE = CB ? getOrCreateCommonBlock(CB, GlobalExprs) + : getOrCreateContextDIE(GVContext); + + // Add to map. + DIE *VariableDIE = &createAndAddDIE(GV->getTag(), *ContextDIE, GV); + DIScope *DeclContext; + if (auto *SDMDecl = GV->getStaticDataMemberDeclaration()) { + DeclContext = SDMDecl->getScope(); + assert(SDMDecl->isStaticMember() && "Expected static member decl"); + assert(GV->isDefinition()); + // We need the declaration DIE that is in the static member's class. + DIE *VariableSpecDIE = getOrCreateStaticMemberDIE(SDMDecl); + addDIEEntry(*VariableDIE, dwarf::DW_AT_specification, *VariableSpecDIE); + // If the global variable's type is different from the one in the class + // member type, assume that it's more specific and also emit it. + if (GTy != SDMDecl->getBaseType()) + addType(*VariableDIE, GTy); + } else { + DeclContext = GV->getScope(); + // Add name and type. + addString(*VariableDIE, dwarf::DW_AT_name, GV->getDisplayName()); + addType(*VariableDIE, GTy); + + // Add scoping info. + if (!GV->isLocalToUnit()) + addFlag(*VariableDIE, dwarf::DW_AT_external); + + // Add line number info. + addSourceLine(*VariableDIE, GV); + } + + if (!GV->isDefinition()) + addFlag(*VariableDIE, dwarf::DW_AT_declaration); + else + addGlobalName(GV->getName(), *VariableDIE, DeclContext); + + if (uint32_t AlignInBytes = GV->getAlignInBytes()) + addUInt(*VariableDIE, dwarf::DW_AT_alignment, dwarf::DW_FORM_udata, + AlignInBytes); + + if (MDTuple *TP = GV->getTemplateParams()) + addTemplateParams(*VariableDIE, DINodeArray(TP)); + + // Add location. + addLocationAttribute(VariableDIE, GV, GlobalExprs); + + return VariableDIE; +} + +void DwarfCompileUnit::addLocationAttribute( + DIE *VariableDIE, const DIGlobalVariable *GV, ArrayRef<GlobalExpr> GlobalExprs) { + bool addToAccelTable = false; + DIELoc *Loc = nullptr; + Optional<unsigned> NVPTXAddressSpace; + std::unique_ptr<DIEDwarfExpression> DwarfExpr; + for (const auto &GE : GlobalExprs) { + const GlobalVariable *Global = GE.Var; + const DIExpression *Expr = GE.Expr; + + // For compatibility with DWARF 3 and earlier, + // DW_AT_location(DW_OP_constu, X, DW_OP_stack_value) becomes + // DW_AT_const_value(X). + if (GlobalExprs.size() == 1 && Expr && Expr->isConstant()) { + addToAccelTable = true; + addConstantValue(*VariableDIE, /*Unsigned=*/true, Expr->getElement(1)); + break; + } + + // We cannot describe the location of dllimport'd variables: the + // computation of their address requires loads from the IAT. + if (Global && Global->hasDLLImportStorageClass()) + continue; + + // Nothing to describe without address or constant. + if (!Global && (!Expr || !Expr->isConstant())) + continue; + + if (Global && Global->isThreadLocal() && + !Asm->getObjFileLowering().supportDebugThreadLocalLocation()) + continue; + + if (!Loc) { + addToAccelTable = true; + Loc = new (DIEValueAllocator) DIELoc; + DwarfExpr = std::make_unique<DIEDwarfExpression>(*Asm, *this, *Loc); + } + + if (Expr) { + // According to + // https://docs.nvidia.com/cuda/archive/10.0/ptx-writers-guide-to-interoperability/index.html#cuda-specific-dwarf + // cuda-gdb requires DW_AT_address_class for all variables to be able to + // correctly interpret address space of the variable address. + // Decode DW_OP_constu <DWARF Address Space> DW_OP_swap DW_OP_xderef + // sequence for the NVPTX + gdb target. + unsigned LocalNVPTXAddressSpace; + if (Asm->TM.getTargetTriple().isNVPTX() && DD->tuneForGDB()) { + const DIExpression *NewExpr = + DIExpression::extractAddressClass(Expr, LocalNVPTXAddressSpace); + if (NewExpr != Expr) { + Expr = NewExpr; + NVPTXAddressSpace = LocalNVPTXAddressSpace; + } + } + DwarfExpr->addFragmentOffset(Expr); + } + + if (Global) { + const MCSymbol *Sym = Asm->getSymbol(Global); + if (Global->isThreadLocal()) { + if (Asm->TM.useEmulatedTLS()) { + // TODO: add debug info for emulated thread local mode. + } else { + // FIXME: Make this work with -gsplit-dwarf. + unsigned PointerSize = Asm->getDataLayout().getPointerSize(); + assert((PointerSize == 4 || PointerSize == 8) && + "Add support for other sizes if necessary"); + // Based on GCC's support for TLS: + if (!DD->useSplitDwarf()) { + // 1) Start with a constNu of the appropriate pointer size + addUInt(*Loc, dwarf::DW_FORM_data1, + PointerSize == 4 ? dwarf::DW_OP_const4u + : dwarf::DW_OP_const8u); + // 2) containing the (relocated) offset of the TLS variable + // within the module's TLS block. + addExpr(*Loc, dwarf::DW_FORM_udata, + Asm->getObjFileLowering().getDebugThreadLocalSymbol(Sym)); + } else { + addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index); + addUInt(*Loc, dwarf::DW_FORM_udata, + DD->getAddressPool().getIndex(Sym, /* TLS */ true)); + } + // 3) followed by an OP to make the debugger do a TLS lookup. + addUInt(*Loc, dwarf::DW_FORM_data1, + DD->useGNUTLSOpcode() ? dwarf::DW_OP_GNU_push_tls_address + : dwarf::DW_OP_form_tls_address); + } + } else { + DD->addArangeLabel(SymbolCU(this, Sym)); + addOpAddress(*Loc, Sym); + } + } + // Global variables attached to symbols are memory locations. + // It would be better if this were unconditional, but malformed input that + // mixes non-fragments and fragments for the same variable is too expensive + // to detect in the verifier. + if (DwarfExpr->isUnknownLocation()) + DwarfExpr->setMemoryLocationKind(); + DwarfExpr->addExpression(Expr); + } + if (Asm->TM.getTargetTriple().isNVPTX() && DD->tuneForGDB()) { + // According to + // https://docs.nvidia.com/cuda/archive/10.0/ptx-writers-guide-to-interoperability/index.html#cuda-specific-dwarf + // cuda-gdb requires DW_AT_address_class for all variables to be able to + // correctly interpret address space of the variable address. + const unsigned NVPTX_ADDR_global_space = 5; + addUInt(*VariableDIE, dwarf::DW_AT_address_class, dwarf::DW_FORM_data1, + NVPTXAddressSpace ? *NVPTXAddressSpace : NVPTX_ADDR_global_space); + } + if (Loc) + addBlock(*VariableDIE, dwarf::DW_AT_location, DwarfExpr->finalize()); + + if (DD->useAllLinkageNames()) + addLinkageName(*VariableDIE, GV->getLinkageName()); + + if (addToAccelTable) { + DD->addAccelName(*CUNode, GV->getName(), *VariableDIE); + + // If the linkage name is different than the name, go ahead and output + // that as well into the name table. + if (GV->getLinkageName() != "" && GV->getName() != GV->getLinkageName() && + DD->useAllLinkageNames()) + DD->addAccelName(*CUNode, GV->getLinkageName(), *VariableDIE); + } +} + +DIE *DwarfCompileUnit::getOrCreateCommonBlock( + const DICommonBlock *CB, ArrayRef<GlobalExpr> GlobalExprs) { + // Construct the context before querying for the existence of the DIE in case + // such construction creates the DIE. + DIE *ContextDIE = getOrCreateContextDIE(CB->getScope()); + + if (DIE *NDie = getDIE(CB)) + return NDie; + DIE &NDie = createAndAddDIE(dwarf::DW_TAG_common_block, *ContextDIE, CB); + StringRef Name = CB->getName().empty() ? "_BLNK_" : CB->getName(); + addString(NDie, dwarf::DW_AT_name, Name); + addGlobalName(Name, NDie, CB->getScope()); + if (CB->getFile()) + addSourceLine(NDie, CB->getLineNo(), CB->getFile()); + if (DIGlobalVariable *V = CB->getDecl()) + getCU().addLocationAttribute(&NDie, V, GlobalExprs); + return &NDie; +} + +void DwarfCompileUnit::addRange(RangeSpan Range) { + bool SameAsPrevCU = this == DD->getPrevCU(); + DD->setPrevCU(this); + // If we have no current ranges just add the range and return, otherwise, + // check the current section and CU against the previous section and CU we + // emitted into and the subprogram was contained within. If these are the + // same then extend our current range, otherwise add this as a new range. + if (CURanges.empty() || !SameAsPrevCU || + (&CURanges.back().End->getSection() != + &Range.End->getSection())) { + CURanges.push_back(Range); + return; + } + + CURanges.back().End = Range.End; +} + +void DwarfCompileUnit::initStmtList() { + if (CUNode->isDebugDirectivesOnly()) + return; + + // Define start line table label for each Compile Unit. + MCSymbol *LineTableStartSym; + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + if (DD->useSectionsAsReferences()) { + LineTableStartSym = TLOF.getDwarfLineSection()->getBeginSymbol(); + } else { + LineTableStartSym = + Asm->OutStreamer->getDwarfLineTableSymbol(getUniqueID()); + } + + // DW_AT_stmt_list is a offset of line number information for this + // compile unit in debug_line section. For split dwarf this is + // left in the skeleton CU and so not included. + // The line table entries are not always emitted in assembly, so it + // is not okay to use line_table_start here. + StmtListValue = + addSectionLabel(getUnitDie(), dwarf::DW_AT_stmt_list, LineTableStartSym, + TLOF.getDwarfLineSection()->getBeginSymbol()); +} + +void DwarfCompileUnit::applyStmtList(DIE &D) { + D.addValue(DIEValueAllocator, *StmtListValue); +} + +void DwarfCompileUnit::attachLowHighPC(DIE &D, const MCSymbol *Begin, + const MCSymbol *End) { + assert(Begin && "Begin label should not be null!"); + assert(End && "End label should not be null!"); + assert(Begin->isDefined() && "Invalid starting label"); + assert(End->isDefined() && "Invalid end label"); + + addLabelAddress(D, dwarf::DW_AT_low_pc, Begin); + if (DD->getDwarfVersion() < 4) + addLabelAddress(D, dwarf::DW_AT_high_pc, End); + else + addLabelDelta(D, dwarf::DW_AT_high_pc, End, Begin); +} + +// Find DIE for the given subprogram and attach appropriate DW_AT_low_pc +// and DW_AT_high_pc attributes. If there are global variables in this +// scope then create and insert DIEs for these variables. +DIE &DwarfCompileUnit::updateSubprogramScopeDIE(const DISubprogram *SP) { + DIE *SPDie = getOrCreateSubprogramDIE(SP, includeMinimalInlineScopes()); + + attachLowHighPC(*SPDie, Asm->getFunctionBegin(), Asm->getFunctionEnd()); + if (DD->useAppleExtensionAttributes() && + !DD->getCurrentFunction()->getTarget().Options.DisableFramePointerElim( + *DD->getCurrentFunction())) + addFlag(*SPDie, dwarf::DW_AT_APPLE_omit_frame_ptr); + + // Only include DW_AT_frame_base in full debug info + if (!includeMinimalInlineScopes()) { + if (Asm->MF->getTarget().getTargetTriple().isNVPTX()) { + DIELoc *Loc = new (DIEValueAllocator) DIELoc; + addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_call_frame_cfa); + addBlock(*SPDie, dwarf::DW_AT_frame_base, Loc); + } else { + const TargetRegisterInfo *RI = Asm->MF->getSubtarget().getRegisterInfo(); + MachineLocation Location(RI->getFrameRegister(*Asm->MF)); + if (Register::isPhysicalRegister(Location.getReg())) + addAddress(*SPDie, dwarf::DW_AT_frame_base, Location); + } + } + + // Add name to the name table, we do this here because we're guaranteed + // to have concrete versions of our DW_TAG_subprogram nodes. + DD->addSubprogramNames(*CUNode, SP, *SPDie); + + return *SPDie; +} + +// Construct a DIE for this scope. +void DwarfCompileUnit::constructScopeDIE( + LexicalScope *Scope, SmallVectorImpl<DIE *> &FinalChildren) { + if (!Scope || !Scope->getScopeNode()) + return; + + auto *DS = Scope->getScopeNode(); + + assert((Scope->getInlinedAt() || !isa<DISubprogram>(DS)) && + "Only handle inlined subprograms here, use " + "constructSubprogramScopeDIE for non-inlined " + "subprograms"); + + SmallVector<DIE *, 8> Children; + + // We try to create the scope DIE first, then the children DIEs. This will + // avoid creating un-used children then removing them later when we find out + // the scope DIE is null. + DIE *ScopeDIE; + if (Scope->getParent() && isa<DISubprogram>(DS)) { + ScopeDIE = constructInlinedScopeDIE(Scope); + if (!ScopeDIE) + return; + // We create children when the scope DIE is not null. + createScopeChildrenDIE(Scope, Children); + } else { + // Early exit when we know the scope DIE is going to be null. + if (DD->isLexicalScopeDIENull(Scope)) + return; + + bool HasNonScopeChildren = false; + + // We create children here when we know the scope DIE is not going to be + // null and the children will be added to the scope DIE. + createScopeChildrenDIE(Scope, Children, &HasNonScopeChildren); + + // If there are only other scopes as children, put them directly in the + // parent instead, as this scope would serve no purpose. + if (!HasNonScopeChildren) { + FinalChildren.insert(FinalChildren.end(), + std::make_move_iterator(Children.begin()), + std::make_move_iterator(Children.end())); + return; + } + ScopeDIE = constructLexicalScopeDIE(Scope); + assert(ScopeDIE && "Scope DIE should not be null."); + } + + // Add children + for (auto &I : Children) + ScopeDIE->addChild(std::move(I)); + + FinalChildren.push_back(std::move(ScopeDIE)); +} + +void DwarfCompileUnit::addScopeRangeList(DIE &ScopeDIE, + SmallVector<RangeSpan, 2> Range) { + + HasRangeLists = true; + + // Add the range list to the set of ranges to be emitted. + auto IndexAndList = + (DD->getDwarfVersion() < 5 && Skeleton ? Skeleton->DU : DU) + ->addRange(*(Skeleton ? Skeleton : this), std::move(Range)); + + uint32_t Index = IndexAndList.first; + auto &List = *IndexAndList.second; + + // Under fission, ranges are specified by constant offsets relative to the + // CU's DW_AT_GNU_ranges_base. + // FIXME: For DWARF v5, do not generate the DW_AT_ranges attribute under + // fission until we support the forms using the .debug_addr section + // (DW_RLE_startx_endx etc.). + if (DD->getDwarfVersion() >= 5) + addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_rnglistx, Index); + else { + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + const MCSymbol *RangeSectionSym = + TLOF.getDwarfRangesSection()->getBeginSymbol(); + if (isDwoUnit()) + addSectionDelta(ScopeDIE, dwarf::DW_AT_ranges, List.getSym(), + RangeSectionSym); + else + addSectionLabel(ScopeDIE, dwarf::DW_AT_ranges, List.getSym(), + RangeSectionSym); + } +} + +void DwarfCompileUnit::attachRangesOrLowHighPC( + DIE &Die, SmallVector<RangeSpan, 2> Ranges) { + if (Ranges.size() == 1 || !DD->useRangesSection()) { + const RangeSpan &Front = Ranges.front(); + const RangeSpan &Back = Ranges.back(); + attachLowHighPC(Die, Front.Begin, Back.End); + } else + addScopeRangeList(Die, std::move(Ranges)); +} + +void DwarfCompileUnit::attachRangesOrLowHighPC( + DIE &Die, const SmallVectorImpl<InsnRange> &Ranges) { + SmallVector<RangeSpan, 2> List; + List.reserve(Ranges.size()); + for (const InsnRange &R : Ranges) + List.push_back( + {DD->getLabelBeforeInsn(R.first), DD->getLabelAfterInsn(R.second)}); + attachRangesOrLowHighPC(Die, std::move(List)); +} + +// This scope represents inlined body of a function. Construct DIE to +// represent this concrete inlined copy of the function. +DIE *DwarfCompileUnit::constructInlinedScopeDIE(LexicalScope *Scope) { + assert(Scope->getScopeNode()); + auto *DS = Scope->getScopeNode(); + auto *InlinedSP = getDISubprogram(DS); + // Find the subprogram's DwarfCompileUnit in the SPMap in case the subprogram + // was inlined from another compile unit. + DIE *OriginDIE = getAbstractSPDies()[InlinedSP]; + assert(OriginDIE && "Unable to find original DIE for an inlined subprogram."); + + auto ScopeDIE = DIE::get(DIEValueAllocator, dwarf::DW_TAG_inlined_subroutine); + addDIEEntry(*ScopeDIE, dwarf::DW_AT_abstract_origin, *OriginDIE); + + attachRangesOrLowHighPC(*ScopeDIE, Scope->getRanges()); + + // Add the call site information to the DIE. + const DILocation *IA = Scope->getInlinedAt(); + addUInt(*ScopeDIE, dwarf::DW_AT_call_file, None, + getOrCreateSourceID(IA->getFile())); + addUInt(*ScopeDIE, dwarf::DW_AT_call_line, None, IA->getLine()); + if (IA->getColumn()) + addUInt(*ScopeDIE, dwarf::DW_AT_call_column, None, IA->getColumn()); + if (IA->getDiscriminator() && DD->getDwarfVersion() >= 4) + addUInt(*ScopeDIE, dwarf::DW_AT_GNU_discriminator, None, + IA->getDiscriminator()); + + // Add name to the name table, we do this here because we're guaranteed + // to have concrete versions of our DW_TAG_inlined_subprogram nodes. + DD->addSubprogramNames(*CUNode, InlinedSP, *ScopeDIE); + + return ScopeDIE; +} + +// Construct new DW_TAG_lexical_block for this scope and attach +// DW_AT_low_pc/DW_AT_high_pc labels. +DIE *DwarfCompileUnit::constructLexicalScopeDIE(LexicalScope *Scope) { + if (DD->isLexicalScopeDIENull(Scope)) + return nullptr; + + auto ScopeDIE = DIE::get(DIEValueAllocator, dwarf::DW_TAG_lexical_block); + if (Scope->isAbstractScope()) + return ScopeDIE; + + attachRangesOrLowHighPC(*ScopeDIE, Scope->getRanges()); + + return ScopeDIE; +} + +/// constructVariableDIE - Construct a DIE for the given DbgVariable. +DIE *DwarfCompileUnit::constructVariableDIE(DbgVariable &DV, bool Abstract) { + auto D = constructVariableDIEImpl(DV, Abstract); + DV.setDIE(*D); + return D; +} + +DIE *DwarfCompileUnit::constructLabelDIE(DbgLabel &DL, + const LexicalScope &Scope) { + auto LabelDie = DIE::get(DIEValueAllocator, DL.getTag()); + insertDIE(DL.getLabel(), LabelDie); + DL.setDIE(*LabelDie); + + if (Scope.isAbstractScope()) + applyLabelAttributes(DL, *LabelDie); + + return LabelDie; +} + +DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV, + bool Abstract) { + // Define variable debug information entry. + auto VariableDie = DIE::get(DIEValueAllocator, DV.getTag()); + insertDIE(DV.getVariable(), VariableDie); + + if (Abstract) { + applyVariableAttributes(DV, *VariableDie); + return VariableDie; + } + + // Add variable address. + + unsigned Offset = DV.getDebugLocListIndex(); + if (Offset != ~0U) { + addLocationList(*VariableDie, dwarf::DW_AT_location, Offset); + return VariableDie; + } + + // Check if variable has a single location description. + if (auto *DVal = DV.getValueLoc()) { + if (DVal->isLocation()) + addVariableAddress(DV, *VariableDie, DVal->getLoc()); + else if (DVal->isInt()) { + auto *Expr = DV.getSingleExpression(); + if (Expr && Expr->getNumElements()) { + DIELoc *Loc = new (DIEValueAllocator) DIELoc; + DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc); + // If there is an expression, emit raw unsigned bytes. + DwarfExpr.addFragmentOffset(Expr); + DwarfExpr.addUnsignedConstant(DVal->getInt()); + DwarfExpr.addExpression(Expr); + addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize()); + } else + addConstantValue(*VariableDie, DVal->getInt(), DV.getType()); + } else if (DVal->isConstantFP()) { + addConstantFPValue(*VariableDie, DVal->getConstantFP()); + } else if (DVal->isConstantInt()) { + addConstantValue(*VariableDie, DVal->getConstantInt(), DV.getType()); + } + return VariableDie; + } + + // .. else use frame index. + if (!DV.hasFrameIndexExprs()) + return VariableDie; + + Optional<unsigned> NVPTXAddressSpace; + DIELoc *Loc = new (DIEValueAllocator) DIELoc; + DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc); + for (auto &Fragment : DV.getFrameIndexExprs()) { + unsigned FrameReg = 0; + const DIExpression *Expr = Fragment.Expr; + const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering(); + int Offset = TFI->getFrameIndexReference(*Asm->MF, Fragment.FI, FrameReg); + DwarfExpr.addFragmentOffset(Expr); + SmallVector<uint64_t, 8> Ops; + DIExpression::appendOffset(Ops, Offset); + // According to + // https://docs.nvidia.com/cuda/archive/10.0/ptx-writers-guide-to-interoperability/index.html#cuda-specific-dwarf + // cuda-gdb requires DW_AT_address_class for all variables to be able to + // correctly interpret address space of the variable address. + // Decode DW_OP_constu <DWARF Address Space> DW_OP_swap DW_OP_xderef + // sequence for the NVPTX + gdb target. + unsigned LocalNVPTXAddressSpace; + if (Asm->TM.getTargetTriple().isNVPTX() && DD->tuneForGDB()) { + const DIExpression *NewExpr = + DIExpression::extractAddressClass(Expr, LocalNVPTXAddressSpace); + if (NewExpr != Expr) { + Expr = NewExpr; + NVPTXAddressSpace = LocalNVPTXAddressSpace; + } + } + if (Expr) + Ops.append(Expr->elements_begin(), Expr->elements_end()); + DIExpressionCursor Cursor(Ops); + DwarfExpr.setMemoryLocationKind(); + if (const MCSymbol *FrameSymbol = Asm->getFunctionFrameSymbol()) + addOpAddress(*Loc, FrameSymbol); + else + DwarfExpr.addMachineRegExpression( + *Asm->MF->getSubtarget().getRegisterInfo(), Cursor, FrameReg); + DwarfExpr.addExpression(std::move(Cursor)); + } + if (Asm->TM.getTargetTriple().isNVPTX() && DD->tuneForGDB()) { + // According to + // https://docs.nvidia.com/cuda/archive/10.0/ptx-writers-guide-to-interoperability/index.html#cuda-specific-dwarf + // cuda-gdb requires DW_AT_address_class for all variables to be able to + // correctly interpret address space of the variable address. + const unsigned NVPTX_ADDR_local_space = 6; + addUInt(*VariableDie, dwarf::DW_AT_address_class, dwarf::DW_FORM_data1, + NVPTXAddressSpace ? *NVPTXAddressSpace : NVPTX_ADDR_local_space); + } + addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize()); + if (DwarfExpr.TagOffset) + addUInt(*VariableDie, dwarf::DW_AT_LLVM_tag_offset, dwarf::DW_FORM_data1, + *DwarfExpr.TagOffset); + + return VariableDie; +} + +DIE *DwarfCompileUnit::constructVariableDIE(DbgVariable &DV, + const LexicalScope &Scope, + DIE *&ObjectPointer) { + auto Var = constructVariableDIE(DV, Scope.isAbstractScope()); + if (DV.isObjectPointer()) + ObjectPointer = Var; + return Var; +} + +/// Return all DIVariables that appear in count: expressions. +static SmallVector<const DIVariable *, 2> dependencies(DbgVariable *Var) { + SmallVector<const DIVariable *, 2> Result; + auto *Array = dyn_cast<DICompositeType>(Var->getType()); + if (!Array || Array->getTag() != dwarf::DW_TAG_array_type) + return Result; + for (auto *El : Array->getElements()) { + if (auto *Subrange = dyn_cast<DISubrange>(El)) { + auto Count = Subrange->getCount(); + if (auto *Dependency = Count.dyn_cast<DIVariable *>()) + Result.push_back(Dependency); + } + } + return Result; +} + +/// Sort local variables so that variables appearing inside of helper +/// expressions come first. +static SmallVector<DbgVariable *, 8> +sortLocalVars(SmallVectorImpl<DbgVariable *> &Input) { + SmallVector<DbgVariable *, 8> Result; + SmallVector<PointerIntPair<DbgVariable *, 1>, 8> WorkList; + // Map back from a DIVariable to its containing DbgVariable. + SmallDenseMap<const DILocalVariable *, DbgVariable *> DbgVar; + // Set of DbgVariables in Result. + SmallDenseSet<DbgVariable *, 8> Visited; + // For cycle detection. + SmallDenseSet<DbgVariable *, 8> Visiting; + + // Initialize the worklist and the DIVariable lookup table. + for (auto Var : reverse(Input)) { + DbgVar.insert({Var->getVariable(), Var}); + WorkList.push_back({Var, 0}); + } + + // Perform a stable topological sort by doing a DFS. + while (!WorkList.empty()) { + auto Item = WorkList.back(); + DbgVariable *Var = Item.getPointer(); + bool visitedAllDependencies = Item.getInt(); + WorkList.pop_back(); + + // Dependency is in a different lexical scope or a global. + if (!Var) + continue; + + // Already handled. + if (Visited.count(Var)) + continue; + + // Add to Result if all dependencies are visited. + if (visitedAllDependencies) { + Visited.insert(Var); + Result.push_back(Var); + continue; + } + + // Detect cycles. + auto Res = Visiting.insert(Var); + if (!Res.second) { + assert(false && "dependency cycle in local variables"); + return Result; + } + + // Push dependencies and this node onto the worklist, so that this node is + // visited again after all of its dependencies are handled. + WorkList.push_back({Var, 1}); + for (auto *Dependency : dependencies(Var)) { + auto Dep = dyn_cast_or_null<const DILocalVariable>(Dependency); + WorkList.push_back({DbgVar[Dep], 0}); + } + } + return Result; +} + +DIE *DwarfCompileUnit::createScopeChildrenDIE(LexicalScope *Scope, + SmallVectorImpl<DIE *> &Children, + bool *HasNonScopeChildren) { + assert(Children.empty()); + DIE *ObjectPointer = nullptr; + + // Emit function arguments (order is significant). + auto Vars = DU->getScopeVariables().lookup(Scope); + for (auto &DV : Vars.Args) + Children.push_back(constructVariableDIE(*DV.second, *Scope, ObjectPointer)); + + // Emit local variables. + auto Locals = sortLocalVars(Vars.Locals); + for (DbgVariable *DV : Locals) + Children.push_back(constructVariableDIE(*DV, *Scope, ObjectPointer)); + + // Skip imported directives in gmlt-like data. + if (!includeMinimalInlineScopes()) { + // There is no need to emit empty lexical block DIE. + for (const auto *IE : ImportedEntities[Scope->getScopeNode()]) + Children.push_back( + constructImportedEntityDIE(cast<DIImportedEntity>(IE))); + } + + if (HasNonScopeChildren) + *HasNonScopeChildren = !Children.empty(); + + for (DbgLabel *DL : DU->getScopeLabels().lookup(Scope)) + Children.push_back(constructLabelDIE(*DL, *Scope)); + + for (LexicalScope *LS : Scope->getChildren()) + constructScopeDIE(LS, Children); + + return ObjectPointer; +} + +DIE &DwarfCompileUnit::constructSubprogramScopeDIE(const DISubprogram *Sub, + LexicalScope *Scope) { + DIE &ScopeDIE = updateSubprogramScopeDIE(Sub); + + if (Scope) { + assert(!Scope->getInlinedAt()); + assert(!Scope->isAbstractScope()); + // Collect lexical scope children first. + // ObjectPointer might be a local (non-argument) local variable if it's a + // block's synthetic this pointer. + if (DIE *ObjectPointer = createAndAddScopeChildren(Scope, ScopeDIE)) + addDIEEntry(ScopeDIE, dwarf::DW_AT_object_pointer, *ObjectPointer); + } + + // If this is a variadic function, add an unspecified parameter. + DITypeRefArray FnArgs = Sub->getType()->getTypeArray(); + + // If we have a single element of null, it is a function that returns void. + // If we have more than one elements and the last one is null, it is a + // variadic function. + if (FnArgs.size() > 1 && !FnArgs[FnArgs.size() - 1] && + !includeMinimalInlineScopes()) + ScopeDIE.addChild( + DIE::get(DIEValueAllocator, dwarf::DW_TAG_unspecified_parameters)); + + return ScopeDIE; +} + +DIE *DwarfCompileUnit::createAndAddScopeChildren(LexicalScope *Scope, + DIE &ScopeDIE) { + // We create children when the scope DIE is not null. + SmallVector<DIE *, 8> Children; + DIE *ObjectPointer = createScopeChildrenDIE(Scope, Children); + + // Add children + for (auto &I : Children) + ScopeDIE.addChild(std::move(I)); + + return ObjectPointer; +} + +void DwarfCompileUnit::constructAbstractSubprogramScopeDIE( + LexicalScope *Scope) { + DIE *&AbsDef = getAbstractSPDies()[Scope->getScopeNode()]; + if (AbsDef) + return; + + auto *SP = cast<DISubprogram>(Scope->getScopeNode()); + + DIE *ContextDIE; + DwarfCompileUnit *ContextCU = this; + + if (includeMinimalInlineScopes()) + ContextDIE = &getUnitDie(); + // Some of this is duplicated from DwarfUnit::getOrCreateSubprogramDIE, with + // the important distinction that the debug node is not associated with the + // DIE (since the debug node will be associated with the concrete DIE, if + // any). It could be refactored to some common utility function. + else if (auto *SPDecl = SP->getDeclaration()) { + ContextDIE = &getUnitDie(); + getOrCreateSubprogramDIE(SPDecl); + } else { + ContextDIE = getOrCreateContextDIE(SP->getScope()); + // The scope may be shared with a subprogram that has already been + // constructed in another CU, in which case we need to construct this + // subprogram in the same CU. + ContextCU = DD->lookupCU(ContextDIE->getUnitDie()); + } + + // Passing null as the associated node because the abstract definition + // shouldn't be found by lookup. + AbsDef = &ContextCU->createAndAddDIE(dwarf::DW_TAG_subprogram, *ContextDIE, nullptr); + ContextCU->applySubprogramAttributesToDefinition(SP, *AbsDef); + + if (!ContextCU->includeMinimalInlineScopes()) + ContextCU->addUInt(*AbsDef, dwarf::DW_AT_inline, None, dwarf::DW_INL_inlined); + if (DIE *ObjectPointer = ContextCU->createAndAddScopeChildren(Scope, *AbsDef)) + ContextCU->addDIEEntry(*AbsDef, dwarf::DW_AT_object_pointer, *ObjectPointer); +} + +/// Whether to use the GNU analog for a DWARF5 tag, attribute, or location atom. +static bool useGNUAnalogForDwarf5Feature(DwarfDebug *DD) { + return DD->getDwarfVersion() == 4 && DD->tuneForGDB(); +} + +dwarf::Tag DwarfCompileUnit::getDwarf5OrGNUTag(dwarf::Tag Tag) const { + if (!useGNUAnalogForDwarf5Feature(DD)) + return Tag; + switch (Tag) { + case dwarf::DW_TAG_call_site: + return dwarf::DW_TAG_GNU_call_site; + case dwarf::DW_TAG_call_site_parameter: + return dwarf::DW_TAG_GNU_call_site_parameter; + default: + llvm_unreachable("DWARF5 tag with no GNU analog"); + } +} + +dwarf::Attribute +DwarfCompileUnit::getDwarf5OrGNUAttr(dwarf::Attribute Attr) const { + if (!useGNUAnalogForDwarf5Feature(DD)) + return Attr; + switch (Attr) { + case dwarf::DW_AT_call_all_calls: + return dwarf::DW_AT_GNU_all_call_sites; + case dwarf::DW_AT_call_target: + return dwarf::DW_AT_GNU_call_site_target; + case dwarf::DW_AT_call_origin: + return dwarf::DW_AT_abstract_origin; + case dwarf::DW_AT_call_pc: + return dwarf::DW_AT_low_pc; + case dwarf::DW_AT_call_value: + return dwarf::DW_AT_GNU_call_site_value; + case dwarf::DW_AT_call_tail_call: + return dwarf::DW_AT_GNU_tail_call; + default: + llvm_unreachable("DWARF5 attribute with no GNU analog"); + } +} + +dwarf::LocationAtom +DwarfCompileUnit::getDwarf5OrGNULocationAtom(dwarf::LocationAtom Loc) const { + if (!useGNUAnalogForDwarf5Feature(DD)) + return Loc; + switch (Loc) { + case dwarf::DW_OP_entry_value: + return dwarf::DW_OP_GNU_entry_value; + default: + llvm_unreachable("DWARF5 location atom with no GNU analog"); + } +} + +DIE &DwarfCompileUnit::constructCallSiteEntryDIE( + DIE &ScopeDIE, const DISubprogram *CalleeSP, bool IsTail, + const MCSymbol *PCAddr, const MCExpr *PCOffset, unsigned CallReg) { + // Insert a call site entry DIE within ScopeDIE. + DIE &CallSiteDIE = createAndAddDIE(getDwarf5OrGNUTag(dwarf::DW_TAG_call_site), + ScopeDIE, nullptr); + + if (CallReg) { + // Indirect call. + addAddress(CallSiteDIE, getDwarf5OrGNUAttr(dwarf::DW_AT_call_target), + MachineLocation(CallReg)); + } else { + DIE *CalleeDIE = getOrCreateSubprogramDIE(CalleeSP); + assert(CalleeDIE && "Could not create DIE for call site entry origin"); + addDIEEntry(CallSiteDIE, getDwarf5OrGNUAttr(dwarf::DW_AT_call_origin), + *CalleeDIE); + } + + if (IsTail) + // Attach DW_AT_call_tail_call to tail calls for standards compliance. + addFlag(CallSiteDIE, getDwarf5OrGNUAttr(dwarf::DW_AT_call_tail_call)); + + // Attach the return PC to allow the debugger to disambiguate call paths + // from one function to another. + if (DD->getDwarfVersion() == 4 && DD->tuneForGDB()) { + assert(PCAddr && "Missing PC information for a call"); + addLabelAddress(CallSiteDIE, dwarf::DW_AT_low_pc, PCAddr); + } else if (!IsTail || DD->tuneForGDB()) { + assert(PCOffset && "Missing return PC information for a call"); + addAddressExpr(CallSiteDIE, dwarf::DW_AT_call_return_pc, PCOffset); + } + + return CallSiteDIE; +} + +void DwarfCompileUnit::constructCallSiteParmEntryDIEs( + DIE &CallSiteDIE, SmallVector<DbgCallSiteParam, 4> &Params) { + for (const auto &Param : Params) { + unsigned Register = Param.getRegister(); + auto CallSiteDieParam = + DIE::get(DIEValueAllocator, + getDwarf5OrGNUTag(dwarf::DW_TAG_call_site_parameter)); + insertDIE(CallSiteDieParam); + addAddress(*CallSiteDieParam, dwarf::DW_AT_location, + MachineLocation(Register)); + + DIELoc *Loc = new (DIEValueAllocator) DIELoc; + DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc); + DwarfExpr.setCallSiteParamValueFlag(); + + DwarfDebug::emitDebugLocValue(*Asm, nullptr, Param.getValue(), DwarfExpr); + + addBlock(*CallSiteDieParam, getDwarf5OrGNUAttr(dwarf::DW_AT_call_value), + DwarfExpr.finalize()); + + CallSiteDIE.addChild(CallSiteDieParam); + } +} + +DIE *DwarfCompileUnit::constructImportedEntityDIE( + const DIImportedEntity *Module) { + DIE *IMDie = DIE::get(DIEValueAllocator, (dwarf::Tag)Module->getTag()); + insertDIE(Module, IMDie); + DIE *EntityDie; + auto *Entity = Module->getEntity(); + if (auto *NS = dyn_cast<DINamespace>(Entity)) + EntityDie = getOrCreateNameSpace(NS); + else if (auto *M = dyn_cast<DIModule>(Entity)) + EntityDie = getOrCreateModule(M); + else if (auto *SP = dyn_cast<DISubprogram>(Entity)) + EntityDie = getOrCreateSubprogramDIE(SP); + else if (auto *T = dyn_cast<DIType>(Entity)) + EntityDie = getOrCreateTypeDIE(T); + else if (auto *GV = dyn_cast<DIGlobalVariable>(Entity)) + EntityDie = getOrCreateGlobalVariableDIE(GV, {}); + else + EntityDie = getDIE(Entity); + assert(EntityDie); + addSourceLine(*IMDie, Module->getLine(), Module->getFile()); + addDIEEntry(*IMDie, dwarf::DW_AT_import, *EntityDie); + StringRef Name = Module->getName(); + if (!Name.empty()) + addString(*IMDie, dwarf::DW_AT_name, Name); + + return IMDie; +} + +void DwarfCompileUnit::finishSubprogramDefinition(const DISubprogram *SP) { + DIE *D = getDIE(SP); + if (DIE *AbsSPDIE = getAbstractSPDies().lookup(SP)) { + if (D) + // If this subprogram has an abstract definition, reference that + addDIEEntry(*D, dwarf::DW_AT_abstract_origin, *AbsSPDIE); + } else { + assert(D || includeMinimalInlineScopes()); + if (D) + // And attach the attributes + applySubprogramAttributesToDefinition(SP, *D); + } +} + +void DwarfCompileUnit::finishEntityDefinition(const DbgEntity *Entity) { + DbgEntity *AbsEntity = getExistingAbstractEntity(Entity->getEntity()); + + auto *Die = Entity->getDIE(); + /// Label may be used to generate DW_AT_low_pc, so put it outside + /// if/else block. + const DbgLabel *Label = nullptr; + if (AbsEntity && AbsEntity->getDIE()) { + addDIEEntry(*Die, dwarf::DW_AT_abstract_origin, *AbsEntity->getDIE()); + Label = dyn_cast<const DbgLabel>(Entity); + } else { + if (const DbgVariable *Var = dyn_cast<const DbgVariable>(Entity)) + applyVariableAttributes(*Var, *Die); + else if ((Label = dyn_cast<const DbgLabel>(Entity))) + applyLabelAttributes(*Label, *Die); + else + llvm_unreachable("DbgEntity must be DbgVariable or DbgLabel."); + } + + if (Label) + if (const auto *Sym = Label->getSymbol()) + addLabelAddress(*Die, dwarf::DW_AT_low_pc, Sym); +} + +DbgEntity *DwarfCompileUnit::getExistingAbstractEntity(const DINode *Node) { + auto &AbstractEntities = getAbstractEntities(); + auto I = AbstractEntities.find(Node); + if (I != AbstractEntities.end()) + return I->second.get(); + return nullptr; +} + +void DwarfCompileUnit::createAbstractEntity(const DINode *Node, + LexicalScope *Scope) { + assert(Scope && Scope->isAbstractScope()); + auto &Entity = getAbstractEntities()[Node]; + if (isa<const DILocalVariable>(Node)) { + Entity = std::make_unique<DbgVariable>( + cast<const DILocalVariable>(Node), nullptr /* IA */);; + DU->addScopeVariable(Scope, cast<DbgVariable>(Entity.get())); + } else if (isa<const DILabel>(Node)) { + Entity = std::make_unique<DbgLabel>( + cast<const DILabel>(Node), nullptr /* IA */); + DU->addScopeLabel(Scope, cast<DbgLabel>(Entity.get())); + } +} + +void DwarfCompileUnit::emitHeader(bool UseOffsets) { + // Don't bother labeling the .dwo unit, as its offset isn't used. + if (!Skeleton && !DD->useSectionsAsReferences()) { + LabelBegin = Asm->createTempSymbol("cu_begin"); + Asm->OutStreamer->EmitLabel(LabelBegin); + } + + dwarf::UnitType UT = Skeleton ? dwarf::DW_UT_split_compile + : DD->useSplitDwarf() ? dwarf::DW_UT_skeleton + : dwarf::DW_UT_compile; + DwarfUnit::emitCommonHeader(UseOffsets, UT); + if (DD->getDwarfVersion() >= 5 && UT != dwarf::DW_UT_compile) + Asm->emitInt64(getDWOId()); +} + +bool DwarfCompileUnit::hasDwarfPubSections() const { + switch (CUNode->getNameTableKind()) { + case DICompileUnit::DebugNameTableKind::None: + return false; + // Opting in to GNU Pubnames/types overrides the default to ensure these are + // generated for things like Gold's gdb_index generation. + case DICompileUnit::DebugNameTableKind::GNU: + return true; + case DICompileUnit::DebugNameTableKind::Default: + return DD->tuneForGDB() && !includeMinimalInlineScopes() && + !CUNode->isDebugDirectivesOnly() && + DD->getAccelTableKind() != AccelTableKind::Apple && + DD->getDwarfVersion() < 5; + } + llvm_unreachable("Unhandled DICompileUnit::DebugNameTableKind enum"); +} + +/// addGlobalName - Add a new global name to the compile unit. +void DwarfCompileUnit::addGlobalName(StringRef Name, const DIE &Die, + const DIScope *Context) { + if (!hasDwarfPubSections()) + return; + std::string FullName = getParentContextString(Context) + Name.str(); + GlobalNames[FullName] = &Die; +} + +void DwarfCompileUnit::addGlobalNameForTypeUnit(StringRef Name, + const DIScope *Context) { + if (!hasDwarfPubSections()) + return; + std::string FullName = getParentContextString(Context) + Name.str(); + // Insert, allowing the entry to remain as-is if it's already present + // This way the CU-level type DIE is preferred over the "can't describe this + // type as a unit offset because it's not really in the CU at all, it's only + // in a type unit" + GlobalNames.insert(std::make_pair(std::move(FullName), &getUnitDie())); +} + +/// Add a new global type to the unit. +void DwarfCompileUnit::addGlobalType(const DIType *Ty, const DIE &Die, + const DIScope *Context) { + if (!hasDwarfPubSections()) + return; + std::string FullName = getParentContextString(Context) + Ty->getName().str(); + GlobalTypes[FullName] = &Die; +} + +void DwarfCompileUnit::addGlobalTypeUnitType(const DIType *Ty, + const DIScope *Context) { + if (!hasDwarfPubSections()) + return; + std::string FullName = getParentContextString(Context) + Ty->getName().str(); + // Insert, allowing the entry to remain as-is if it's already present + // This way the CU-level type DIE is preferred over the "can't describe this + // type as a unit offset because it's not really in the CU at all, it's only + // in a type unit" + GlobalTypes.insert(std::make_pair(std::move(FullName), &getUnitDie())); +} + +void DwarfCompileUnit::addVariableAddress(const DbgVariable &DV, DIE &Die, + MachineLocation Location) { + if (DV.hasComplexAddress()) + addComplexAddress(DV, Die, dwarf::DW_AT_location, Location); + else + addAddress(Die, dwarf::DW_AT_location, Location); +} + +/// Add an address attribute to a die based on the location provided. +void DwarfCompileUnit::addAddress(DIE &Die, dwarf::Attribute Attribute, + const MachineLocation &Location) { + DIELoc *Loc = new (DIEValueAllocator) DIELoc; + DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc); + if (Location.isIndirect()) + DwarfExpr.setMemoryLocationKind(); + + DIExpressionCursor Cursor({}); + const TargetRegisterInfo &TRI = *Asm->MF->getSubtarget().getRegisterInfo(); + if (!DwarfExpr.addMachineRegExpression(TRI, Cursor, Location.getReg())) + return; + DwarfExpr.addExpression(std::move(Cursor)); + + // Now attach the location information to the DIE. + addBlock(Die, Attribute, DwarfExpr.finalize()); +} + +/// Start with the address based on the location provided, and generate the +/// DWARF information necessary to find the actual variable given the extra +/// address information encoded in the DbgVariable, starting from the starting +/// location. Add the DWARF information to the die. +void DwarfCompileUnit::addComplexAddress(const DbgVariable &DV, DIE &Die, + dwarf::Attribute Attribute, + const MachineLocation &Location) { + DIELoc *Loc = new (DIEValueAllocator) DIELoc; + DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc); + const DIExpression *DIExpr = DV.getSingleExpression(); + DwarfExpr.addFragmentOffset(DIExpr); + if (Location.isIndirect()) + DwarfExpr.setMemoryLocationKind(); + + DIExpressionCursor Cursor(DIExpr); + + if (DIExpr->isEntryValue()) { + DwarfExpr.setEntryValueFlag(); + DwarfExpr.beginEntryValueExpression(Cursor); + } + + const TargetRegisterInfo &TRI = *Asm->MF->getSubtarget().getRegisterInfo(); + if (!DwarfExpr.addMachineRegExpression(TRI, Cursor, Location.getReg())) + return; + DwarfExpr.addExpression(std::move(Cursor)); + + // Now attach the location information to the DIE. + addBlock(Die, Attribute, DwarfExpr.finalize()); +} + +/// Add a Dwarf loclistptr attribute data and value. +void DwarfCompileUnit::addLocationList(DIE &Die, dwarf::Attribute Attribute, + unsigned Index) { + dwarf::Form Form = DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset + : dwarf::DW_FORM_data4; + Die.addValue(DIEValueAllocator, Attribute, Form, DIELocList(Index)); +} + +void DwarfCompileUnit::applyVariableAttributes(const DbgVariable &Var, + DIE &VariableDie) { + StringRef Name = Var.getName(); + if (!Name.empty()) + addString(VariableDie, dwarf::DW_AT_name, Name); + const auto *DIVar = Var.getVariable(); + if (DIVar) + if (uint32_t AlignInBytes = DIVar->getAlignInBytes()) + addUInt(VariableDie, dwarf::DW_AT_alignment, dwarf::DW_FORM_udata, + AlignInBytes); + + addSourceLine(VariableDie, DIVar); + addType(VariableDie, Var.getType()); + if (Var.isArtificial()) + addFlag(VariableDie, dwarf::DW_AT_artificial); +} + +void DwarfCompileUnit::applyLabelAttributes(const DbgLabel &Label, + DIE &LabelDie) { + StringRef Name = Label.getName(); + if (!Name.empty()) + addString(LabelDie, dwarf::DW_AT_name, Name); + const auto *DILabel = Label.getLabel(); + addSourceLine(LabelDie, DILabel); +} + +/// Add a Dwarf expression attribute data and value. +void DwarfCompileUnit::addExpr(DIELoc &Die, dwarf::Form Form, + const MCExpr *Expr) { + Die.addValue(DIEValueAllocator, (dwarf::Attribute)0, Form, DIEExpr(Expr)); +} + +void DwarfCompileUnit::addAddressExpr(DIE &Die, dwarf::Attribute Attribute, + const MCExpr *Expr) { + Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_addr, + DIEExpr(Expr)); +} + +void DwarfCompileUnit::applySubprogramAttributesToDefinition( + const DISubprogram *SP, DIE &SPDie) { + auto *SPDecl = SP->getDeclaration(); + auto *Context = SPDecl ? SPDecl->getScope() : SP->getScope(); + applySubprogramAttributes(SP, SPDie, includeMinimalInlineScopes()); + addGlobalName(SP->getName(), SPDie, Context); +} + +bool DwarfCompileUnit::isDwoUnit() const { + return DD->useSplitDwarf() && Skeleton; +} + +void DwarfCompileUnit::finishNonUnitTypeDIE(DIE& D, const DICompositeType *CTy) { + constructTypeDIE(D, CTy); +} + +bool DwarfCompileUnit::includeMinimalInlineScopes() const { + return getCUNode()->getEmissionKind() == DICompileUnit::LineTablesOnly || + (DD->useSplitDwarf() && !Skeleton); +} + +void DwarfCompileUnit::addAddrTableBase() { + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + MCSymbol *Label = DD->getAddressPool().getLabel(); + addSectionLabel(getUnitDie(), + getDwarfVersion() >= 5 ? dwarf::DW_AT_addr_base + : dwarf::DW_AT_GNU_addr_base, + Label, TLOF.getDwarfAddrSection()->getBeginSymbol()); +} + +void DwarfCompileUnit::addBaseTypeRef(DIEValueList &Die, int64_t Idx) { + Die.addValue(DIEValueAllocator, (dwarf::Attribute)0, dwarf::DW_FORM_udata, + new (DIEValueAllocator) DIEBaseTypeRef(this, Idx)); +} + +void DwarfCompileUnit::createBaseTypeDIEs() { + // Insert the base_type DIEs directly after the CU so that their offsets will + // fit in the fixed size ULEB128 used inside the location expressions. + // Maintain order by iterating backwards and inserting to the front of CU + // child list. + for (auto &Btr : reverse(ExprRefedBaseTypes)) { + DIE &Die = getUnitDie().addChildFront( + DIE::get(DIEValueAllocator, dwarf::DW_TAG_base_type)); + SmallString<32> Str; + addString(Die, dwarf::DW_AT_name, + Twine(dwarf::AttributeEncodingString(Btr.Encoding) + + "_" + Twine(Btr.BitSize)).toStringRef(Str)); + addUInt(Die, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, Btr.Encoding); + addUInt(Die, dwarf::DW_AT_byte_size, None, Btr.BitSize / 8); + + Btr.Die = &Die; + } +} diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h new file mode 100644 index 000000000000..1b7ea2673ac0 --- /dev/null +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -0,0 +1,365 @@ +//===- llvm/CodeGen/DwarfCompileUnit.h - Dwarf Compile Unit -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing dwarf compile unit. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H + +#include "DwarfDebug.h" +#include "DwarfUnit.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/CodeGen/DbgEntityHistoryCalculator.h" +#include "llvm/CodeGen/DIE.h" +#include "llvm/CodeGen/LexicalScopes.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/Support/Casting.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <memory> + +namespace llvm { + +class AsmPrinter; +class DwarfFile; +class GlobalVariable; +class MCExpr; +class MCSymbol; +class MDNode; + +class DwarfCompileUnit final : public DwarfUnit { + /// A numeric ID unique among all CUs in the module + unsigned UniqueID; + bool HasRangeLists = false; + + /// The attribute index of DW_AT_stmt_list in the compile unit DIE, avoiding + /// the need to search for it in applyStmtList. + DIE::value_iterator StmtListValue; + + /// Skeleton unit associated with this unit. + DwarfCompileUnit *Skeleton = nullptr; + + /// The start of the unit within its section. + MCSymbol *LabelBegin; + + /// The start of the unit macro info within macro section. + MCSymbol *MacroLabelBegin; + + using ImportedEntityList = SmallVector<const MDNode *, 8>; + using ImportedEntityMap = DenseMap<const MDNode *, ImportedEntityList>; + + ImportedEntityMap ImportedEntities; + + /// GlobalNames - A map of globally visible named entities for this unit. + StringMap<const DIE *> GlobalNames; + + /// GlobalTypes - A map of globally visible types for this unit. + StringMap<const DIE *> GlobalTypes; + + // List of ranges for a given compile unit. + SmallVector<RangeSpan, 2> CURanges; + + // The base address of this unit, if any. Used for relative references in + // ranges/locs. + const MCSymbol *BaseAddress = nullptr; + + DenseMap<const MDNode *, DIE *> AbstractSPDies; + DenseMap<const DINode *, std::unique_ptr<DbgEntity>> AbstractEntities; + + /// DWO ID for correlating skeleton and split units. + uint64_t DWOId = 0; + + /// Construct a DIE for the given DbgVariable without initializing the + /// DbgVariable's DIE reference. + DIE *constructVariableDIEImpl(const DbgVariable &DV, bool Abstract); + + bool isDwoUnit() const override; + + DenseMap<const MDNode *, DIE *> &getAbstractSPDies() { + if (isDwoUnit() && !DD->shareAcrossDWOCUs()) + return AbstractSPDies; + return DU->getAbstractSPDies(); + } + + DenseMap<const DINode *, std::unique_ptr<DbgEntity>> &getAbstractEntities() { + if (isDwoUnit() && !DD->shareAcrossDWOCUs()) + return AbstractEntities; + return DU->getAbstractEntities(); + } + + void finishNonUnitTypeDIE(DIE& D, const DICompositeType *CTy) override; + +public: + DwarfCompileUnit(unsigned UID, const DICompileUnit *Node, AsmPrinter *A, + DwarfDebug *DW, DwarfFile *DWU); + + bool hasRangeLists() const { return HasRangeLists; } + unsigned getUniqueID() const { return UniqueID; } + + DwarfCompileUnit *getSkeleton() const { + return Skeleton; + } + + bool includeMinimalInlineScopes() const; + + void initStmtList(); + + /// Apply the DW_AT_stmt_list from this compile unit to the specified DIE. + void applyStmtList(DIE &D); + + /// A pair of GlobalVariable and DIExpression. + struct GlobalExpr { + const GlobalVariable *Var; + const DIExpression *Expr; + }; + + struct BaseTypeRef { + BaseTypeRef(unsigned BitSize, dwarf::TypeKind Encoding) : + BitSize(BitSize), Encoding(Encoding) {} + unsigned BitSize; + dwarf::TypeKind Encoding; + DIE *Die = nullptr; + }; + + std::vector<BaseTypeRef> ExprRefedBaseTypes; + + /// Get or create global variable DIE. + DIE * + getOrCreateGlobalVariableDIE(const DIGlobalVariable *GV, + ArrayRef<GlobalExpr> GlobalExprs); + + DIE *getOrCreateCommonBlock(const DICommonBlock *CB, + ArrayRef<GlobalExpr> GlobalExprs); + + void addLocationAttribute(DIE *ToDIE, const DIGlobalVariable *GV, + ArrayRef<GlobalExpr> GlobalExprs); + + /// addLabelAddress - Add a dwarf label attribute data and value using + /// either DW_FORM_addr or DW_FORM_GNU_addr_index. + void addLabelAddress(DIE &Die, dwarf::Attribute Attribute, + const MCSymbol *Label); + + /// addLocalLabelAddress - Add a dwarf label attribute data and value using + /// DW_FORM_addr only. + void addLocalLabelAddress(DIE &Die, dwarf::Attribute Attribute, + const MCSymbol *Label); + + DwarfCompileUnit &getCU() override { return *this; } + + unsigned getOrCreateSourceID(const DIFile *File) override; + + void addImportedEntity(const DIImportedEntity* IE) { + DIScope *Scope = IE->getScope(); + assert(Scope && "Invalid Scope encoding!"); + if (!isa<DILocalScope>(Scope)) + // No need to add imported enities that are not local declaration. + return; + + auto *LocalScope = cast<DILocalScope>(Scope)->getNonLexicalBlockFileScope(); + ImportedEntities[LocalScope].push_back(IE); + } + + /// addRange - Add an address range to the list of ranges for this unit. + void addRange(RangeSpan Range); + + void attachLowHighPC(DIE &D, const MCSymbol *Begin, const MCSymbol *End); + + /// Find DIE for the given subprogram and attach appropriate + /// DW_AT_low_pc and DW_AT_high_pc attributes. If there are global + /// variables in this scope then create and insert DIEs for these + /// variables. + DIE &updateSubprogramScopeDIE(const DISubprogram *SP); + + void constructScopeDIE(LexicalScope *Scope, + SmallVectorImpl<DIE *> &FinalChildren); + + /// A helper function to construct a RangeSpanList for a given + /// lexical scope. + void addScopeRangeList(DIE &ScopeDIE, SmallVector<RangeSpan, 2> Range); + + void attachRangesOrLowHighPC(DIE &D, SmallVector<RangeSpan, 2> Ranges); + + void attachRangesOrLowHighPC(DIE &D, + const SmallVectorImpl<InsnRange> &Ranges); + + /// This scope represents inlined body of a function. Construct + /// DIE to represent this concrete inlined copy of the function. + DIE *constructInlinedScopeDIE(LexicalScope *Scope); + + /// Construct new DW_TAG_lexical_block for this scope and + /// attach DW_AT_low_pc/DW_AT_high_pc labels. + DIE *constructLexicalScopeDIE(LexicalScope *Scope); + + /// constructVariableDIE - Construct a DIE for the given DbgVariable. + DIE *constructVariableDIE(DbgVariable &DV, bool Abstract = false); + + DIE *constructVariableDIE(DbgVariable &DV, const LexicalScope &Scope, + DIE *&ObjectPointer); + + /// Construct a DIE for the given DbgLabel. + DIE *constructLabelDIE(DbgLabel &DL, const LexicalScope &Scope); + + /// A helper function to create children of a Scope DIE. + DIE *createScopeChildrenDIE(LexicalScope *Scope, + SmallVectorImpl<DIE *> &Children, + bool *HasNonScopeChildren = nullptr); + + void createBaseTypeDIEs(); + + /// Construct a DIE for this subprogram scope. + DIE &constructSubprogramScopeDIE(const DISubprogram *Sub, + LexicalScope *Scope); + + DIE *createAndAddScopeChildren(LexicalScope *Scope, DIE &ScopeDIE); + + void constructAbstractSubprogramScopeDIE(LexicalScope *Scope); + + /// This takes a DWARF 5 tag and returns it or a GNU analog. + dwarf::Tag getDwarf5OrGNUTag(dwarf::Tag Tag) const; + + /// This takes a DWARF 5 attribute and returns it or a GNU analog. + dwarf::Attribute getDwarf5OrGNUAttr(dwarf::Attribute Attr) const; + + /// This takes a DWARF 5 location atom and either returns it or a GNU analog. + dwarf::LocationAtom getDwarf5OrGNULocationAtom(dwarf::LocationAtom Loc) const; + + /// Construct a call site entry DIE describing a call within \p Scope to a + /// callee described by \p CalleeSP. + /// \p IsTail specifies whether the call is a tail call. + /// \p PCAddr (used for GDB + DWARF 4 tuning) points to the PC value after + /// the call instruction. + /// \p PCOffset (used for cases other than GDB + DWARF 4 tuning) must be + /// non-zero for non-tail calls (in the case of non-gdb tuning, since for + /// GDB + DWARF 5 tuning we still generate PC info for tail calls) or be the + /// function-local offset to PC value after the call instruction. + /// \p CallReg is a register location for an indirect call. For direct calls + /// the \p CallReg is set to 0. + DIE &constructCallSiteEntryDIE(DIE &ScopeDIE, const DISubprogram *CalleeSP, + bool IsTail, const MCSymbol *PCAddr, + const MCExpr *PCOffset, unsigned CallReg); + /// Construct call site parameter DIEs for the \p CallSiteDIE. The \p Params + /// were collected by the \ref collectCallSiteParameters. + /// Note: The order of parameters does not matter, since debuggers recognize + /// call site parameters by the DW_AT_location attribute. + void constructCallSiteParmEntryDIEs(DIE &CallSiteDIE, + SmallVector<DbgCallSiteParam, 4> &Params); + + /// Construct import_module DIE. + DIE *constructImportedEntityDIE(const DIImportedEntity *Module); + + void finishSubprogramDefinition(const DISubprogram *SP); + void finishEntityDefinition(const DbgEntity *Entity); + + /// Find abstract variable associated with Var. + using InlinedEntity = DbgValueHistoryMap::InlinedEntity; + DbgEntity *getExistingAbstractEntity(const DINode *Node); + void createAbstractEntity(const DINode *Node, LexicalScope *Scope); + + /// Set the skeleton unit associated with this unit. + void setSkeleton(DwarfCompileUnit &Skel) { Skeleton = &Skel; } + + unsigned getHeaderSize() const override { + // DWARF v5 added the DWO ID to the header for split/skeleton units. + unsigned DWOIdSize = + DD->getDwarfVersion() >= 5 && DD->useSplitDwarf() ? sizeof(uint64_t) + : 0; + return DwarfUnit::getHeaderSize() + DWOIdSize; + } + unsigned getLength() { + return sizeof(uint32_t) + // Length field + getHeaderSize() + getUnitDie().getSize(); + } + + void emitHeader(bool UseOffsets) override; + + /// Add the DW_AT_addr_base attribute to the unit DIE. + void addAddrTableBase(); + + MCSymbol *getLabelBegin() const { + assert(getSection()); + return LabelBegin; + } + + MCSymbol *getMacroLabelBegin() const { + return MacroLabelBegin; + } + + /// Add a new global name to the compile unit. + void addGlobalName(StringRef Name, const DIE &Die, + const DIScope *Context) override; + + /// Add a new global name present in a type unit to this compile unit. + void addGlobalNameForTypeUnit(StringRef Name, const DIScope *Context); + + /// Add a new global type to the compile unit. + void addGlobalType(const DIType *Ty, const DIE &Die, + const DIScope *Context) override; + + /// Add a new global type present in a type unit to this compile unit. + void addGlobalTypeUnitType(const DIType *Ty, const DIScope *Context); + + const StringMap<const DIE *> &getGlobalNames() const { return GlobalNames; } + const StringMap<const DIE *> &getGlobalTypes() const { return GlobalTypes; } + + /// Add DW_AT_location attribute for a DbgVariable based on provided + /// MachineLocation. + void addVariableAddress(const DbgVariable &DV, DIE &Die, + MachineLocation Location); + /// Add an address attribute to a die based on the location provided. + void addAddress(DIE &Die, dwarf::Attribute Attribute, + const MachineLocation &Location); + + /// Start with the address based on the location provided, and generate the + /// DWARF information necessary to find the actual variable (navigating the + /// extra location information encoded in the type) based on the starting + /// location. Add the DWARF information to the die. + void addComplexAddress(const DbgVariable &DV, DIE &Die, + dwarf::Attribute Attribute, + const MachineLocation &Location); + + /// Add a Dwarf loclistptr attribute data and value. + void addLocationList(DIE &Die, dwarf::Attribute Attribute, unsigned Index); + void applyVariableAttributes(const DbgVariable &Var, DIE &VariableDie); + + /// Add a Dwarf expression attribute data and value. + void addExpr(DIELoc &Die, dwarf::Form Form, const MCExpr *Expr); + + /// Add an attribute containing an address expression to \p Die. + void addAddressExpr(DIE &Die, dwarf::Attribute Attribute, const MCExpr *Expr); + + void applySubprogramAttributesToDefinition(const DISubprogram *SP, + DIE &SPDie); + + void applyLabelAttributes(const DbgLabel &Label, DIE &LabelDie); + + /// getRanges - Get the list of ranges for this unit. + const SmallVectorImpl<RangeSpan> &getRanges() const { return CURanges; } + SmallVector<RangeSpan, 2> takeRanges() { return std::move(CURanges); } + + void setBaseAddress(const MCSymbol *Base) { BaseAddress = Base; } + const MCSymbol *getBaseAddress() const { return BaseAddress; } + + uint64_t getDWOId() const { return DWOId; } + void setDWOId(uint64_t DwoId) { DWOId = DwoId; } + + bool hasDwarfPubSections() const; + + void addBaseTypeRef(DIEValueList &Die, int64_t Idx); +}; + +} // end namespace llvm + +#endif // LLVM_LIB_CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp new file mode 100644 index 000000000000..c505e77e5acd --- /dev/null +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -0,0 +1,3038 @@ +//===- llvm/CodeGen/DwarfDebug.cpp - Dwarf Debug Framework ----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing dwarf debug info into asm files. +// +//===----------------------------------------------------------------------===// + +#include "DwarfDebug.h" +#include "ByteStreamer.h" +#include "DIEHash.h" +#include "DebugLocEntry.h" +#include "DebugLocStream.h" +#include "DwarfCompileUnit.h" +#include "DwarfExpression.h" +#include "DwarfFile.h" +#include "DwarfUnit.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/Triple.h" +#include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/CodeGen/AccelTable.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/DIE.h" +#include "llvm/CodeGen/LexicalScopes.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/DebugInfo/DWARF/DWARFExpression.h" +#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Module.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDwarf.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCTargetOptions.h" +#include "llvm/MC/MachineLocation.h" +#include "llvm/MC/SectionKind.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MD5.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/Timer.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include <algorithm> +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <iterator> +#include <string> +#include <utility> +#include <vector> + +using namespace llvm; + +#define DEBUG_TYPE "dwarfdebug" + +STATISTIC(NumCSParams, "Number of dbg call site params created"); + +static cl::opt<bool> +DisableDebugInfoPrinting("disable-debug-info-print", cl::Hidden, + cl::desc("Disable debug info printing")); + +static cl::opt<bool> UseDwarfRangesBaseAddressSpecifier( + "use-dwarf-ranges-base-address-specifier", cl::Hidden, + cl::desc("Use base address specifiers in debug_ranges"), cl::init(false)); + +static cl::opt<bool> GenerateARangeSection("generate-arange-section", + cl::Hidden, + cl::desc("Generate dwarf aranges"), + cl::init(false)); + +static cl::opt<bool> + GenerateDwarfTypeUnits("generate-type-units", cl::Hidden, + cl::desc("Generate DWARF4 type units."), + cl::init(false)); + +static cl::opt<bool> SplitDwarfCrossCuReferences( + "split-dwarf-cross-cu-references", cl::Hidden, + cl::desc("Enable cross-cu references in DWO files"), cl::init(false)); + +enum DefaultOnOff { Default, Enable, Disable }; + +static cl::opt<DefaultOnOff> UnknownLocations( + "use-unknown-locations", cl::Hidden, + cl::desc("Make an absence of debug location information explicit."), + cl::values(clEnumVal(Default, "At top of block or after label"), + clEnumVal(Enable, "In all cases"), clEnumVal(Disable, "Never")), + cl::init(Default)); + +static cl::opt<AccelTableKind> AccelTables( + "accel-tables", cl::Hidden, cl::desc("Output dwarf accelerator tables."), + cl::values(clEnumValN(AccelTableKind::Default, "Default", + "Default for platform"), + clEnumValN(AccelTableKind::None, "Disable", "Disabled."), + clEnumValN(AccelTableKind::Apple, "Apple", "Apple"), + clEnumValN(AccelTableKind::Dwarf, "Dwarf", "DWARF")), + cl::init(AccelTableKind::Default)); + +static cl::opt<DefaultOnOff> +DwarfInlinedStrings("dwarf-inlined-strings", cl::Hidden, + cl::desc("Use inlined strings rather than string section."), + cl::values(clEnumVal(Default, "Default for platform"), + clEnumVal(Enable, "Enabled"), + clEnumVal(Disable, "Disabled")), + cl::init(Default)); + +static cl::opt<bool> + NoDwarfRangesSection("no-dwarf-ranges-section", cl::Hidden, + cl::desc("Disable emission .debug_ranges section."), + cl::init(false)); + +static cl::opt<DefaultOnOff> DwarfSectionsAsReferences( + "dwarf-sections-as-references", cl::Hidden, + cl::desc("Use sections+offset as references rather than labels."), + cl::values(clEnumVal(Default, "Default for platform"), + clEnumVal(Enable, "Enabled"), clEnumVal(Disable, "Disabled")), + cl::init(Default)); + +enum LinkageNameOption { + DefaultLinkageNames, + AllLinkageNames, + AbstractLinkageNames +}; + +static cl::opt<LinkageNameOption> + DwarfLinkageNames("dwarf-linkage-names", cl::Hidden, + cl::desc("Which DWARF linkage-name attributes to emit."), + cl::values(clEnumValN(DefaultLinkageNames, "Default", + "Default for platform"), + clEnumValN(AllLinkageNames, "All", "All"), + clEnumValN(AbstractLinkageNames, "Abstract", + "Abstract subprograms")), + cl::init(DefaultLinkageNames)); + +static const char *const DWARFGroupName = "dwarf"; +static const char *const DWARFGroupDescription = "DWARF Emission"; +static const char *const DbgTimerName = "writer"; +static const char *const DbgTimerDescription = "DWARF Debug Writer"; +static constexpr unsigned ULEB128PadSize = 4; + +void DebugLocDwarfExpression::emitOp(uint8_t Op, const char *Comment) { + getActiveStreamer().EmitInt8( + Op, Comment ? Twine(Comment) + " " + dwarf::OperationEncodingString(Op) + : dwarf::OperationEncodingString(Op)); +} + +void DebugLocDwarfExpression::emitSigned(int64_t Value) { + getActiveStreamer().EmitSLEB128(Value, Twine(Value)); +} + +void DebugLocDwarfExpression::emitUnsigned(uint64_t Value) { + getActiveStreamer().EmitULEB128(Value, Twine(Value)); +} + +void DebugLocDwarfExpression::emitData1(uint8_t Value) { + getActiveStreamer().EmitInt8(Value, Twine(Value)); +} + +void DebugLocDwarfExpression::emitBaseTypeRef(uint64_t Idx) { + assert(Idx < (1ULL << (ULEB128PadSize * 7)) && "Idx wont fit"); + getActiveStreamer().EmitULEB128(Idx, Twine(Idx), ULEB128PadSize); +} + +bool DebugLocDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI, + unsigned MachineReg) { + // This information is not available while emitting .debug_loc entries. + return false; +} + +void DebugLocDwarfExpression::enableTemporaryBuffer() { + assert(!IsBuffering && "Already buffering?"); + if (!TmpBuf) + TmpBuf = std::make_unique<TempBuffer>(OutBS.GenerateComments); + IsBuffering = true; +} + +void DebugLocDwarfExpression::disableTemporaryBuffer() { IsBuffering = false; } + +unsigned DebugLocDwarfExpression::getTemporaryBufferSize() { + return TmpBuf ? TmpBuf->Bytes.size() : 0; +} + +void DebugLocDwarfExpression::commitTemporaryBuffer() { + if (!TmpBuf) + return; + for (auto Byte : enumerate(TmpBuf->Bytes)) { + const char *Comment = (Byte.index() < TmpBuf->Comments.size()) + ? TmpBuf->Comments[Byte.index()].c_str() + : ""; + OutBS.EmitInt8(Byte.value(), Comment); + } + TmpBuf->Bytes.clear(); + TmpBuf->Comments.clear(); +} + +const DIType *DbgVariable::getType() const { + return getVariable()->getType(); +} + +/// Get .debug_loc entry for the instruction range starting at MI. +static DbgValueLoc getDebugLocValue(const MachineInstr *MI) { + const DIExpression *Expr = MI->getDebugExpression(); + assert(MI->getNumOperands() == 4); + if (MI->getOperand(0).isReg()) { + auto RegOp = MI->getOperand(0); + auto Op1 = MI->getOperand(1); + // If the second operand is an immediate, this is a + // register-indirect address. + assert((!Op1.isImm() || (Op1.getImm() == 0)) && "unexpected offset"); + MachineLocation MLoc(RegOp.getReg(), Op1.isImm()); + return DbgValueLoc(Expr, MLoc); + } + if (MI->getOperand(0).isImm()) + return DbgValueLoc(Expr, MI->getOperand(0).getImm()); + if (MI->getOperand(0).isFPImm()) + return DbgValueLoc(Expr, MI->getOperand(0).getFPImm()); + if (MI->getOperand(0).isCImm()) + return DbgValueLoc(Expr, MI->getOperand(0).getCImm()); + + llvm_unreachable("Unexpected 4-operand DBG_VALUE instruction!"); +} + +void DbgVariable::initializeDbgValue(const MachineInstr *DbgValue) { + assert(FrameIndexExprs.empty() && "Already initialized?"); + assert(!ValueLoc.get() && "Already initialized?"); + + assert(getVariable() == DbgValue->getDebugVariable() && "Wrong variable"); + assert(getInlinedAt() == DbgValue->getDebugLoc()->getInlinedAt() && + "Wrong inlined-at"); + + ValueLoc = std::make_unique<DbgValueLoc>(getDebugLocValue(DbgValue)); + if (auto *E = DbgValue->getDebugExpression()) + if (E->getNumElements()) + FrameIndexExprs.push_back({0, E}); +} + +ArrayRef<DbgVariable::FrameIndexExpr> DbgVariable::getFrameIndexExprs() const { + if (FrameIndexExprs.size() == 1) + return FrameIndexExprs; + + assert(llvm::all_of(FrameIndexExprs, + [](const FrameIndexExpr &A) { + return A.Expr->isFragment(); + }) && + "multiple FI expressions without DW_OP_LLVM_fragment"); + llvm::sort(FrameIndexExprs, + [](const FrameIndexExpr &A, const FrameIndexExpr &B) -> bool { + return A.Expr->getFragmentInfo()->OffsetInBits < + B.Expr->getFragmentInfo()->OffsetInBits; + }); + + return FrameIndexExprs; +} + +void DbgVariable::addMMIEntry(const DbgVariable &V) { + assert(DebugLocListIndex == ~0U && !ValueLoc.get() && "not an MMI entry"); + assert(V.DebugLocListIndex == ~0U && !V.ValueLoc.get() && "not an MMI entry"); + assert(V.getVariable() == getVariable() && "conflicting variable"); + assert(V.getInlinedAt() == getInlinedAt() && "conflicting inlined-at location"); + + assert(!FrameIndexExprs.empty() && "Expected an MMI entry"); + assert(!V.FrameIndexExprs.empty() && "Expected an MMI entry"); + + // FIXME: This logic should not be necessary anymore, as we now have proper + // deduplication. However, without it, we currently run into the assertion + // below, which means that we are likely dealing with broken input, i.e. two + // non-fragment entries for the same variable at different frame indices. + if (FrameIndexExprs.size()) { + auto *Expr = FrameIndexExprs.back().Expr; + if (!Expr || !Expr->isFragment()) + return; + } + + for (const auto &FIE : V.FrameIndexExprs) + // Ignore duplicate entries. + if (llvm::none_of(FrameIndexExprs, [&](const FrameIndexExpr &Other) { + return FIE.FI == Other.FI && FIE.Expr == Other.Expr; + })) + FrameIndexExprs.push_back(FIE); + + assert((FrameIndexExprs.size() == 1 || + llvm::all_of(FrameIndexExprs, + [](FrameIndexExpr &FIE) { + return FIE.Expr && FIE.Expr->isFragment(); + })) && + "conflicting locations for variable"); +} + +static AccelTableKind computeAccelTableKind(unsigned DwarfVersion, + bool GenerateTypeUnits, + DebuggerKind Tuning, + const Triple &TT) { + // Honor an explicit request. + if (AccelTables != AccelTableKind::Default) + return AccelTables; + + // Accelerator tables with type units are currently not supported. + if (GenerateTypeUnits) + return AccelTableKind::None; + + // Accelerator tables get emitted if targetting DWARF v5 or LLDB. DWARF v5 + // always implies debug_names. For lower standard versions we use apple + // accelerator tables on apple platforms and debug_names elsewhere. + if (DwarfVersion >= 5) + return AccelTableKind::Dwarf; + if (Tuning == DebuggerKind::LLDB) + return TT.isOSBinFormatMachO() ? AccelTableKind::Apple + : AccelTableKind::Dwarf; + return AccelTableKind::None; +} + +DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) + : DebugHandlerBase(A), DebugLocs(A->OutStreamer->isVerboseAsm()), + InfoHolder(A, "info_string", DIEValueAllocator), + SkeletonHolder(A, "skel_string", DIEValueAllocator), + IsDarwin(A->TM.getTargetTriple().isOSDarwin()) { + const Triple &TT = Asm->TM.getTargetTriple(); + + // Make sure we know our "debugger tuning". The target option takes + // precedence; fall back to triple-based defaults. + if (Asm->TM.Options.DebuggerTuning != DebuggerKind::Default) + DebuggerTuning = Asm->TM.Options.DebuggerTuning; + else if (IsDarwin) + DebuggerTuning = DebuggerKind::LLDB; + else if (TT.isPS4CPU()) + DebuggerTuning = DebuggerKind::SCE; + else + DebuggerTuning = DebuggerKind::GDB; + + if (DwarfInlinedStrings == Default) + UseInlineStrings = TT.isNVPTX(); + else + UseInlineStrings = DwarfInlinedStrings == Enable; + + UseLocSection = !TT.isNVPTX(); + + HasAppleExtensionAttributes = tuneForLLDB(); + + // Handle split DWARF. + HasSplitDwarf = !Asm->TM.Options.MCOptions.SplitDwarfFile.empty(); + + // SCE defaults to linkage names only for abstract subprograms. + if (DwarfLinkageNames == DefaultLinkageNames) + UseAllLinkageNames = !tuneForSCE(); + else + UseAllLinkageNames = DwarfLinkageNames == AllLinkageNames; + + unsigned DwarfVersionNumber = Asm->TM.Options.MCOptions.DwarfVersion; + unsigned DwarfVersion = DwarfVersionNumber ? DwarfVersionNumber + : MMI->getModule()->getDwarfVersion(); + // Use dwarf 4 by default if nothing is requested. For NVPTX, use dwarf 2. + DwarfVersion = + TT.isNVPTX() ? 2 : (DwarfVersion ? DwarfVersion : dwarf::DWARF_VERSION); + + UseRangesSection = !NoDwarfRangesSection && !TT.isNVPTX(); + + // Use sections as references. Force for NVPTX. + if (DwarfSectionsAsReferences == Default) + UseSectionsAsReferences = TT.isNVPTX(); + else + UseSectionsAsReferences = DwarfSectionsAsReferences == Enable; + + // Don't generate type units for unsupported object file formats. + GenerateTypeUnits = + A->TM.getTargetTriple().isOSBinFormatELF() && GenerateDwarfTypeUnits; + + TheAccelTableKind = computeAccelTableKind( + DwarfVersion, GenerateTypeUnits, DebuggerTuning, A->TM.getTargetTriple()); + + // Work around a GDB bug. GDB doesn't support the standard opcode; + // SCE doesn't support GNU's; LLDB prefers the standard opcode, which + // is defined as of DWARF 3. + // See GDB bug 11616 - DW_OP_form_tls_address is unimplemented + // https://sourceware.org/bugzilla/show_bug.cgi?id=11616 + UseGNUTLSOpcode = tuneForGDB() || DwarfVersion < 3; + + // GDB does not fully support the DWARF 4 representation for bitfields. + UseDWARF2Bitfields = (DwarfVersion < 4) || tuneForGDB(); + + // The DWARF v5 string offsets table has - possibly shared - contributions + // from each compile and type unit each preceded by a header. The string + // offsets table used by the pre-DWARF v5 split-DWARF implementation uses + // a monolithic string offsets table without any header. + UseSegmentedStringOffsetsTable = DwarfVersion >= 5; + + Asm->OutStreamer->getContext().setDwarfVersion(DwarfVersion); +} + +// Define out of line so we don't have to include DwarfUnit.h in DwarfDebug.h. +DwarfDebug::~DwarfDebug() = default; + +static bool isObjCClass(StringRef Name) { + return Name.startswith("+") || Name.startswith("-"); +} + +static bool hasObjCCategory(StringRef Name) { + if (!isObjCClass(Name)) + return false; + + return Name.find(") ") != StringRef::npos; +} + +static void getObjCClassCategory(StringRef In, StringRef &Class, + StringRef &Category) { + if (!hasObjCCategory(In)) { + Class = In.slice(In.find('[') + 1, In.find(' ')); + Category = ""; + return; + } + + Class = In.slice(In.find('[') + 1, In.find('(')); + Category = In.slice(In.find('[') + 1, In.find(' ')); +} + +static StringRef getObjCMethodName(StringRef In) { + return In.slice(In.find(' ') + 1, In.find(']')); +} + +// Add the various names to the Dwarf accelerator table names. +void DwarfDebug::addSubprogramNames(const DICompileUnit &CU, + const DISubprogram *SP, DIE &Die) { + if (getAccelTableKind() != AccelTableKind::Apple && + CU.getNameTableKind() == DICompileUnit::DebugNameTableKind::None) + return; + + if (!SP->isDefinition()) + return; + + if (SP->getName() != "") + addAccelName(CU, SP->getName(), Die); + + // If the linkage name is different than the name, go ahead and output that as + // well into the name table. Only do that if we are going to actually emit + // that name. + if (SP->getLinkageName() != "" && SP->getName() != SP->getLinkageName() && + (useAllLinkageNames() || InfoHolder.getAbstractSPDies().lookup(SP))) + addAccelName(CU, SP->getLinkageName(), Die); + + // If this is an Objective-C selector name add it to the ObjC accelerator + // too. + if (isObjCClass(SP->getName())) { + StringRef Class, Category; + getObjCClassCategory(SP->getName(), Class, Category); + addAccelObjC(CU, Class, Die); + if (Category != "") + addAccelObjC(CU, Category, Die); + // Also add the base method name to the name table. + addAccelName(CU, getObjCMethodName(SP->getName()), Die); + } +} + +/// Check whether we should create a DIE for the given Scope, return true +/// if we don't create a DIE (the corresponding DIE is null). +bool DwarfDebug::isLexicalScopeDIENull(LexicalScope *Scope) { + if (Scope->isAbstractScope()) + return false; + + // We don't create a DIE if there is no Range. + const SmallVectorImpl<InsnRange> &Ranges = Scope->getRanges(); + if (Ranges.empty()) + return true; + + if (Ranges.size() > 1) + return false; + + // We don't create a DIE if we have a single Range and the end label + // is null. + return !getLabelAfterInsn(Ranges.front().second); +} + +template <typename Func> static void forBothCUs(DwarfCompileUnit &CU, Func F) { + F(CU); + if (auto *SkelCU = CU.getSkeleton()) + if (CU.getCUNode()->getSplitDebugInlining()) + F(*SkelCU); +} + +bool DwarfDebug::shareAcrossDWOCUs() const { + return SplitDwarfCrossCuReferences; +} + +void DwarfDebug::constructAbstractSubprogramScopeDIE(DwarfCompileUnit &SrcCU, + LexicalScope *Scope) { + assert(Scope && Scope->getScopeNode()); + assert(Scope->isAbstractScope()); + assert(!Scope->getInlinedAt()); + + auto *SP = cast<DISubprogram>(Scope->getScopeNode()); + + // Find the subprogram's DwarfCompileUnit in the SPMap in case the subprogram + // was inlined from another compile unit. + if (useSplitDwarf() && !shareAcrossDWOCUs() && !SP->getUnit()->getSplitDebugInlining()) + // Avoid building the original CU if it won't be used + SrcCU.constructAbstractSubprogramScopeDIE(Scope); + else { + auto &CU = getOrCreateDwarfCompileUnit(SP->getUnit()); + if (auto *SkelCU = CU.getSkeleton()) { + (shareAcrossDWOCUs() ? CU : SrcCU) + .constructAbstractSubprogramScopeDIE(Scope); + if (CU.getCUNode()->getSplitDebugInlining()) + SkelCU->constructAbstractSubprogramScopeDIE(Scope); + } else + CU.constructAbstractSubprogramScopeDIE(Scope); + } +} + +/// Try to interpret values loaded into registers that forward parameters +/// for \p CallMI. Store parameters with interpreted value into \p Params. +static void collectCallSiteParameters(const MachineInstr *CallMI, + ParamSet &Params) { + auto *MF = CallMI->getMF(); + auto CalleesMap = MF->getCallSitesInfo(); + auto CallFwdRegsInfo = CalleesMap.find(CallMI); + + // There is no information for the call instruction. + if (CallFwdRegsInfo == CalleesMap.end()) + return; + + auto *MBB = CallMI->getParent(); + const auto &TRI = MF->getSubtarget().getRegisterInfo(); + const auto &TII = MF->getSubtarget().getInstrInfo(); + const auto &TLI = MF->getSubtarget().getTargetLowering(); + + // Skip the call instruction. + auto I = std::next(CallMI->getReverseIterator()); + + DenseSet<unsigned> ForwardedRegWorklist; + // Add all the forwarding registers into the ForwardedRegWorklist. + for (auto ArgReg : CallFwdRegsInfo->second) { + bool InsertedReg = ForwardedRegWorklist.insert(ArgReg.Reg).second; + assert(InsertedReg && "Single register used to forward two arguments?"); + (void)InsertedReg; + } + + // We erase, from the ForwardedRegWorklist, those forwarding registers for + // which we successfully describe a loaded value (by using + // the describeLoadedValue()). For those remaining arguments in the working + // list, for which we do not describe a loaded value by + // the describeLoadedValue(), we try to generate an entry value expression + // for their call site value desctipion, if the call is within the entry MBB. + // The RegsForEntryValues maps a forwarding register into the register holding + // the entry value. + // TODO: Handle situations when call site parameter value can be described + // as the entry value within basic blocks other then the first one. + bool ShouldTryEmitEntryVals = MBB->getIterator() == MF->begin(); + DenseMap<unsigned, unsigned> RegsForEntryValues; + + // If the MI is an instruction defining one or more parameters' forwarding + // registers, add those defines. We can currently only describe forwarded + // registers that are explicitly defined, but keep track of implicit defines + // also to remove those registers from the work list. + auto getForwardingRegsDefinedByMI = [&](const MachineInstr &MI, + SmallVectorImpl<unsigned> &Explicit, + SmallVectorImpl<unsigned> &Implicit) { + if (MI.isDebugInstr()) + return; + + for (const MachineOperand &MO : MI.operands()) { + if (MO.isReg() && MO.isDef() && + Register::isPhysicalRegister(MO.getReg())) { + for (auto FwdReg : ForwardedRegWorklist) { + if (TRI->regsOverlap(FwdReg, MO.getReg())) { + if (MO.isImplicit()) + Implicit.push_back(FwdReg); + else + Explicit.push_back(FwdReg); + break; + } + } + } + } + }; + + auto finishCallSiteParam = [&](DbgValueLoc DbgLocVal, unsigned Reg) { + unsigned FwdReg = Reg; + if (ShouldTryEmitEntryVals) { + auto EntryValReg = RegsForEntryValues.find(Reg); + if (EntryValReg != RegsForEntryValues.end()) + FwdReg = EntryValReg->second; + } + + DbgCallSiteParam CSParm(FwdReg, DbgLocVal); + Params.push_back(CSParm); + ++NumCSParams; + }; + + // Search for a loading value in forwaring registers. + for (; I != MBB->rend(); ++I) { + // If the next instruction is a call we can not interpret parameter's + // forwarding registers or we finished the interpretation of all parameters. + if (I->isCall()) + return; + + if (ForwardedRegWorklist.empty()) + return; + + SmallVector<unsigned, 4> ExplicitFwdRegDefs; + SmallVector<unsigned, 4> ImplicitFwdRegDefs; + getForwardingRegsDefinedByMI(*I, ExplicitFwdRegDefs, ImplicitFwdRegDefs); + if (ExplicitFwdRegDefs.empty() && ImplicitFwdRegDefs.empty()) + continue; + + // If the MI clobbers more then one forwarding register we must remove + // all of them from the working list. + for (auto Reg : concat<unsigned>(ExplicitFwdRegDefs, ImplicitFwdRegDefs)) + ForwardedRegWorklist.erase(Reg); + + // The describeLoadedValue() hook currently does not have any information + // about which register it should describe in case of multiple defines, so + // for now we only handle instructions where a forwarded register is (at + // least partially) defined by the instruction's single explicit define. + if (I->getNumExplicitDefs() != 1 || ExplicitFwdRegDefs.empty()) + continue; + unsigned Reg = ExplicitFwdRegDefs[0]; + + if (auto ParamValue = TII->describeLoadedValue(*I)) { + if (ParamValue->first.isImm()) { + int64_t Val = ParamValue->first.getImm(); + DbgValueLoc DbgLocVal(ParamValue->second, Val); + finishCallSiteParam(DbgLocVal, Reg); + } else if (ParamValue->first.isReg()) { + Register RegLoc = ParamValue->first.getReg(); + unsigned SP = TLI->getStackPointerRegisterToSaveRestore(); + Register FP = TRI->getFrameRegister(*MF); + bool IsSPorFP = (RegLoc == SP) || (RegLoc == FP); + if (TRI->isCalleeSavedPhysReg(RegLoc, *MF) || IsSPorFP) { + DbgValueLoc DbgLocVal(ParamValue->second, + MachineLocation(RegLoc, + /*IsIndirect=*/IsSPorFP)); + finishCallSiteParam(DbgLocVal, Reg); + } else if (ShouldTryEmitEntryVals) { + ForwardedRegWorklist.insert(RegLoc); + RegsForEntryValues[RegLoc] = Reg; + } + } + } + } + + // Emit the call site parameter's value as an entry value. + if (ShouldTryEmitEntryVals) { + // Create an expression where the register's entry value is used. + DIExpression *EntryExpr = DIExpression::get( + MF->getFunction().getContext(), {dwarf::DW_OP_LLVM_entry_value, 1}); + for (auto RegEntry : ForwardedRegWorklist) { + unsigned FwdReg = RegEntry; + auto EntryValReg = RegsForEntryValues.find(RegEntry); + if (EntryValReg != RegsForEntryValues.end()) + FwdReg = EntryValReg->second; + + DbgValueLoc DbgLocVal(EntryExpr, MachineLocation(RegEntry)); + DbgCallSiteParam CSParm(FwdReg, DbgLocVal); + Params.push_back(CSParm); + ++NumCSParams; + } + } +} + +void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP, + DwarfCompileUnit &CU, DIE &ScopeDIE, + const MachineFunction &MF) { + // Add a call site-related attribute (DWARF5, Sec. 3.3.1.3). Do this only if + // the subprogram is required to have one. + if (!SP.areAllCallsDescribed() || !SP.isDefinition()) + return; + + // Use DW_AT_call_all_calls to express that call site entries are present + // for both tail and non-tail calls. Don't use DW_AT_call_all_source_calls + // because one of its requirements is not met: call site entries for + // optimized-out calls are elided. + CU.addFlag(ScopeDIE, CU.getDwarf5OrGNUAttr(dwarf::DW_AT_call_all_calls)); + + const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); + assert(TII && "TargetInstrInfo not found: cannot label tail calls"); + bool ApplyGNUExtensions = getDwarfVersion() == 4 && tuneForGDB(); + + // Emit call site entries for each call or tail call in the function. + for (const MachineBasicBlock &MBB : MF) { + for (const MachineInstr &MI : MBB.instrs()) { + // Skip instructions which aren't calls. Both calls and tail-calling jump + // instructions (e.g TAILJMPd64) are classified correctly here. + if (!MI.isCall()) + continue; + + // TODO: Add support for targets with delay slots (see: beginInstruction). + if (MI.hasDelaySlot()) + return; + + // If this is a direct call, find the callee's subprogram. + // In the case of an indirect call find the register that holds + // the callee. + const MachineOperand &CalleeOp = MI.getOperand(0); + if (!CalleeOp.isGlobal() && !CalleeOp.isReg()) + continue; + + unsigned CallReg = 0; + const DISubprogram *CalleeSP = nullptr; + const Function *CalleeDecl = nullptr; + if (CalleeOp.isReg()) { + CallReg = CalleeOp.getReg(); + if (!CallReg) + continue; + } else { + CalleeDecl = dyn_cast<Function>(CalleeOp.getGlobal()); + if (!CalleeDecl || !CalleeDecl->getSubprogram()) + continue; + CalleeSP = CalleeDecl->getSubprogram(); + } + + // TODO: Omit call site entries for runtime calls (objc_msgSend, etc). + + bool IsTail = TII->isTailCall(MI); + + // For tail calls, for non-gdb tuning, no return PC information is needed. + // For regular calls (and tail calls in GDB tuning), the return PC + // is needed to disambiguate paths in the call graph which could lead to + // some target function. + const MCExpr *PCOffset = + (IsTail && !tuneForGDB()) ? nullptr + : getFunctionLocalOffsetAfterInsn(&MI); + + // Address of a call-like instruction for a normal call or a jump-like + // instruction for a tail call. This is needed for GDB + DWARF 4 tuning. + const MCSymbol *PCAddr = + ApplyGNUExtensions ? const_cast<MCSymbol*>(getLabelAfterInsn(&MI)) + : nullptr; + + assert((IsTail || PCOffset || PCAddr) && + "Call without return PC information"); + + LLVM_DEBUG(dbgs() << "CallSiteEntry: " << MF.getName() << " -> " + << (CalleeDecl ? CalleeDecl->getName() + : StringRef(MF.getSubtarget() + .getRegisterInfo() + ->getName(CallReg))) + << (IsTail ? " [IsTail]" : "") << "\n"); + + DIE &CallSiteDIE = + CU.constructCallSiteEntryDIE(ScopeDIE, CalleeSP, IsTail, PCAddr, + PCOffset, CallReg); + + // GDB and LLDB support call site parameter debug info. + if (Asm->TM.Options.EnableDebugEntryValues && + (tuneForGDB() || tuneForLLDB())) { + ParamSet Params; + // Try to interpret values of call site parameters. + collectCallSiteParameters(&MI, Params); + CU.constructCallSiteParmEntryDIEs(CallSiteDIE, Params); + } + } + } +} + +void DwarfDebug::addGnuPubAttributes(DwarfCompileUnit &U, DIE &D) const { + if (!U.hasDwarfPubSections()) + return; + + U.addFlag(D, dwarf::DW_AT_GNU_pubnames); +} + +void DwarfDebug::finishUnitAttributes(const DICompileUnit *DIUnit, + DwarfCompileUnit &NewCU) { + DIE &Die = NewCU.getUnitDie(); + StringRef FN = DIUnit->getFilename(); + + StringRef Producer = DIUnit->getProducer(); + StringRef Flags = DIUnit->getFlags(); + if (!Flags.empty() && !useAppleExtensionAttributes()) { + std::string ProducerWithFlags = Producer.str() + " " + Flags.str(); + NewCU.addString(Die, dwarf::DW_AT_producer, ProducerWithFlags); + } else + NewCU.addString(Die, dwarf::DW_AT_producer, Producer); + + NewCU.addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2, + DIUnit->getSourceLanguage()); + NewCU.addString(Die, dwarf::DW_AT_name, FN); + + // Add DW_str_offsets_base to the unit DIE, except for split units. + if (useSegmentedStringOffsetsTable() && !useSplitDwarf()) + NewCU.addStringOffsetsStart(); + + if (!useSplitDwarf()) { + NewCU.initStmtList(); + + // If we're using split dwarf the compilation dir is going to be in the + // skeleton CU and so we don't need to duplicate it here. + if (!CompilationDir.empty()) + NewCU.addString(Die, dwarf::DW_AT_comp_dir, CompilationDir); + + addGnuPubAttributes(NewCU, Die); + } + + if (useAppleExtensionAttributes()) { + if (DIUnit->isOptimized()) + NewCU.addFlag(Die, dwarf::DW_AT_APPLE_optimized); + + StringRef Flags = DIUnit->getFlags(); + if (!Flags.empty()) + NewCU.addString(Die, dwarf::DW_AT_APPLE_flags, Flags); + + if (unsigned RVer = DIUnit->getRuntimeVersion()) + NewCU.addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers, + dwarf::DW_FORM_data1, RVer); + } + + if (DIUnit->getDWOId()) { + // This CU is either a clang module DWO or a skeleton CU. + NewCU.addUInt(Die, dwarf::DW_AT_GNU_dwo_id, dwarf::DW_FORM_data8, + DIUnit->getDWOId()); + if (!DIUnit->getSplitDebugFilename().empty()) + // This is a prefabricated skeleton CU. + NewCU.addString(Die, dwarf::DW_AT_GNU_dwo_name, + DIUnit->getSplitDebugFilename()); + } +} +// Create new DwarfCompileUnit for the given metadata node with tag +// DW_TAG_compile_unit. +DwarfCompileUnit & +DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) { + if (auto *CU = CUMap.lookup(DIUnit)) + return *CU; + + CompilationDir = DIUnit->getDirectory(); + + auto OwnedUnit = std::make_unique<DwarfCompileUnit>( + InfoHolder.getUnits().size(), DIUnit, Asm, this, &InfoHolder); + DwarfCompileUnit &NewCU = *OwnedUnit; + InfoHolder.addUnit(std::move(OwnedUnit)); + + for (auto *IE : DIUnit->getImportedEntities()) + NewCU.addImportedEntity(IE); + + // LTO with assembly output shares a single line table amongst multiple CUs. + // To avoid the compilation directory being ambiguous, let the line table + // explicitly describe the directory of all files, never relying on the + // compilation directory. + if (!Asm->OutStreamer->hasRawTextSupport() || SingleCU) + Asm->OutStreamer->emitDwarfFile0Directive( + CompilationDir, DIUnit->getFilename(), + NewCU.getMD5AsBytes(DIUnit->getFile()), DIUnit->getSource(), + NewCU.getUniqueID()); + + if (useSplitDwarf()) { + NewCU.setSkeleton(constructSkeletonCU(NewCU)); + NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoDWOSection()); + } else { + finishUnitAttributes(DIUnit, NewCU); + NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoSection()); + } + + // Create DIEs for function declarations used for call site debug info. + for (auto Scope : DIUnit->getRetainedTypes()) + if (auto *SP = dyn_cast_or_null<DISubprogram>(Scope)) + NewCU.getOrCreateSubprogramDIE(SP); + + CUMap.insert({DIUnit, &NewCU}); + CUDieMap.insert({&NewCU.getUnitDie(), &NewCU}); + return NewCU; +} + +void DwarfDebug::constructAndAddImportedEntityDIE(DwarfCompileUnit &TheCU, + const DIImportedEntity *N) { + if (isa<DILocalScope>(N->getScope())) + return; + if (DIE *D = TheCU.getOrCreateContextDIE(N->getScope())) + D->addChild(TheCU.constructImportedEntityDIE(N)); +} + +/// Sort and unique GVEs by comparing their fragment offset. +static SmallVectorImpl<DwarfCompileUnit::GlobalExpr> & +sortGlobalExprs(SmallVectorImpl<DwarfCompileUnit::GlobalExpr> &GVEs) { + llvm::sort( + GVEs, [](DwarfCompileUnit::GlobalExpr A, DwarfCompileUnit::GlobalExpr B) { + // Sort order: first null exprs, then exprs without fragment + // info, then sort by fragment offset in bits. + // FIXME: Come up with a more comprehensive comparator so + // the sorting isn't non-deterministic, and so the following + // std::unique call works correctly. + if (!A.Expr || !B.Expr) + return !!B.Expr; + auto FragmentA = A.Expr->getFragmentInfo(); + auto FragmentB = B.Expr->getFragmentInfo(); + if (!FragmentA || !FragmentB) + return !!FragmentB; + return FragmentA->OffsetInBits < FragmentB->OffsetInBits; + }); + GVEs.erase(std::unique(GVEs.begin(), GVEs.end(), + [](DwarfCompileUnit::GlobalExpr A, + DwarfCompileUnit::GlobalExpr B) { + return A.Expr == B.Expr; + }), + GVEs.end()); + return GVEs; +} + +// Emit all Dwarf sections that should come prior to the content. Create +// global DIEs and emit initial debug info sections. This is invoked by +// the target AsmPrinter. +void DwarfDebug::beginModule() { + NamedRegionTimer T(DbgTimerName, DbgTimerDescription, DWARFGroupName, + DWARFGroupDescription, TimePassesIsEnabled); + if (DisableDebugInfoPrinting) { + MMI->setDebugInfoAvailability(false); + return; + } + + const Module *M = MMI->getModule(); + + unsigned NumDebugCUs = std::distance(M->debug_compile_units_begin(), + M->debug_compile_units_end()); + // Tell MMI whether we have debug info. + assert(MMI->hasDebugInfo() == (NumDebugCUs > 0) && + "DebugInfoAvailabilty initialized unexpectedly"); + SingleCU = NumDebugCUs == 1; + DenseMap<DIGlobalVariable *, SmallVector<DwarfCompileUnit::GlobalExpr, 1>> + GVMap; + for (const GlobalVariable &Global : M->globals()) { + SmallVector<DIGlobalVariableExpression *, 1> GVs; + Global.getDebugInfo(GVs); + for (auto *GVE : GVs) + GVMap[GVE->getVariable()].push_back({&Global, GVE->getExpression()}); + } + + // Create the symbol that designates the start of the unit's contribution + // to the string offsets table. In a split DWARF scenario, only the skeleton + // unit has the DW_AT_str_offsets_base attribute (and hence needs the symbol). + if (useSegmentedStringOffsetsTable()) + (useSplitDwarf() ? SkeletonHolder : InfoHolder) + .setStringOffsetsStartSym(Asm->createTempSymbol("str_offsets_base")); + + + // Create the symbols that designates the start of the DWARF v5 range list + // and locations list tables. They are located past the table headers. + if (getDwarfVersion() >= 5) { + DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder; + Holder.setRnglistsTableBaseSym( + Asm->createTempSymbol("rnglists_table_base")); + + if (useSplitDwarf()) + InfoHolder.setRnglistsTableBaseSym( + Asm->createTempSymbol("rnglists_dwo_table_base")); + } + + // Create the symbol that points to the first entry following the debug + // address table (.debug_addr) header. + AddrPool.setLabel(Asm->createTempSymbol("addr_table_base")); + + for (DICompileUnit *CUNode : M->debug_compile_units()) { + // FIXME: Move local imported entities into a list attached to the + // subprogram, then this search won't be needed and a + // getImportedEntities().empty() test should go below with the rest. + bool HasNonLocalImportedEntities = llvm::any_of( + CUNode->getImportedEntities(), [](const DIImportedEntity *IE) { + return !isa<DILocalScope>(IE->getScope()); + }); + + if (!HasNonLocalImportedEntities && CUNode->getEnumTypes().empty() && + CUNode->getRetainedTypes().empty() && + CUNode->getGlobalVariables().empty() && CUNode->getMacros().empty()) + continue; + + DwarfCompileUnit &CU = getOrCreateDwarfCompileUnit(CUNode); + + // Global Variables. + for (auto *GVE : CUNode->getGlobalVariables()) { + // Don't bother adding DIGlobalVariableExpressions listed in the CU if we + // already know about the variable and it isn't adding a constant + // expression. + auto &GVMapEntry = GVMap[GVE->getVariable()]; + auto *Expr = GVE->getExpression(); + if (!GVMapEntry.size() || (Expr && Expr->isConstant())) + GVMapEntry.push_back({nullptr, Expr}); + } + DenseSet<DIGlobalVariable *> Processed; + for (auto *GVE : CUNode->getGlobalVariables()) { + DIGlobalVariable *GV = GVE->getVariable(); + if (Processed.insert(GV).second) + CU.getOrCreateGlobalVariableDIE(GV, sortGlobalExprs(GVMap[GV])); + } + + for (auto *Ty : CUNode->getEnumTypes()) { + // The enum types array by design contains pointers to + // MDNodes rather than DIRefs. Unique them here. + CU.getOrCreateTypeDIE(cast<DIType>(Ty)); + } + for (auto *Ty : CUNode->getRetainedTypes()) { + // The retained types array by design contains pointers to + // MDNodes rather than DIRefs. Unique them here. + if (DIType *RT = dyn_cast<DIType>(Ty)) + // There is no point in force-emitting a forward declaration. + CU.getOrCreateTypeDIE(RT); + } + // Emit imported_modules last so that the relevant context is already + // available. + for (auto *IE : CUNode->getImportedEntities()) + constructAndAddImportedEntityDIE(CU, IE); + } +} + +void DwarfDebug::finishEntityDefinitions() { + for (const auto &Entity : ConcreteEntities) { + DIE *Die = Entity->getDIE(); + assert(Die); + // FIXME: Consider the time-space tradeoff of just storing the unit pointer + // in the ConcreteEntities list, rather than looking it up again here. + // DIE::getUnit isn't simple - it walks parent pointers, etc. + DwarfCompileUnit *Unit = CUDieMap.lookup(Die->getUnitDie()); + assert(Unit); + Unit->finishEntityDefinition(Entity.get()); + } +} + +void DwarfDebug::finishSubprogramDefinitions() { + for (const DISubprogram *SP : ProcessedSPNodes) { + assert(SP->getUnit()->getEmissionKind() != DICompileUnit::NoDebug); + forBothCUs( + getOrCreateDwarfCompileUnit(SP->getUnit()), + [&](DwarfCompileUnit &CU) { CU.finishSubprogramDefinition(SP); }); + } +} + +void DwarfDebug::finalizeModuleInfo() { + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + + finishSubprogramDefinitions(); + + finishEntityDefinitions(); + + // Include the DWO file name in the hash if there's more than one CU. + // This handles ThinLTO's situation where imported CUs may very easily be + // duplicate with the same CU partially imported into another ThinLTO unit. + StringRef DWOName; + if (CUMap.size() > 1) + DWOName = Asm->TM.Options.MCOptions.SplitDwarfFile; + + // Handle anything that needs to be done on a per-unit basis after + // all other generation. + for (const auto &P : CUMap) { + auto &TheCU = *P.second; + if (TheCU.getCUNode()->isDebugDirectivesOnly()) + continue; + // Emit DW_AT_containing_type attribute to connect types with their + // vtable holding type. + TheCU.constructContainingTypeDIEs(); + + // Add CU specific attributes if we need to add any. + // If we're splitting the dwarf out now that we've got the entire + // CU then add the dwo id to it. + auto *SkCU = TheCU.getSkeleton(); + if (useSplitDwarf() && !TheCU.getUnitDie().children().empty()) { + finishUnitAttributes(TheCU.getCUNode(), TheCU); + TheCU.addString(TheCU.getUnitDie(), dwarf::DW_AT_GNU_dwo_name, + Asm->TM.Options.MCOptions.SplitDwarfFile); + SkCU->addString(SkCU->getUnitDie(), dwarf::DW_AT_GNU_dwo_name, + Asm->TM.Options.MCOptions.SplitDwarfFile); + // Emit a unique identifier for this CU. + uint64_t ID = + DIEHash(Asm).computeCUSignature(DWOName, TheCU.getUnitDie()); + if (getDwarfVersion() >= 5) { + TheCU.setDWOId(ID); + SkCU->setDWOId(ID); + } else { + TheCU.addUInt(TheCU.getUnitDie(), dwarf::DW_AT_GNU_dwo_id, + dwarf::DW_FORM_data8, ID); + SkCU->addUInt(SkCU->getUnitDie(), dwarf::DW_AT_GNU_dwo_id, + dwarf::DW_FORM_data8, ID); + } + + if (getDwarfVersion() < 5 && !SkeletonHolder.getRangeLists().empty()) { + const MCSymbol *Sym = TLOF.getDwarfRangesSection()->getBeginSymbol(); + SkCU->addSectionLabel(SkCU->getUnitDie(), dwarf::DW_AT_GNU_ranges_base, + Sym, Sym); + } + } else if (SkCU) { + finishUnitAttributes(SkCU->getCUNode(), *SkCU); + } + + // If we have code split among multiple sections or non-contiguous + // ranges of code then emit a DW_AT_ranges attribute on the unit that will + // remain in the .o file, otherwise add a DW_AT_low_pc. + // FIXME: We should use ranges allow reordering of code ala + // .subsections_via_symbols in mach-o. This would mean turning on + // ranges for all subprogram DIEs for mach-o. + DwarfCompileUnit &U = SkCU ? *SkCU : TheCU; + + if (unsigned NumRanges = TheCU.getRanges().size()) { + if (NumRanges > 1 && useRangesSection()) + // A DW_AT_low_pc attribute may also be specified in combination with + // DW_AT_ranges to specify the default base address for use in + // location lists (see Section 2.6.2) and range lists (see Section + // 2.17.3). + U.addUInt(U.getUnitDie(), dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, 0); + else + U.setBaseAddress(TheCU.getRanges().front().Begin); + U.attachRangesOrLowHighPC(U.getUnitDie(), TheCU.takeRanges()); + } + + // We don't keep track of which addresses are used in which CU so this + // is a bit pessimistic under LTO. + if (!AddrPool.isEmpty() && + (getDwarfVersion() >= 5 || + (SkCU && !TheCU.getUnitDie().children().empty()))) + U.addAddrTableBase(); + + if (getDwarfVersion() >= 5) { + if (U.hasRangeLists()) + U.addRnglistsBase(); + + if (!DebugLocs.getLists().empty() && !useSplitDwarf()) { + DebugLocs.setSym(Asm->createTempSymbol("loclists_table_base")); + U.addSectionLabel(U.getUnitDie(), dwarf::DW_AT_loclists_base, + DebugLocs.getSym(), + TLOF.getDwarfLoclistsSection()->getBeginSymbol()); + } + } + + auto *CUNode = cast<DICompileUnit>(P.first); + // If compile Unit has macros, emit "DW_AT_macro_info" attribute. + if (CUNode->getMacros()) + U.addSectionLabel(U.getUnitDie(), dwarf::DW_AT_macro_info, + U.getMacroLabelBegin(), + TLOF.getDwarfMacinfoSection()->getBeginSymbol()); + } + + // Emit all frontend-produced Skeleton CUs, i.e., Clang modules. + for (auto *CUNode : MMI->getModule()->debug_compile_units()) + if (CUNode->getDWOId()) + getOrCreateDwarfCompileUnit(CUNode); + + // Compute DIE offsets and sizes. + InfoHolder.computeSizeAndOffsets(); + if (useSplitDwarf()) + SkeletonHolder.computeSizeAndOffsets(); +} + +// Emit all Dwarf sections that should come after the content. +void DwarfDebug::endModule() { + assert(CurFn == nullptr); + assert(CurMI == nullptr); + + for (const auto &P : CUMap) { + auto &CU = *P.second; + CU.createBaseTypeDIEs(); + } + + // If we aren't actually generating debug info (check beginModule - + // conditionalized on !DisableDebugInfoPrinting and the presence of the + // llvm.dbg.cu metadata node) + if (!MMI->hasDebugInfo()) + return; + + // Finalize the debug info for the module. + finalizeModuleInfo(); + + emitDebugStr(); + + if (useSplitDwarf()) + emitDebugLocDWO(); + else + // Emit info into a debug loc section. + emitDebugLoc(); + + // Corresponding abbreviations into a abbrev section. + emitAbbreviations(); + + // Emit all the DIEs into a debug info section. + emitDebugInfo(); + + // Emit info into a debug aranges section. + if (GenerateARangeSection) + emitDebugARanges(); + + // Emit info into a debug ranges section. + emitDebugRanges(); + + // Emit info into a debug macinfo section. + emitDebugMacinfo(); + + if (useSplitDwarf()) { + emitDebugStrDWO(); + emitDebugInfoDWO(); + emitDebugAbbrevDWO(); + emitDebugLineDWO(); + emitDebugRangesDWO(); + } + + emitDebugAddr(); + + // Emit info into the dwarf accelerator table sections. + switch (getAccelTableKind()) { + case AccelTableKind::Apple: + emitAccelNames(); + emitAccelObjC(); + emitAccelNamespaces(); + emitAccelTypes(); + break; + case AccelTableKind::Dwarf: + emitAccelDebugNames(); + break; + case AccelTableKind::None: + break; + case AccelTableKind::Default: + llvm_unreachable("Default should have already been resolved."); + } + + // Emit the pubnames and pubtypes sections if requested. + emitDebugPubSections(); + + // clean up. + // FIXME: AbstractVariables.clear(); +} + +void DwarfDebug::ensureAbstractEntityIsCreated(DwarfCompileUnit &CU, + const DINode *Node, + const MDNode *ScopeNode) { + if (CU.getExistingAbstractEntity(Node)) + return; + + CU.createAbstractEntity(Node, LScopes.getOrCreateAbstractScope( + cast<DILocalScope>(ScopeNode))); +} + +void DwarfDebug::ensureAbstractEntityIsCreatedIfScoped(DwarfCompileUnit &CU, + const DINode *Node, const MDNode *ScopeNode) { + if (CU.getExistingAbstractEntity(Node)) + return; + + if (LexicalScope *Scope = + LScopes.findAbstractScope(cast_or_null<DILocalScope>(ScopeNode))) + CU.createAbstractEntity(Node, Scope); +} + +// Collect variable information from side table maintained by MF. +void DwarfDebug::collectVariableInfoFromMFTable( + DwarfCompileUnit &TheCU, DenseSet<InlinedEntity> &Processed) { + SmallDenseMap<InlinedEntity, DbgVariable *> MFVars; + for (const auto &VI : Asm->MF->getVariableDbgInfo()) { + if (!VI.Var) + continue; + assert(VI.Var->isValidLocationForIntrinsic(VI.Loc) && + "Expected inlined-at fields to agree"); + + InlinedEntity Var(VI.Var, VI.Loc->getInlinedAt()); + Processed.insert(Var); + LexicalScope *Scope = LScopes.findLexicalScope(VI.Loc); + + // If variable scope is not found then skip this variable. + if (!Scope) + continue; + + ensureAbstractEntityIsCreatedIfScoped(TheCU, Var.first, Scope->getScopeNode()); + auto RegVar = std::make_unique<DbgVariable>( + cast<DILocalVariable>(Var.first), Var.second); + RegVar->initializeMMI(VI.Expr, VI.Slot); + if (DbgVariable *DbgVar = MFVars.lookup(Var)) + DbgVar->addMMIEntry(*RegVar); + else if (InfoHolder.addScopeVariable(Scope, RegVar.get())) { + MFVars.insert({Var, RegVar.get()}); + ConcreteEntities.push_back(std::move(RegVar)); + } + } +} + +/// Determine whether a *singular* DBG_VALUE is valid for the entirety of its +/// enclosing lexical scope. The check ensures there are no other instructions +/// in the same lexical scope preceding the DBG_VALUE and that its range is +/// either open or otherwise rolls off the end of the scope. +static bool validThroughout(LexicalScopes &LScopes, + const MachineInstr *DbgValue, + const MachineInstr *RangeEnd) { + assert(DbgValue->getDebugLoc() && "DBG_VALUE without a debug location"); + auto MBB = DbgValue->getParent(); + auto DL = DbgValue->getDebugLoc(); + auto *LScope = LScopes.findLexicalScope(DL); + // Scope doesn't exist; this is a dead DBG_VALUE. + if (!LScope) + return false; + auto &LSRange = LScope->getRanges(); + if (LSRange.size() == 0) + return false; + + // Determine if the DBG_VALUE is valid at the beginning of its lexical block. + const MachineInstr *LScopeBegin = LSRange.front().first; + // Early exit if the lexical scope begins outside of the current block. + if (LScopeBegin->getParent() != MBB) + return false; + MachineBasicBlock::const_reverse_iterator Pred(DbgValue); + for (++Pred; Pred != MBB->rend(); ++Pred) { + if (Pred->getFlag(MachineInstr::FrameSetup)) + break; + auto PredDL = Pred->getDebugLoc(); + if (!PredDL || Pred->isMetaInstruction()) + continue; + // Check whether the instruction preceding the DBG_VALUE is in the same + // (sub)scope as the DBG_VALUE. + if (DL->getScope() == PredDL->getScope()) + return false; + auto *PredScope = LScopes.findLexicalScope(PredDL); + if (!PredScope || LScope->dominates(PredScope)) + return false; + } + + // If the range of the DBG_VALUE is open-ended, report success. + if (!RangeEnd) + return true; + + // Fail if there are instructions belonging to our scope in another block. + const MachineInstr *LScopeEnd = LSRange.back().second; + if (LScopeEnd->getParent() != MBB) + return false; + + // Single, constant DBG_VALUEs in the prologue are promoted to be live + // throughout the function. This is a hack, presumably for DWARF v2 and not + // necessarily correct. It would be much better to use a dbg.declare instead + // if we know the constant is live throughout the scope. + if (DbgValue->getOperand(0).isImm() && MBB->pred_empty()) + return true; + + return false; +} + +/// Build the location list for all DBG_VALUEs in the function that +/// describe the same variable. The resulting DebugLocEntries will have +/// strict monotonically increasing begin addresses and will never +/// overlap. If the resulting list has only one entry that is valid +/// throughout variable's scope return true. +// +// See the definition of DbgValueHistoryMap::Entry for an explanation of the +// different kinds of history map entries. One thing to be aware of is that if +// a debug value is ended by another entry (rather than being valid until the +// end of the function), that entry's instruction may or may not be included in +// the range, depending on if the entry is a clobbering entry (it has an +// instruction that clobbers one or more preceding locations), or if it is an +// (overlapping) debug value entry. This distinction can be seen in the example +// below. The first debug value is ended by the clobbering entry 2, and the +// second and third debug values are ended by the overlapping debug value entry +// 4. +// +// Input: +// +// History map entries [type, end index, mi] +// +// 0 | [DbgValue, 2, DBG_VALUE $reg0, [...] (fragment 0, 32)] +// 1 | | [DbgValue, 4, DBG_VALUE $reg1, [...] (fragment 32, 32)] +// 2 | | [Clobber, $reg0 = [...], -, -] +// 3 | | [DbgValue, 4, DBG_VALUE 123, [...] (fragment 64, 32)] +// 4 [DbgValue, ~0, DBG_VALUE @g, [...] (fragment 0, 96)] +// +// Output [start, end) [Value...]: +// +// [0-1) [(reg0, fragment 0, 32)] +// [1-3) [(reg0, fragment 0, 32), (reg1, fragment 32, 32)] +// [3-4) [(reg1, fragment 32, 32), (123, fragment 64, 32)] +// [4-) [(@g, fragment 0, 96)] +bool DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc, + const DbgValueHistoryMap::Entries &Entries) { + using OpenRange = + std::pair<DbgValueHistoryMap::EntryIndex, DbgValueLoc>; + SmallVector<OpenRange, 4> OpenRanges; + bool isSafeForSingleLocation = true; + const MachineInstr *StartDebugMI = nullptr; + const MachineInstr *EndMI = nullptr; + + for (auto EB = Entries.begin(), EI = EB, EE = Entries.end(); EI != EE; ++EI) { + const MachineInstr *Instr = EI->getInstr(); + + // Remove all values that are no longer live. + size_t Index = std::distance(EB, EI); + auto Last = + remove_if(OpenRanges, [&](OpenRange &R) { return R.first <= Index; }); + OpenRanges.erase(Last, OpenRanges.end()); + + // If we are dealing with a clobbering entry, this iteration will result in + // a location list entry starting after the clobbering instruction. + const MCSymbol *StartLabel = + EI->isClobber() ? getLabelAfterInsn(Instr) : getLabelBeforeInsn(Instr); + assert(StartLabel && + "Forgot label before/after instruction starting a range!"); + + const MCSymbol *EndLabel; + if (std::next(EI) == Entries.end()) { + EndLabel = Asm->getFunctionEnd(); + if (EI->isClobber()) + EndMI = EI->getInstr(); + } + else if (std::next(EI)->isClobber()) + EndLabel = getLabelAfterInsn(std::next(EI)->getInstr()); + else + EndLabel = getLabelBeforeInsn(std::next(EI)->getInstr()); + assert(EndLabel && "Forgot label after instruction ending a range!"); + + if (EI->isDbgValue()) + LLVM_DEBUG(dbgs() << "DotDebugLoc: " << *Instr << "\n"); + + // If this history map entry has a debug value, add that to the list of + // open ranges and check if its location is valid for a single value + // location. + if (EI->isDbgValue()) { + // Do not add undef debug values, as they are redundant information in + // the location list entries. An undef debug results in an empty location + // description. If there are any non-undef fragments then padding pieces + // with empty location descriptions will automatically be inserted, and if + // all fragments are undef then the whole location list entry is + // redundant. + if (!Instr->isUndefDebugValue()) { + auto Value = getDebugLocValue(Instr); + OpenRanges.emplace_back(EI->getEndIndex(), Value); + + // TODO: Add support for single value fragment locations. + if (Instr->getDebugExpression()->isFragment()) + isSafeForSingleLocation = false; + + if (!StartDebugMI) + StartDebugMI = Instr; + } else { + isSafeForSingleLocation = false; + } + } + + // Location list entries with empty location descriptions are redundant + // information in DWARF, so do not emit those. + if (OpenRanges.empty()) + continue; + + // Omit entries with empty ranges as they do not have any effect in DWARF. + if (StartLabel == EndLabel) { + LLVM_DEBUG(dbgs() << "Omitting location list entry with empty range.\n"); + continue; + } + + SmallVector<DbgValueLoc, 4> Values; + for (auto &R : OpenRanges) + Values.push_back(R.second); + DebugLoc.emplace_back(StartLabel, EndLabel, Values); + + // Attempt to coalesce the ranges of two otherwise identical + // DebugLocEntries. + auto CurEntry = DebugLoc.rbegin(); + LLVM_DEBUG({ + dbgs() << CurEntry->getValues().size() << " Values:\n"; + for (auto &Value : CurEntry->getValues()) + Value.dump(); + dbgs() << "-----\n"; + }); + + auto PrevEntry = std::next(CurEntry); + if (PrevEntry != DebugLoc.rend() && PrevEntry->MergeRanges(*CurEntry)) + DebugLoc.pop_back(); + } + + return DebugLoc.size() == 1 && isSafeForSingleLocation && + validThroughout(LScopes, StartDebugMI, EndMI); +} + +DbgEntity *DwarfDebug::createConcreteEntity(DwarfCompileUnit &TheCU, + LexicalScope &Scope, + const DINode *Node, + const DILocation *Location, + const MCSymbol *Sym) { + ensureAbstractEntityIsCreatedIfScoped(TheCU, Node, Scope.getScopeNode()); + if (isa<const DILocalVariable>(Node)) { + ConcreteEntities.push_back( + std::make_unique<DbgVariable>(cast<const DILocalVariable>(Node), + Location)); + InfoHolder.addScopeVariable(&Scope, + cast<DbgVariable>(ConcreteEntities.back().get())); + } else if (isa<const DILabel>(Node)) { + ConcreteEntities.push_back( + std::make_unique<DbgLabel>(cast<const DILabel>(Node), + Location, Sym)); + InfoHolder.addScopeLabel(&Scope, + cast<DbgLabel>(ConcreteEntities.back().get())); + } + return ConcreteEntities.back().get(); +} + +// Find variables for each lexical scope. +void DwarfDebug::collectEntityInfo(DwarfCompileUnit &TheCU, + const DISubprogram *SP, + DenseSet<InlinedEntity> &Processed) { + // Grab the variable info that was squirreled away in the MMI side-table. + collectVariableInfoFromMFTable(TheCU, Processed); + + for (const auto &I : DbgValues) { + InlinedEntity IV = I.first; + if (Processed.count(IV)) + continue; + + // Instruction ranges, specifying where IV is accessible. + const auto &HistoryMapEntries = I.second; + if (HistoryMapEntries.empty()) + continue; + + LexicalScope *Scope = nullptr; + const DILocalVariable *LocalVar = cast<DILocalVariable>(IV.first); + if (const DILocation *IA = IV.second) + Scope = LScopes.findInlinedScope(LocalVar->getScope(), IA); + else + Scope = LScopes.findLexicalScope(LocalVar->getScope()); + // If variable scope is not found then skip this variable. + if (!Scope) + continue; + + Processed.insert(IV); + DbgVariable *RegVar = cast<DbgVariable>(createConcreteEntity(TheCU, + *Scope, LocalVar, IV.second)); + + const MachineInstr *MInsn = HistoryMapEntries.front().getInstr(); + assert(MInsn->isDebugValue() && "History must begin with debug value"); + + // Check if there is a single DBG_VALUE, valid throughout the var's scope. + // If the history map contains a single debug value, there may be an + // additional entry which clobbers the debug value. + size_t HistSize = HistoryMapEntries.size(); + bool SingleValueWithClobber = + HistSize == 2 && HistoryMapEntries[1].isClobber(); + if (HistSize == 1 || SingleValueWithClobber) { + const auto *End = + SingleValueWithClobber ? HistoryMapEntries[1].getInstr() : nullptr; + if (validThroughout(LScopes, MInsn, End)) { + RegVar->initializeDbgValue(MInsn); + continue; + } + } + + // Do not emit location lists if .debug_loc secton is disabled. + if (!useLocSection()) + continue; + + // Handle multiple DBG_VALUE instructions describing one variable. + DebugLocStream::ListBuilder List(DebugLocs, TheCU, *Asm, *RegVar, *MInsn); + + // Build the location list for this variable. + SmallVector<DebugLocEntry, 8> Entries; + bool isValidSingleLocation = buildLocationList(Entries, HistoryMapEntries); + + // Check whether buildLocationList managed to merge all locations to one + // that is valid throughout the variable's scope. If so, produce single + // value location. + if (isValidSingleLocation) { + RegVar->initializeDbgValue(Entries[0].getValues()[0]); + continue; + } + + // If the variable has a DIBasicType, extract it. Basic types cannot have + // unique identifiers, so don't bother resolving the type with the + // identifier map. + const DIBasicType *BT = dyn_cast<DIBasicType>( + static_cast<const Metadata *>(LocalVar->getType())); + + // Finalize the entry by lowering it into a DWARF bytestream. + for (auto &Entry : Entries) + Entry.finalize(*Asm, List, BT, TheCU); + } + + // For each InlinedEntity collected from DBG_LABEL instructions, convert to + // DWARF-related DbgLabel. + for (const auto &I : DbgLabels) { + InlinedEntity IL = I.first; + const MachineInstr *MI = I.second; + if (MI == nullptr) + continue; + + LexicalScope *Scope = nullptr; + const DILabel *Label = cast<DILabel>(IL.first); + // The scope could have an extra lexical block file. + const DILocalScope *LocalScope = + Label->getScope()->getNonLexicalBlockFileScope(); + // Get inlined DILocation if it is inlined label. + if (const DILocation *IA = IL.second) + Scope = LScopes.findInlinedScope(LocalScope, IA); + else + Scope = LScopes.findLexicalScope(LocalScope); + // If label scope is not found then skip this label. + if (!Scope) + continue; + + Processed.insert(IL); + /// At this point, the temporary label is created. + /// Save the temporary label to DbgLabel entity to get the + /// actually address when generating Dwarf DIE. + MCSymbol *Sym = getLabelBeforeInsn(MI); + createConcreteEntity(TheCU, *Scope, Label, IL.second, Sym); + } + + // Collect info for variables/labels that were optimized out. + for (const DINode *DN : SP->getRetainedNodes()) { + if (!Processed.insert(InlinedEntity(DN, nullptr)).second) + continue; + LexicalScope *Scope = nullptr; + if (auto *DV = dyn_cast<DILocalVariable>(DN)) { + Scope = LScopes.findLexicalScope(DV->getScope()); + } else if (auto *DL = dyn_cast<DILabel>(DN)) { + Scope = LScopes.findLexicalScope(DL->getScope()); + } + + if (Scope) + createConcreteEntity(TheCU, *Scope, DN, nullptr); + } +} + +// Process beginning of an instruction. +void DwarfDebug::beginInstruction(const MachineInstr *MI) { + DebugHandlerBase::beginInstruction(MI); + assert(CurMI); + + const auto *SP = MI->getMF()->getFunction().getSubprogram(); + if (!SP || SP->getUnit()->getEmissionKind() == DICompileUnit::NoDebug) + return; + + // Check if source location changes, but ignore DBG_VALUE and CFI locations. + // If the instruction is part of the function frame setup code, do not emit + // any line record, as there is no correspondence with any user code. + if (MI->isMetaInstruction() || MI->getFlag(MachineInstr::FrameSetup)) + return; + const DebugLoc &DL = MI->getDebugLoc(); + // When we emit a line-0 record, we don't update PrevInstLoc; so look at + // the last line number actually emitted, to see if it was line 0. + unsigned LastAsmLine = + Asm->OutStreamer->getContext().getCurrentDwarfLoc().getLine(); + + // Request a label after the call in order to emit AT_return_pc information + // in call site entries. TODO: Add support for targets with delay slots. + if (SP->areAllCallsDescribed() && MI->isCall() && !MI->hasDelaySlot()) + requestLabelAfterInsn(MI); + + if (DL == PrevInstLoc) { + // If we have an ongoing unspecified location, nothing to do here. + if (!DL) + return; + // We have an explicit location, same as the previous location. + // But we might be coming back to it after a line 0 record. + if (LastAsmLine == 0 && DL.getLine() != 0) { + // Reinstate the source location but not marked as a statement. + const MDNode *Scope = DL.getScope(); + recordSourceLine(DL.getLine(), DL.getCol(), Scope, /*Flags=*/0); + } + return; + } + + if (!DL) { + // We have an unspecified location, which might want to be line 0. + // If we have already emitted a line-0 record, don't repeat it. + if (LastAsmLine == 0) + return; + // If user said Don't Do That, don't do that. + if (UnknownLocations == Disable) + return; + // See if we have a reason to emit a line-0 record now. + // Reasons to emit a line-0 record include: + // - User asked for it (UnknownLocations). + // - Instruction has a label, so it's referenced from somewhere else, + // possibly debug information; we want it to have a source location. + // - Instruction is at the top of a block; we don't want to inherit the + // location from the physically previous (maybe unrelated) block. + if (UnknownLocations == Enable || PrevLabel || + (PrevInstBB && PrevInstBB != MI->getParent())) { + // Preserve the file and column numbers, if we can, to save space in + // the encoded line table. + // Do not update PrevInstLoc, it remembers the last non-0 line. + const MDNode *Scope = nullptr; + unsigned Column = 0; + if (PrevInstLoc) { + Scope = PrevInstLoc.getScope(); + Column = PrevInstLoc.getCol(); + } + recordSourceLine(/*Line=*/0, Column, Scope, /*Flags=*/0); + } + return; + } + + // We have an explicit location, different from the previous location. + // Don't repeat a line-0 record, but otherwise emit the new location. + // (The new location might be an explicit line 0, which we do emit.) + if (DL.getLine() == 0 && LastAsmLine == 0) + return; + unsigned Flags = 0; + if (DL == PrologEndLoc) { + Flags |= DWARF2_FLAG_PROLOGUE_END | DWARF2_FLAG_IS_STMT; + PrologEndLoc = DebugLoc(); + } + // If the line changed, we call that a new statement; unless we went to + // line 0 and came back, in which case it is not a new statement. + unsigned OldLine = PrevInstLoc ? PrevInstLoc.getLine() : LastAsmLine; + if (DL.getLine() && DL.getLine() != OldLine) + Flags |= DWARF2_FLAG_IS_STMT; + + const MDNode *Scope = DL.getScope(); + recordSourceLine(DL.getLine(), DL.getCol(), Scope, Flags); + + // If we're not at line 0, remember this location. + if (DL.getLine()) + PrevInstLoc = DL; +} + +static DebugLoc findPrologueEndLoc(const MachineFunction *MF) { + // First known non-DBG_VALUE and non-frame setup location marks + // the beginning of the function body. + for (const auto &MBB : *MF) + for (const auto &MI : MBB) + if (!MI.isMetaInstruction() && !MI.getFlag(MachineInstr::FrameSetup) && + MI.getDebugLoc()) + return MI.getDebugLoc(); + return DebugLoc(); +} + +/// Register a source line with debug info. Returns the unique label that was +/// emitted and which provides correspondence to the source line list. +static void recordSourceLine(AsmPrinter &Asm, unsigned Line, unsigned Col, + const MDNode *S, unsigned Flags, unsigned CUID, + uint16_t DwarfVersion, + ArrayRef<std::unique_ptr<DwarfCompileUnit>> DCUs) { + StringRef Fn; + unsigned FileNo = 1; + unsigned Discriminator = 0; + if (auto *Scope = cast_or_null<DIScope>(S)) { + Fn = Scope->getFilename(); + if (Line != 0 && DwarfVersion >= 4) + if (auto *LBF = dyn_cast<DILexicalBlockFile>(Scope)) + Discriminator = LBF->getDiscriminator(); + + FileNo = static_cast<DwarfCompileUnit &>(*DCUs[CUID]) + .getOrCreateSourceID(Scope->getFile()); + } + Asm.OutStreamer->EmitDwarfLocDirective(FileNo, Line, Col, Flags, 0, + Discriminator, Fn); +} + +DebugLoc DwarfDebug::emitInitialLocDirective(const MachineFunction &MF, + unsigned CUID) { + // Get beginning of function. + if (DebugLoc PrologEndLoc = findPrologueEndLoc(&MF)) { + // Ensure the compile unit is created if the function is called before + // beginFunction(). + (void)getOrCreateDwarfCompileUnit( + MF.getFunction().getSubprogram()->getUnit()); + // We'd like to list the prologue as "not statements" but GDB behaves + // poorly if we do that. Revisit this with caution/GDB (7.5+) testing. + const DISubprogram *SP = PrologEndLoc->getInlinedAtScope()->getSubprogram(); + ::recordSourceLine(*Asm, SP->getScopeLine(), 0, SP, DWARF2_FLAG_IS_STMT, + CUID, getDwarfVersion(), getUnits()); + return PrologEndLoc; + } + return DebugLoc(); +} + +// Gather pre-function debug information. Assumes being called immediately +// after the function entry point has been emitted. +void DwarfDebug::beginFunctionImpl(const MachineFunction *MF) { + CurFn = MF; + + auto *SP = MF->getFunction().getSubprogram(); + assert(LScopes.empty() || SP == LScopes.getCurrentFunctionScope()->getScopeNode()); + if (SP->getUnit()->getEmissionKind() == DICompileUnit::NoDebug) + return; + + SectionLabels.insert(std::make_pair(&Asm->getFunctionBegin()->getSection(), + Asm->getFunctionBegin())); + + DwarfCompileUnit &CU = getOrCreateDwarfCompileUnit(SP->getUnit()); + + // Set DwarfDwarfCompileUnitID in MCContext to the Compile Unit this function + // belongs to so that we add to the correct per-cu line table in the + // non-asm case. + if (Asm->OutStreamer->hasRawTextSupport()) + // Use a single line table if we are generating assembly. + Asm->OutStreamer->getContext().setDwarfCompileUnitID(0); + else + Asm->OutStreamer->getContext().setDwarfCompileUnitID(CU.getUniqueID()); + + // Record beginning of function. + PrologEndLoc = emitInitialLocDirective( + *MF, Asm->OutStreamer->getContext().getDwarfCompileUnitID()); +} + +void DwarfDebug::skippedNonDebugFunction() { + // If we don't have a subprogram for this function then there will be a hole + // in the range information. Keep note of this by setting the previously used + // section to nullptr. + PrevCU = nullptr; + CurFn = nullptr; +} + +// Gather and emit post-function debug information. +void DwarfDebug::endFunctionImpl(const MachineFunction *MF) { + const DISubprogram *SP = MF->getFunction().getSubprogram(); + + assert(CurFn == MF && + "endFunction should be called with the same function as beginFunction"); + + // Set DwarfDwarfCompileUnitID in MCContext to default value. + Asm->OutStreamer->getContext().setDwarfCompileUnitID(0); + + LexicalScope *FnScope = LScopes.getCurrentFunctionScope(); + assert(!FnScope || SP == FnScope->getScopeNode()); + DwarfCompileUnit &TheCU = *CUMap.lookup(SP->getUnit()); + if (TheCU.getCUNode()->isDebugDirectivesOnly()) { + PrevLabel = nullptr; + CurFn = nullptr; + return; + } + + DenseSet<InlinedEntity> Processed; + collectEntityInfo(TheCU, SP, Processed); + + // Add the range of this function to the list of ranges for the CU. + TheCU.addRange({Asm->getFunctionBegin(), Asm->getFunctionEnd()}); + + // Under -gmlt, skip building the subprogram if there are no inlined + // subroutines inside it. But with -fdebug-info-for-profiling, the subprogram + // is still needed as we need its source location. + if (!TheCU.getCUNode()->getDebugInfoForProfiling() && + TheCU.getCUNode()->getEmissionKind() == DICompileUnit::LineTablesOnly && + LScopes.getAbstractScopesList().empty() && !IsDarwin) { + assert(InfoHolder.getScopeVariables().empty()); + PrevLabel = nullptr; + CurFn = nullptr; + return; + } + +#ifndef NDEBUG + size_t NumAbstractScopes = LScopes.getAbstractScopesList().size(); +#endif + // Construct abstract scopes. + for (LexicalScope *AScope : LScopes.getAbstractScopesList()) { + auto *SP = cast<DISubprogram>(AScope->getScopeNode()); + for (const DINode *DN : SP->getRetainedNodes()) { + if (!Processed.insert(InlinedEntity(DN, nullptr)).second) + continue; + + const MDNode *Scope = nullptr; + if (auto *DV = dyn_cast<DILocalVariable>(DN)) + Scope = DV->getScope(); + else if (auto *DL = dyn_cast<DILabel>(DN)) + Scope = DL->getScope(); + else + llvm_unreachable("Unexpected DI type!"); + + // Collect info for variables/labels that were optimized out. + ensureAbstractEntityIsCreated(TheCU, DN, Scope); + assert(LScopes.getAbstractScopesList().size() == NumAbstractScopes + && "ensureAbstractEntityIsCreated inserted abstract scopes"); + } + constructAbstractSubprogramScopeDIE(TheCU, AScope); + } + + ProcessedSPNodes.insert(SP); + DIE &ScopeDIE = TheCU.constructSubprogramScopeDIE(SP, FnScope); + if (auto *SkelCU = TheCU.getSkeleton()) + if (!LScopes.getAbstractScopesList().empty() && + TheCU.getCUNode()->getSplitDebugInlining()) + SkelCU->constructSubprogramScopeDIE(SP, FnScope); + + // Construct call site entries. + constructCallSiteEntryDIEs(*SP, TheCU, ScopeDIE, *MF); + + // Clear debug info + // Ownership of DbgVariables is a bit subtle - ScopeVariables owns all the + // DbgVariables except those that are also in AbstractVariables (since they + // can be used cross-function) + InfoHolder.getScopeVariables().clear(); + InfoHolder.getScopeLabels().clear(); + PrevLabel = nullptr; + CurFn = nullptr; +} + +// Register a source line with debug info. Returns the unique label that was +// emitted and which provides correspondence to the source line list. +void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S, + unsigned Flags) { + ::recordSourceLine(*Asm, Line, Col, S, Flags, + Asm->OutStreamer->getContext().getDwarfCompileUnitID(), + getDwarfVersion(), getUnits()); +} + +//===----------------------------------------------------------------------===// +// Emit Methods +//===----------------------------------------------------------------------===// + +// Emit the debug info section. +void DwarfDebug::emitDebugInfo() { + DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder; + Holder.emitUnits(/* UseOffsets */ false); +} + +// Emit the abbreviation section. +void DwarfDebug::emitAbbreviations() { + DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder; + + Holder.emitAbbrevs(Asm->getObjFileLowering().getDwarfAbbrevSection()); +} + +void DwarfDebug::emitStringOffsetsTableHeader() { + DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder; + Holder.getStringPool().emitStringOffsetsTableHeader( + *Asm, Asm->getObjFileLowering().getDwarfStrOffSection(), + Holder.getStringOffsetsStartSym()); +} + +template <typename AccelTableT> +void DwarfDebug::emitAccel(AccelTableT &Accel, MCSection *Section, + StringRef TableName) { + Asm->OutStreamer->SwitchSection(Section); + + // Emit the full data. + emitAppleAccelTable(Asm, Accel, TableName, Section->getBeginSymbol()); +} + +void DwarfDebug::emitAccelDebugNames() { + // Don't emit anything if we have no compilation units to index. + if (getUnits().empty()) + return; + + emitDWARF5AccelTable(Asm, AccelDebugNames, *this, getUnits()); +} + +// Emit visible names into a hashed accelerator table section. +void DwarfDebug::emitAccelNames() { + emitAccel(AccelNames, Asm->getObjFileLowering().getDwarfAccelNamesSection(), + "Names"); +} + +// Emit objective C classes and categories into a hashed accelerator table +// section. +void DwarfDebug::emitAccelObjC() { + emitAccel(AccelObjC, Asm->getObjFileLowering().getDwarfAccelObjCSection(), + "ObjC"); +} + +// Emit namespace dies into a hashed accelerator table. +void DwarfDebug::emitAccelNamespaces() { + emitAccel(AccelNamespace, + Asm->getObjFileLowering().getDwarfAccelNamespaceSection(), + "namespac"); +} + +// Emit type dies into a hashed accelerator table. +void DwarfDebug::emitAccelTypes() { + emitAccel(AccelTypes, Asm->getObjFileLowering().getDwarfAccelTypesSection(), + "types"); +} + +// Public name handling. +// The format for the various pubnames: +// +// dwarf pubnames - offset/name pairs where the offset is the offset into the CU +// for the DIE that is named. +// +// gnu pubnames - offset/index value/name tuples where the offset is the offset +// into the CU and the index value is computed according to the type of value +// for the DIE that is named. +// +// For type units the offset is the offset of the skeleton DIE. For split dwarf +// it's the offset within the debug_info/debug_types dwo section, however, the +// reference in the pubname header doesn't change. + +/// computeIndexValue - Compute the gdb index value for the DIE and CU. +static dwarf::PubIndexEntryDescriptor computeIndexValue(DwarfUnit *CU, + const DIE *Die) { + // Entities that ended up only in a Type Unit reference the CU instead (since + // the pub entry has offsets within the CU there's no real offset that can be + // provided anyway). As it happens all such entities (namespaces and types, + // types only in C++ at that) are rendered as TYPE+EXTERNAL. If this turns out + // not to be true it would be necessary to persist this information from the + // point at which the entry is added to the index data structure - since by + // the time the index is built from that, the original type/namespace DIE in a + // type unit has already been destroyed so it can't be queried for properties + // like tag, etc. + if (Die->getTag() == dwarf::DW_TAG_compile_unit) + return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_TYPE, + dwarf::GIEL_EXTERNAL); + dwarf::GDBIndexEntryLinkage Linkage = dwarf::GIEL_STATIC; + + // We could have a specification DIE that has our most of our knowledge, + // look for that now. + if (DIEValue SpecVal = Die->findAttribute(dwarf::DW_AT_specification)) { + DIE &SpecDIE = SpecVal.getDIEEntry().getEntry(); + if (SpecDIE.findAttribute(dwarf::DW_AT_external)) + Linkage = dwarf::GIEL_EXTERNAL; + } else if (Die->findAttribute(dwarf::DW_AT_external)) + Linkage = dwarf::GIEL_EXTERNAL; + + switch (Die->getTag()) { + case dwarf::DW_TAG_class_type: + case dwarf::DW_TAG_structure_type: + case dwarf::DW_TAG_union_type: + case dwarf::DW_TAG_enumeration_type: + return dwarf::PubIndexEntryDescriptor( + dwarf::GIEK_TYPE, + dwarf::isCPlusPlus((dwarf::SourceLanguage)CU->getLanguage()) + ? dwarf::GIEL_EXTERNAL + : dwarf::GIEL_STATIC); + case dwarf::DW_TAG_typedef: + case dwarf::DW_TAG_base_type: + case dwarf::DW_TAG_subrange_type: + return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_TYPE, dwarf::GIEL_STATIC); + case dwarf::DW_TAG_namespace: + return dwarf::GIEK_TYPE; + case dwarf::DW_TAG_subprogram: + return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_FUNCTION, Linkage); + case dwarf::DW_TAG_variable: + return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_VARIABLE, Linkage); + case dwarf::DW_TAG_enumerator: + return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_VARIABLE, + dwarf::GIEL_STATIC); + default: + return dwarf::GIEK_NONE; + } +} + +/// emitDebugPubSections - Emit visible names and types into debug pubnames and +/// pubtypes sections. +void DwarfDebug::emitDebugPubSections() { + for (const auto &NU : CUMap) { + DwarfCompileUnit *TheU = NU.second; + if (!TheU->hasDwarfPubSections()) + continue; + + bool GnuStyle = TheU->getCUNode()->getNameTableKind() == + DICompileUnit::DebugNameTableKind::GNU; + + Asm->OutStreamer->SwitchSection( + GnuStyle ? Asm->getObjFileLowering().getDwarfGnuPubNamesSection() + : Asm->getObjFileLowering().getDwarfPubNamesSection()); + emitDebugPubSection(GnuStyle, "Names", TheU, TheU->getGlobalNames()); + + Asm->OutStreamer->SwitchSection( + GnuStyle ? Asm->getObjFileLowering().getDwarfGnuPubTypesSection() + : Asm->getObjFileLowering().getDwarfPubTypesSection()); + emitDebugPubSection(GnuStyle, "Types", TheU, TheU->getGlobalTypes()); + } +} + +void DwarfDebug::emitSectionReference(const DwarfCompileUnit &CU) { + if (useSectionsAsReferences()) + Asm->EmitDwarfOffset(CU.getSection()->getBeginSymbol(), + CU.getDebugSectionOffset()); + else + Asm->emitDwarfSymbolReference(CU.getLabelBegin()); +} + +void DwarfDebug::emitDebugPubSection(bool GnuStyle, StringRef Name, + DwarfCompileUnit *TheU, + const StringMap<const DIE *> &Globals) { + if (auto *Skeleton = TheU->getSkeleton()) + TheU = Skeleton; + + // Emit the header. + Asm->OutStreamer->AddComment("Length of Public " + Name + " Info"); + MCSymbol *BeginLabel = Asm->createTempSymbol("pub" + Name + "_begin"); + MCSymbol *EndLabel = Asm->createTempSymbol("pub" + Name + "_end"); + Asm->EmitLabelDifference(EndLabel, BeginLabel, 4); + + Asm->OutStreamer->EmitLabel(BeginLabel); + + Asm->OutStreamer->AddComment("DWARF Version"); + Asm->emitInt16(dwarf::DW_PUBNAMES_VERSION); + + Asm->OutStreamer->AddComment("Offset of Compilation Unit Info"); + emitSectionReference(*TheU); + + Asm->OutStreamer->AddComment("Compilation Unit Length"); + Asm->emitInt32(TheU->getLength()); + + // Emit the pubnames for this compilation unit. + for (const auto &GI : Globals) { + const char *Name = GI.getKeyData(); + const DIE *Entity = GI.second; + + Asm->OutStreamer->AddComment("DIE offset"); + Asm->emitInt32(Entity->getOffset()); + + if (GnuStyle) { + dwarf::PubIndexEntryDescriptor Desc = computeIndexValue(TheU, Entity); + Asm->OutStreamer->AddComment( + Twine("Attributes: ") + dwarf::GDBIndexEntryKindString(Desc.Kind) + + ", " + dwarf::GDBIndexEntryLinkageString(Desc.Linkage)); + Asm->emitInt8(Desc.toBits()); + } + + Asm->OutStreamer->AddComment("External Name"); + Asm->OutStreamer->EmitBytes(StringRef(Name, GI.getKeyLength() + 1)); + } + + Asm->OutStreamer->AddComment("End Mark"); + Asm->emitInt32(0); + Asm->OutStreamer->EmitLabel(EndLabel); +} + +/// Emit null-terminated strings into a debug str section. +void DwarfDebug::emitDebugStr() { + MCSection *StringOffsetsSection = nullptr; + if (useSegmentedStringOffsetsTable()) { + emitStringOffsetsTableHeader(); + StringOffsetsSection = Asm->getObjFileLowering().getDwarfStrOffSection(); + } + DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder; + Holder.emitStrings(Asm->getObjFileLowering().getDwarfStrSection(), + StringOffsetsSection, /* UseRelativeOffsets = */ true); +} + +void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer, + const DebugLocStream::Entry &Entry, + const DwarfCompileUnit *CU) { + auto &&Comments = DebugLocs.getComments(Entry); + auto Comment = Comments.begin(); + auto End = Comments.end(); + + // The expressions are inserted into a byte stream rather early (see + // DwarfExpression::addExpression) so for those ops (e.g. DW_OP_convert) that + // need to reference a base_type DIE the offset of that DIE is not yet known. + // To deal with this we instead insert a placeholder early and then extract + // it here and replace it with the real reference. + unsigned PtrSize = Asm->MAI->getCodePointerSize(); + DWARFDataExtractor Data(StringRef(DebugLocs.getBytes(Entry).data(), + DebugLocs.getBytes(Entry).size()), + Asm->getDataLayout().isLittleEndian(), PtrSize); + DWARFExpression Expr(Data, getDwarfVersion(), PtrSize); + + using Encoding = DWARFExpression::Operation::Encoding; + uint64_t Offset = 0; + for (auto &Op : Expr) { + assert(Op.getCode() != dwarf::DW_OP_const_type && + "3 operand ops not yet supported"); + Streamer.EmitInt8(Op.getCode(), Comment != End ? *(Comment++) : ""); + Offset++; + for (unsigned I = 0; I < 2; ++I) { + if (Op.getDescription().Op[I] == Encoding::SizeNA) + continue; + if (Op.getDescription().Op[I] == Encoding::BaseTypeRef) { + if (CU) { + uint64_t Offset = CU->ExprRefedBaseTypes[Op.getRawOperand(I)].Die->getOffset(); + assert(Offset < (1ULL << (ULEB128PadSize * 7)) && "Offset wont fit"); + Asm->EmitULEB128(Offset, nullptr, ULEB128PadSize); + } else { + // Emit a reference to the 'generic type'. + Asm->EmitULEB128(0, nullptr, ULEB128PadSize); + } + // Make sure comments stay aligned. + for (unsigned J = 0; J < ULEB128PadSize; ++J) + if (Comment != End) + Comment++; + } else { + for (uint64_t J = Offset; J < Op.getOperandEndOffset(I); ++J) + Streamer.EmitInt8(Data.getData()[J], Comment != End ? *(Comment++) : ""); + } + Offset = Op.getOperandEndOffset(I); + } + assert(Offset == Op.getEndOffset()); + } +} + +void DwarfDebug::emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT, + const DbgValueLoc &Value, + DwarfExpression &DwarfExpr) { + auto *DIExpr = Value.getExpression(); + DIExpressionCursor ExprCursor(DIExpr); + DwarfExpr.addFragmentOffset(DIExpr); + // Regular entry. + if (Value.isInt()) { + if (BT && (BT->getEncoding() == dwarf::DW_ATE_signed || + BT->getEncoding() == dwarf::DW_ATE_signed_char)) + DwarfExpr.addSignedConstant(Value.getInt()); + else + DwarfExpr.addUnsignedConstant(Value.getInt()); + } else if (Value.isLocation()) { + MachineLocation Location = Value.getLoc(); + if (Location.isIndirect()) + DwarfExpr.setMemoryLocationKind(); + DIExpressionCursor Cursor(DIExpr); + + if (DIExpr->isEntryValue()) { + DwarfExpr.setEntryValueFlag(); + DwarfExpr.beginEntryValueExpression(Cursor); + } + + const TargetRegisterInfo &TRI = *AP.MF->getSubtarget().getRegisterInfo(); + if (!DwarfExpr.addMachineRegExpression(TRI, Cursor, Location.getReg())) + return; + return DwarfExpr.addExpression(std::move(Cursor)); + } else if (Value.isConstantFP()) { + APInt RawBytes = Value.getConstantFP()->getValueAPF().bitcastToAPInt(); + DwarfExpr.addUnsignedConstant(RawBytes); + } + DwarfExpr.addExpression(std::move(ExprCursor)); +} + +void DebugLocEntry::finalize(const AsmPrinter &AP, + DebugLocStream::ListBuilder &List, + const DIBasicType *BT, + DwarfCompileUnit &TheCU) { + assert(!Values.empty() && + "location list entries without values are redundant"); + assert(Begin != End && "unexpected location list entry with empty range"); + DebugLocStream::EntryBuilder Entry(List, Begin, End); + BufferByteStreamer Streamer = Entry.getStreamer(); + DebugLocDwarfExpression DwarfExpr(AP.getDwarfVersion(), Streamer, TheCU); + const DbgValueLoc &Value = Values[0]; + if (Value.isFragment()) { + // Emit all fragments that belong to the same variable and range. + assert(llvm::all_of(Values, [](DbgValueLoc P) { + return P.isFragment(); + }) && "all values are expected to be fragments"); + assert(std::is_sorted(Values.begin(), Values.end()) && + "fragments are expected to be sorted"); + + for (auto Fragment : Values) + DwarfDebug::emitDebugLocValue(AP, BT, Fragment, DwarfExpr); + + } else { + assert(Values.size() == 1 && "only fragments may have >1 value"); + DwarfDebug::emitDebugLocValue(AP, BT, Value, DwarfExpr); + } + DwarfExpr.finalize(); +} + +void DwarfDebug::emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry, + const DwarfCompileUnit *CU) { + // Emit the size. + Asm->OutStreamer->AddComment("Loc expr size"); + if (getDwarfVersion() >= 5) + Asm->EmitULEB128(DebugLocs.getBytes(Entry).size()); + else if (DebugLocs.getBytes(Entry).size() <= std::numeric_limits<uint16_t>::max()) + Asm->emitInt16(DebugLocs.getBytes(Entry).size()); + else { + // The entry is too big to fit into 16 bit, drop it as there is nothing we + // can do. + Asm->emitInt16(0); + return; + } + // Emit the entry. + APByteStreamer Streamer(*Asm); + emitDebugLocEntry(Streamer, Entry, CU); +} + +// Emit the common part of the DWARF 5 range/locations list tables header. +static void emitListsTableHeaderStart(AsmPrinter *Asm, + MCSymbol *TableStart, + MCSymbol *TableEnd) { + // Build the table header, which starts with the length field. + Asm->OutStreamer->AddComment("Length"); + Asm->EmitLabelDifference(TableEnd, TableStart, 4); + Asm->OutStreamer->EmitLabel(TableStart); + // Version number (DWARF v5 and later). + Asm->OutStreamer->AddComment("Version"); + Asm->emitInt16(Asm->OutStreamer->getContext().getDwarfVersion()); + // Address size. + Asm->OutStreamer->AddComment("Address size"); + Asm->emitInt8(Asm->MAI->getCodePointerSize()); + // Segment selector size. + Asm->OutStreamer->AddComment("Segment selector size"); + Asm->emitInt8(0); +} + +// Emit the header of a DWARF 5 range list table list table. Returns the symbol +// that designates the end of the table for the caller to emit when the table is +// complete. +static MCSymbol *emitRnglistsTableHeader(AsmPrinter *Asm, + const DwarfFile &Holder) { + MCSymbol *TableStart = Asm->createTempSymbol("debug_rnglist_table_start"); + MCSymbol *TableEnd = Asm->createTempSymbol("debug_rnglist_table_end"); + emitListsTableHeaderStart(Asm, TableStart, TableEnd); + + Asm->OutStreamer->AddComment("Offset entry count"); + Asm->emitInt32(Holder.getRangeLists().size()); + Asm->OutStreamer->EmitLabel(Holder.getRnglistsTableBaseSym()); + + for (const RangeSpanList &List : Holder.getRangeLists()) + Asm->EmitLabelDifference(List.getSym(), Holder.getRnglistsTableBaseSym(), + 4); + + return TableEnd; +} + +// Emit the header of a DWARF 5 locations list table. Returns the symbol that +// designates the end of the table for the caller to emit when the table is +// complete. +static MCSymbol *emitLoclistsTableHeader(AsmPrinter *Asm, + const DwarfDebug &DD) { + MCSymbol *TableStart = Asm->createTempSymbol("debug_loclist_table_start"); + MCSymbol *TableEnd = Asm->createTempSymbol("debug_loclist_table_end"); + emitListsTableHeaderStart(Asm, TableStart, TableEnd); + + const auto &DebugLocs = DD.getDebugLocs(); + + // FIXME: Generate the offsets table and use DW_FORM_loclistx with the + // DW_AT_loclists_base attribute. Until then set the number of offsets to 0. + Asm->OutStreamer->AddComment("Offset entry count"); + Asm->emitInt32(0); + Asm->OutStreamer->EmitLabel(DebugLocs.getSym()); + + return TableEnd; +} + +template <typename Ranges, typename PayloadEmitter> +static void emitRangeList( + DwarfDebug &DD, AsmPrinter *Asm, MCSymbol *Sym, const Ranges &R, + const DwarfCompileUnit &CU, unsigned BaseAddressx, unsigned OffsetPair, + unsigned StartxLength, unsigned EndOfList, + StringRef (*StringifyEnum)(unsigned), + bool ShouldUseBaseAddress, + PayloadEmitter EmitPayload) { + + auto Size = Asm->MAI->getCodePointerSize(); + bool UseDwarf5 = DD.getDwarfVersion() >= 5; + + // Emit our symbol so we can find the beginning of the range. + Asm->OutStreamer->EmitLabel(Sym); + + // Gather all the ranges that apply to the same section so they can share + // a base address entry. + MapVector<const MCSection *, std::vector<decltype(&*R.begin())>> SectionRanges; + + for (const auto &Range : R) + SectionRanges[&Range.Begin->getSection()].push_back(&Range); + + const MCSymbol *CUBase = CU.getBaseAddress(); + bool BaseIsSet = false; + for (const auto &P : SectionRanges) { + auto *Base = CUBase; + if (!Base && ShouldUseBaseAddress) { + const MCSymbol *Begin = P.second.front()->Begin; + const MCSymbol *NewBase = DD.getSectionLabel(&Begin->getSection()); + if (!UseDwarf5) { + Base = NewBase; + BaseIsSet = true; + Asm->OutStreamer->EmitIntValue(-1, Size); + Asm->OutStreamer->AddComment(" base address"); + Asm->OutStreamer->EmitSymbolValue(Base, Size); + } else if (NewBase != Begin || P.second.size() > 1) { + // Only use a base address if + // * the existing pool address doesn't match (NewBase != Begin) + // * or, there's more than one entry to share the base address + Base = NewBase; + BaseIsSet = true; + Asm->OutStreamer->AddComment(StringifyEnum(BaseAddressx)); + Asm->emitInt8(BaseAddressx); + Asm->OutStreamer->AddComment(" base address index"); + Asm->EmitULEB128(DD.getAddressPool().getIndex(Base)); + } + } else if (BaseIsSet && !UseDwarf5) { + BaseIsSet = false; + assert(!Base); + Asm->OutStreamer->EmitIntValue(-1, Size); + Asm->OutStreamer->EmitIntValue(0, Size); + } + + for (const auto *RS : P.second) { + const MCSymbol *Begin = RS->Begin; + const MCSymbol *End = RS->End; + assert(Begin && "Range without a begin symbol?"); + assert(End && "Range without an end symbol?"); + if (Base) { + if (UseDwarf5) { + // Emit offset_pair when we have a base. + Asm->OutStreamer->AddComment(StringifyEnum(OffsetPair)); + Asm->emitInt8(OffsetPair); + Asm->OutStreamer->AddComment(" starting offset"); + Asm->EmitLabelDifferenceAsULEB128(Begin, Base); + Asm->OutStreamer->AddComment(" ending offset"); + Asm->EmitLabelDifferenceAsULEB128(End, Base); + } else { + Asm->EmitLabelDifference(Begin, Base, Size); + Asm->EmitLabelDifference(End, Base, Size); + } + } else if (UseDwarf5) { + Asm->OutStreamer->AddComment(StringifyEnum(StartxLength)); + Asm->emitInt8(StartxLength); + Asm->OutStreamer->AddComment(" start index"); + Asm->EmitULEB128(DD.getAddressPool().getIndex(Begin)); + Asm->OutStreamer->AddComment(" length"); + Asm->EmitLabelDifferenceAsULEB128(End, Begin); + } else { + Asm->OutStreamer->EmitSymbolValue(Begin, Size); + Asm->OutStreamer->EmitSymbolValue(End, Size); + } + EmitPayload(*RS); + } + } + + if (UseDwarf5) { + Asm->OutStreamer->AddComment(StringifyEnum(EndOfList)); + Asm->emitInt8(EndOfList); + } else { + // Terminate the list with two 0 values. + Asm->OutStreamer->EmitIntValue(0, Size); + Asm->OutStreamer->EmitIntValue(0, Size); + } +} + +static void emitLocList(DwarfDebug &DD, AsmPrinter *Asm, const DebugLocStream::List &List) { + emitRangeList( + DD, Asm, List.Label, DD.getDebugLocs().getEntries(List), *List.CU, + dwarf::DW_LLE_base_addressx, dwarf::DW_LLE_offset_pair, + dwarf::DW_LLE_startx_length, dwarf::DW_LLE_end_of_list, + llvm::dwarf::LocListEncodingString, + /* ShouldUseBaseAddress */ true, + [&](const DebugLocStream::Entry &E) { + DD.emitDebugLocEntryLocation(E, List.CU); + }); +} + +// Emit locations into the .debug_loc/.debug_rnglists section. +void DwarfDebug::emitDebugLoc() { + if (DebugLocs.getLists().empty()) + return; + + MCSymbol *TableEnd = nullptr; + if (getDwarfVersion() >= 5) { + Asm->OutStreamer->SwitchSection( + Asm->getObjFileLowering().getDwarfLoclistsSection()); + TableEnd = emitLoclistsTableHeader(Asm, *this); + } else { + Asm->OutStreamer->SwitchSection( + Asm->getObjFileLowering().getDwarfLocSection()); + } + + for (const auto &List : DebugLocs.getLists()) + emitLocList(*this, Asm, List); + + if (TableEnd) + Asm->OutStreamer->EmitLabel(TableEnd); +} + +void DwarfDebug::emitDebugLocDWO() { + for (const auto &List : DebugLocs.getLists()) { + Asm->OutStreamer->SwitchSection( + Asm->getObjFileLowering().getDwarfLocDWOSection()); + Asm->OutStreamer->EmitLabel(List.Label); + for (const auto &Entry : DebugLocs.getEntries(List)) { + // GDB only supports startx_length in pre-standard split-DWARF. + // (in v5 standard loclists, it currently* /only/ supports base_address + + // offset_pair, so the implementations can't really share much since they + // need to use different representations) + // * as of October 2018, at least + // Ideally/in v5, this could use SectionLabels to reuse existing addresses + // in the address pool to minimize object size/relocations. + Asm->emitInt8(dwarf::DW_LLE_startx_length); + unsigned idx = AddrPool.getIndex(Entry.Begin); + Asm->EmitULEB128(idx); + Asm->EmitLabelDifference(Entry.End, Entry.Begin, 4); + + emitDebugLocEntryLocation(Entry, List.CU); + } + Asm->emitInt8(dwarf::DW_LLE_end_of_list); + } +} + +struct ArangeSpan { + const MCSymbol *Start, *End; +}; + +// Emit a debug aranges section, containing a CU lookup for any +// address we can tie back to a CU. +void DwarfDebug::emitDebugARanges() { + // Provides a unique id per text section. + MapVector<MCSection *, SmallVector<SymbolCU, 8>> SectionMap; + + // Filter labels by section. + for (const SymbolCU &SCU : ArangeLabels) { + if (SCU.Sym->isInSection()) { + // Make a note of this symbol and it's section. + MCSection *Section = &SCU.Sym->getSection(); + if (!Section->getKind().isMetadata()) + SectionMap[Section].push_back(SCU); + } else { + // Some symbols (e.g. common/bss on mach-o) can have no section but still + // appear in the output. This sucks as we rely on sections to build + // arange spans. We can do it without, but it's icky. + SectionMap[nullptr].push_back(SCU); + } + } + + DenseMap<DwarfCompileUnit *, std::vector<ArangeSpan>> Spans; + + for (auto &I : SectionMap) { + MCSection *Section = I.first; + SmallVector<SymbolCU, 8> &List = I.second; + if (List.size() < 1) + continue; + + // If we have no section (e.g. common), just write out + // individual spans for each symbol. + if (!Section) { + for (const SymbolCU &Cur : List) { + ArangeSpan Span; + Span.Start = Cur.Sym; + Span.End = nullptr; + assert(Cur.CU); + Spans[Cur.CU].push_back(Span); + } + continue; + } + + // Sort the symbols by offset within the section. + llvm::stable_sort(List, [&](const SymbolCU &A, const SymbolCU &B) { + unsigned IA = A.Sym ? Asm->OutStreamer->GetSymbolOrder(A.Sym) : 0; + unsigned IB = B.Sym ? Asm->OutStreamer->GetSymbolOrder(B.Sym) : 0; + + // Symbols with no order assigned should be placed at the end. + // (e.g. section end labels) + if (IA == 0) + return false; + if (IB == 0) + return true; + return IA < IB; + }); + + // Insert a final terminator. + List.push_back(SymbolCU(nullptr, Asm->OutStreamer->endSection(Section))); + + // Build spans between each label. + const MCSymbol *StartSym = List[0].Sym; + for (size_t n = 1, e = List.size(); n < e; n++) { + const SymbolCU &Prev = List[n - 1]; + const SymbolCU &Cur = List[n]; + + // Try and build the longest span we can within the same CU. + if (Cur.CU != Prev.CU) { + ArangeSpan Span; + Span.Start = StartSym; + Span.End = Cur.Sym; + assert(Prev.CU); + Spans[Prev.CU].push_back(Span); + StartSym = Cur.Sym; + } + } + } + + // Start the dwarf aranges section. + Asm->OutStreamer->SwitchSection( + Asm->getObjFileLowering().getDwarfARangesSection()); + + unsigned PtrSize = Asm->MAI->getCodePointerSize(); + + // Build a list of CUs used. + std::vector<DwarfCompileUnit *> CUs; + for (const auto &it : Spans) { + DwarfCompileUnit *CU = it.first; + CUs.push_back(CU); + } + + // Sort the CU list (again, to ensure consistent output order). + llvm::sort(CUs, [](const DwarfCompileUnit *A, const DwarfCompileUnit *B) { + return A->getUniqueID() < B->getUniqueID(); + }); + + // Emit an arange table for each CU we used. + for (DwarfCompileUnit *CU : CUs) { + std::vector<ArangeSpan> &List = Spans[CU]; + + // Describe the skeleton CU's offset and length, not the dwo file's. + if (auto *Skel = CU->getSkeleton()) + CU = Skel; + + // Emit size of content not including length itself. + unsigned ContentSize = + sizeof(int16_t) + // DWARF ARange version number + sizeof(int32_t) + // Offset of CU in the .debug_info section + sizeof(int8_t) + // Pointer Size (in bytes) + sizeof(int8_t); // Segment Size (in bytes) + + unsigned TupleSize = PtrSize * 2; + + // 7.20 in the Dwarf specs requires the table to be aligned to a tuple. + unsigned Padding = + offsetToAlignment(sizeof(int32_t) + ContentSize, Align(TupleSize)); + + ContentSize += Padding; + ContentSize += (List.size() + 1) * TupleSize; + + // For each compile unit, write the list of spans it covers. + Asm->OutStreamer->AddComment("Length of ARange Set"); + Asm->emitInt32(ContentSize); + Asm->OutStreamer->AddComment("DWARF Arange version number"); + Asm->emitInt16(dwarf::DW_ARANGES_VERSION); + Asm->OutStreamer->AddComment("Offset Into Debug Info Section"); + emitSectionReference(*CU); + Asm->OutStreamer->AddComment("Address Size (in bytes)"); + Asm->emitInt8(PtrSize); + Asm->OutStreamer->AddComment("Segment Size (in bytes)"); + Asm->emitInt8(0); + + Asm->OutStreamer->emitFill(Padding, 0xff); + + for (const ArangeSpan &Span : List) { + Asm->EmitLabelReference(Span.Start, PtrSize); + + // Calculate the size as being from the span start to it's end. + if (Span.End) { + Asm->EmitLabelDifference(Span.End, Span.Start, PtrSize); + } else { + // For symbols without an end marker (e.g. common), we + // write a single arange entry containing just that one symbol. + uint64_t Size = SymSize[Span.Start]; + if (Size == 0) + Size = 1; + + Asm->OutStreamer->EmitIntValue(Size, PtrSize); + } + } + + Asm->OutStreamer->AddComment("ARange terminator"); + Asm->OutStreamer->EmitIntValue(0, PtrSize); + Asm->OutStreamer->EmitIntValue(0, PtrSize); + } +} + +/// Emit a single range list. We handle both DWARF v5 and earlier. +static void emitRangeList(DwarfDebug &DD, AsmPrinter *Asm, + const RangeSpanList &List) { + emitRangeList(DD, Asm, List.getSym(), List.getRanges(), List.getCU(), + dwarf::DW_RLE_base_addressx, dwarf::DW_RLE_offset_pair, + dwarf::DW_RLE_startx_length, dwarf::DW_RLE_end_of_list, + llvm::dwarf::RangeListEncodingString, + List.getCU().getCUNode()->getRangesBaseAddress() || + DD.getDwarfVersion() >= 5, + [](auto) {}); +} + +static void emitDebugRangesImpl(DwarfDebug &DD, AsmPrinter *Asm, + const DwarfFile &Holder, MCSymbol *TableEnd) { + for (const RangeSpanList &List : Holder.getRangeLists()) + emitRangeList(DD, Asm, List); + + if (TableEnd) + Asm->OutStreamer->EmitLabel(TableEnd); +} + +/// Emit address ranges into the .debug_ranges section or into the DWARF v5 +/// .debug_rnglists section. +void DwarfDebug::emitDebugRanges() { + if (CUMap.empty()) + return; + + const auto &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder; + + if (Holder.getRangeLists().empty()) + return; + + assert(useRangesSection()); + assert(llvm::none_of(CUMap, [](const decltype(CUMap)::value_type &Pair) { + return Pair.second->getCUNode()->isDebugDirectivesOnly(); + })); + + // Start the dwarf ranges section. + MCSymbol *TableEnd = nullptr; + if (getDwarfVersion() >= 5) { + Asm->OutStreamer->SwitchSection( + Asm->getObjFileLowering().getDwarfRnglistsSection()); + TableEnd = emitRnglistsTableHeader(Asm, Holder); + } else + Asm->OutStreamer->SwitchSection( + Asm->getObjFileLowering().getDwarfRangesSection()); + + emitDebugRangesImpl(*this, Asm, Holder, TableEnd); +} + +void DwarfDebug::emitDebugRangesDWO() { + assert(useSplitDwarf()); + + if (CUMap.empty()) + return; + + const auto &Holder = InfoHolder; + + if (Holder.getRangeLists().empty()) + return; + + assert(getDwarfVersion() >= 5); + assert(useRangesSection()); + assert(llvm::none_of(CUMap, [](const decltype(CUMap)::value_type &Pair) { + return Pair.second->getCUNode()->isDebugDirectivesOnly(); + })); + + // Start the dwarf ranges section. + Asm->OutStreamer->SwitchSection( + Asm->getObjFileLowering().getDwarfRnglistsDWOSection()); + MCSymbol *TableEnd = emitRnglistsTableHeader(Asm, Holder); + + emitDebugRangesImpl(*this, Asm, Holder, TableEnd); +} + +void DwarfDebug::handleMacroNodes(DIMacroNodeArray Nodes, DwarfCompileUnit &U) { + for (auto *MN : Nodes) { + if (auto *M = dyn_cast<DIMacro>(MN)) + emitMacro(*M); + else if (auto *F = dyn_cast<DIMacroFile>(MN)) + emitMacroFile(*F, U); + else + llvm_unreachable("Unexpected DI type!"); + } +} + +void DwarfDebug::emitMacro(DIMacro &M) { + Asm->EmitULEB128(M.getMacinfoType()); + Asm->EmitULEB128(M.getLine()); + StringRef Name = M.getName(); + StringRef Value = M.getValue(); + Asm->OutStreamer->EmitBytes(Name); + if (!Value.empty()) { + // There should be one space between macro name and macro value. + Asm->emitInt8(' '); + Asm->OutStreamer->EmitBytes(Value); + } + Asm->emitInt8('\0'); +} + +void DwarfDebug::emitMacroFile(DIMacroFile &F, DwarfCompileUnit &U) { + assert(F.getMacinfoType() == dwarf::DW_MACINFO_start_file); + Asm->EmitULEB128(dwarf::DW_MACINFO_start_file); + Asm->EmitULEB128(F.getLine()); + Asm->EmitULEB128(U.getOrCreateSourceID(F.getFile())); + handleMacroNodes(F.getElements(), U); + Asm->EmitULEB128(dwarf::DW_MACINFO_end_file); +} + +/// Emit macros into a debug macinfo section. +void DwarfDebug::emitDebugMacinfo() { + if (CUMap.empty()) + return; + + if (llvm::all_of(CUMap, [](const decltype(CUMap)::value_type &Pair) { + return Pair.second->getCUNode()->isDebugDirectivesOnly(); + })) + return; + + // Start the dwarf macinfo section. + Asm->OutStreamer->SwitchSection( + Asm->getObjFileLowering().getDwarfMacinfoSection()); + + for (const auto &P : CUMap) { + auto &TheCU = *P.second; + if (TheCU.getCUNode()->isDebugDirectivesOnly()) + continue; + auto *SkCU = TheCU.getSkeleton(); + DwarfCompileUnit &U = SkCU ? *SkCU : TheCU; + auto *CUNode = cast<DICompileUnit>(P.first); + DIMacroNodeArray Macros = CUNode->getMacros(); + if (!Macros.empty()) { + Asm->OutStreamer->EmitLabel(U.getMacroLabelBegin()); + handleMacroNodes(Macros, U); + } + } + Asm->OutStreamer->AddComment("End Of Macro List Mark"); + Asm->emitInt8(0); +} + +// DWARF5 Experimental Separate Dwarf emitters. + +void DwarfDebug::initSkeletonUnit(const DwarfUnit &U, DIE &Die, + std::unique_ptr<DwarfCompileUnit> NewU) { + + if (!CompilationDir.empty()) + NewU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir); + + addGnuPubAttributes(*NewU, Die); + + SkeletonHolder.addUnit(std::move(NewU)); +} + +DwarfCompileUnit &DwarfDebug::constructSkeletonCU(const DwarfCompileUnit &CU) { + + auto OwnedUnit = std::make_unique<DwarfCompileUnit>( + CU.getUniqueID(), CU.getCUNode(), Asm, this, &SkeletonHolder); + DwarfCompileUnit &NewCU = *OwnedUnit; + NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoSection()); + + NewCU.initStmtList(); + + if (useSegmentedStringOffsetsTable()) + NewCU.addStringOffsetsStart(); + + initSkeletonUnit(CU, NewCU.getUnitDie(), std::move(OwnedUnit)); + + return NewCU; +} + +// Emit the .debug_info.dwo section for separated dwarf. This contains the +// compile units that would normally be in debug_info. +void DwarfDebug::emitDebugInfoDWO() { + assert(useSplitDwarf() && "No split dwarf debug info?"); + // Don't emit relocations into the dwo file. + InfoHolder.emitUnits(/* UseOffsets */ true); +} + +// Emit the .debug_abbrev.dwo section for separated dwarf. This contains the +// abbreviations for the .debug_info.dwo section. +void DwarfDebug::emitDebugAbbrevDWO() { + assert(useSplitDwarf() && "No split dwarf?"); + InfoHolder.emitAbbrevs(Asm->getObjFileLowering().getDwarfAbbrevDWOSection()); +} + +void DwarfDebug::emitDebugLineDWO() { + assert(useSplitDwarf() && "No split dwarf?"); + SplitTypeUnitFileTable.Emit( + *Asm->OutStreamer, MCDwarfLineTableParams(), + Asm->getObjFileLowering().getDwarfLineDWOSection()); +} + +void DwarfDebug::emitStringOffsetsTableHeaderDWO() { + assert(useSplitDwarf() && "No split dwarf?"); + InfoHolder.getStringPool().emitStringOffsetsTableHeader( + *Asm, Asm->getObjFileLowering().getDwarfStrOffDWOSection(), + InfoHolder.getStringOffsetsStartSym()); +} + +// Emit the .debug_str.dwo section for separated dwarf. This contains the +// string section and is identical in format to traditional .debug_str +// sections. +void DwarfDebug::emitDebugStrDWO() { + if (useSegmentedStringOffsetsTable()) + emitStringOffsetsTableHeaderDWO(); + assert(useSplitDwarf() && "No split dwarf?"); + MCSection *OffSec = Asm->getObjFileLowering().getDwarfStrOffDWOSection(); + InfoHolder.emitStrings(Asm->getObjFileLowering().getDwarfStrDWOSection(), + OffSec, /* UseRelativeOffsets = */ false); +} + +// Emit address pool. +void DwarfDebug::emitDebugAddr() { + AddrPool.emit(*Asm, Asm->getObjFileLowering().getDwarfAddrSection()); +} + +MCDwarfDwoLineTable *DwarfDebug::getDwoLineTable(const DwarfCompileUnit &CU) { + if (!useSplitDwarf()) + return nullptr; + const DICompileUnit *DIUnit = CU.getCUNode(); + SplitTypeUnitFileTable.maybeSetRootFile( + DIUnit->getDirectory(), DIUnit->getFilename(), + CU.getMD5AsBytes(DIUnit->getFile()), DIUnit->getSource()); + return &SplitTypeUnitFileTable; +} + +uint64_t DwarfDebug::makeTypeSignature(StringRef Identifier) { + MD5 Hash; + Hash.update(Identifier); + // ... take the least significant 8 bytes and return those. Our MD5 + // implementation always returns its results in little endian, so we actually + // need the "high" word. + MD5::MD5Result Result; + Hash.final(Result); + return Result.high(); +} + +void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU, + StringRef Identifier, DIE &RefDie, + const DICompositeType *CTy) { + // Fast path if we're building some type units and one has already used the + // address pool we know we're going to throw away all this work anyway, so + // don't bother building dependent types. + if (!TypeUnitsUnderConstruction.empty() && AddrPool.hasBeenUsed()) + return; + + auto Ins = TypeSignatures.insert(std::make_pair(CTy, 0)); + if (!Ins.second) { + CU.addDIETypeSignature(RefDie, Ins.first->second); + return; + } + + bool TopLevelType = TypeUnitsUnderConstruction.empty(); + AddrPool.resetUsedFlag(); + + auto OwnedUnit = std::make_unique<DwarfTypeUnit>(CU, Asm, this, &InfoHolder, + getDwoLineTable(CU)); + DwarfTypeUnit &NewTU = *OwnedUnit; + DIE &UnitDie = NewTU.getUnitDie(); + TypeUnitsUnderConstruction.emplace_back(std::move(OwnedUnit), CTy); + + NewTU.addUInt(UnitDie, dwarf::DW_AT_language, dwarf::DW_FORM_data2, + CU.getLanguage()); + + uint64_t Signature = makeTypeSignature(Identifier); + NewTU.setTypeSignature(Signature); + Ins.first->second = Signature; + + if (useSplitDwarf()) { + MCSection *Section = + getDwarfVersion() <= 4 + ? Asm->getObjFileLowering().getDwarfTypesDWOSection() + : Asm->getObjFileLowering().getDwarfInfoDWOSection(); + NewTU.setSection(Section); + } else { + MCSection *Section = + getDwarfVersion() <= 4 + ? Asm->getObjFileLowering().getDwarfTypesSection(Signature) + : Asm->getObjFileLowering().getDwarfInfoSection(Signature); + NewTU.setSection(Section); + // Non-split type units reuse the compile unit's line table. + CU.applyStmtList(UnitDie); + } + + // Add DW_AT_str_offsets_base to the type unit DIE, but not for split type + // units. + if (useSegmentedStringOffsetsTable() && !useSplitDwarf()) + NewTU.addStringOffsetsStart(); + + NewTU.setType(NewTU.createTypeDIE(CTy)); + + if (TopLevelType) { + auto TypeUnitsToAdd = std::move(TypeUnitsUnderConstruction); + TypeUnitsUnderConstruction.clear(); + + // Types referencing entries in the address table cannot be placed in type + // units. + if (AddrPool.hasBeenUsed()) { + + // Remove all the types built while building this type. + // This is pessimistic as some of these types might not be dependent on + // the type that used an address. + for (const auto &TU : TypeUnitsToAdd) + TypeSignatures.erase(TU.second); + + // Construct this type in the CU directly. + // This is inefficient because all the dependent types will be rebuilt + // from scratch, including building them in type units, discovering that + // they depend on addresses, throwing them out and rebuilding them. + CU.constructTypeDIE(RefDie, cast<DICompositeType>(CTy)); + return; + } + + // If the type wasn't dependent on fission addresses, finish adding the type + // and all its dependent types. + for (auto &TU : TypeUnitsToAdd) { + InfoHolder.computeSizeAndOffsetsForUnit(TU.first.get()); + InfoHolder.emitUnit(TU.first.get(), useSplitDwarf()); + } + } + CU.addDIETypeSignature(RefDie, Signature); +} + +DwarfDebug::NonTypeUnitContext::NonTypeUnitContext(DwarfDebug *DD) + : DD(DD), + TypeUnitsUnderConstruction(std::move(DD->TypeUnitsUnderConstruction)) { + DD->TypeUnitsUnderConstruction.clear(); + assert(TypeUnitsUnderConstruction.empty() || !DD->AddrPool.hasBeenUsed()); +} + +DwarfDebug::NonTypeUnitContext::~NonTypeUnitContext() { + DD->TypeUnitsUnderConstruction = std::move(TypeUnitsUnderConstruction); + DD->AddrPool.resetUsedFlag(); +} + +DwarfDebug::NonTypeUnitContext DwarfDebug::enterNonTypeUnitContext() { + return NonTypeUnitContext(this); +} + +// Add the Name along with its companion DIE to the appropriate accelerator +// table (for AccelTableKind::Dwarf it's always AccelDebugNames, for +// AccelTableKind::Apple, we use the table we got as an argument). If +// accelerator tables are disabled, this function does nothing. +template <typename DataT> +void DwarfDebug::addAccelNameImpl(const DICompileUnit &CU, + AccelTable<DataT> &AppleAccel, StringRef Name, + const DIE &Die) { + if (getAccelTableKind() == AccelTableKind::None) + return; + + if (getAccelTableKind() != AccelTableKind::Apple && + CU.getNameTableKind() != DICompileUnit::DebugNameTableKind::Default) + return; + + DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder; + DwarfStringPoolEntryRef Ref = Holder.getStringPool().getEntry(*Asm, Name); + + switch (getAccelTableKind()) { + case AccelTableKind::Apple: + AppleAccel.addName(Ref, Die); + break; + case AccelTableKind::Dwarf: + AccelDebugNames.addName(Ref, Die); + break; + case AccelTableKind::Default: + llvm_unreachable("Default should have already been resolved."); + case AccelTableKind::None: + llvm_unreachable("None handled above"); + } +} + +void DwarfDebug::addAccelName(const DICompileUnit &CU, StringRef Name, + const DIE &Die) { + addAccelNameImpl(CU, AccelNames, Name, Die); +} + +void DwarfDebug::addAccelObjC(const DICompileUnit &CU, StringRef Name, + const DIE &Die) { + // ObjC names go only into the Apple accelerator tables. + if (getAccelTableKind() == AccelTableKind::Apple) + addAccelNameImpl(CU, AccelObjC, Name, Die); +} + +void DwarfDebug::addAccelNamespace(const DICompileUnit &CU, StringRef Name, + const DIE &Die) { + addAccelNameImpl(CU, AccelNamespace, Name, Die); +} + +void DwarfDebug::addAccelType(const DICompileUnit &CU, StringRef Name, + const DIE &Die, char Flags) { + addAccelNameImpl(CU, AccelTypes, Name, Die); +} + +uint16_t DwarfDebug::getDwarfVersion() const { + return Asm->OutStreamer->getContext().getDwarfVersion(); +} + +const MCSymbol *DwarfDebug::getSectionLabel(const MCSection *S) { + return SectionLabels.find(S)->second; +} diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h new file mode 100644 index 000000000000..c8c511f67c2a --- /dev/null +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -0,0 +1,765 @@ +//===- llvm/CodeGen/DwarfDebug.h - Dwarf Debug Framework --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing dwarf debug info into asm files. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFDEBUG_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFDEBUG_H + +#include "AddressPool.h" +#include "DebugLocStream.h" +#include "DebugLocEntry.h" +#include "DwarfFile.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/CodeGen/AccelTable.h" +#include "llvm/CodeGen/DbgEntityHistoryCalculator.h" +#include "llvm/CodeGen/DebugHandlerBase.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/Metadata.h" +#include "llvm/MC/MCDwarf.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Target/TargetOptions.h" +#include <cassert> +#include <cstdint> +#include <limits> +#include <memory> +#include <utility> +#include <vector> + +namespace llvm { + +class AsmPrinter; +class ByteStreamer; +class DebugLocEntry; +class DIE; +class DwarfCompileUnit; +class DwarfExpression; +class DwarfTypeUnit; +class DwarfUnit; +class LexicalScope; +class MachineFunction; +class MCSection; +class MCSymbol; +class MDNode; +class Module; + +//===----------------------------------------------------------------------===// +/// This class is defined as the common parent of DbgVariable and DbgLabel +/// such that it could levarage polymorphism to extract common code for +/// DbgVariable and DbgLabel. +class DbgEntity { + const DINode *Entity; + const DILocation *InlinedAt; + DIE *TheDIE = nullptr; + unsigned SubclassID; + +public: + enum DbgEntityKind { + DbgVariableKind, + DbgLabelKind + }; + + DbgEntity(const DINode *N, const DILocation *IA, unsigned ID) + : Entity(N), InlinedAt(IA), SubclassID(ID) {} + virtual ~DbgEntity() {} + + /// Accessors. + /// @{ + const DINode *getEntity() const { return Entity; } + const DILocation *getInlinedAt() const { return InlinedAt; } + DIE *getDIE() const { return TheDIE; } + unsigned getDbgEntityID() const { return SubclassID; } + /// @} + + void setDIE(DIE &D) { TheDIE = &D; } + + static bool classof(const DbgEntity *N) { + switch (N->getDbgEntityID()) { + default: + return false; + case DbgVariableKind: + case DbgLabelKind: + return true; + } + } +}; + +//===----------------------------------------------------------------------===// +/// This class is used to track local variable information. +/// +/// Variables can be created from allocas, in which case they're generated from +/// the MMI table. Such variables can have multiple expressions and frame +/// indices. +/// +/// Variables can be created from \c DBG_VALUE instructions. Those whose +/// location changes over time use \a DebugLocListIndex, while those with a +/// single location use \a ValueLoc and (optionally) a single entry of \a Expr. +/// +/// Variables that have been optimized out use none of these fields. +class DbgVariable : public DbgEntity { + /// Offset in DebugLocs. + unsigned DebugLocListIndex = ~0u; + /// Single value location description. + std::unique_ptr<DbgValueLoc> ValueLoc = nullptr; + + struct FrameIndexExpr { + int FI; + const DIExpression *Expr; + }; + mutable SmallVector<FrameIndexExpr, 1> + FrameIndexExprs; /// Frame index + expression. + +public: + /// Construct a DbgVariable. + /// + /// Creates a variable without any DW_AT_location. Call \a initializeMMI() + /// for MMI entries, or \a initializeDbgValue() for DBG_VALUE instructions. + DbgVariable(const DILocalVariable *V, const DILocation *IA) + : DbgEntity(V, IA, DbgVariableKind) {} + + /// Initialize from the MMI table. + void initializeMMI(const DIExpression *E, int FI) { + assert(FrameIndexExprs.empty() && "Already initialized?"); + assert(!ValueLoc.get() && "Already initialized?"); + + assert((!E || E->isValid()) && "Expected valid expression"); + assert(FI != std::numeric_limits<int>::max() && "Expected valid index"); + + FrameIndexExprs.push_back({FI, E}); + } + + // Initialize variable's location. + void initializeDbgValue(DbgValueLoc Value) { + assert(FrameIndexExprs.empty() && "Already initialized?"); + assert(!ValueLoc && "Already initialized?"); + assert(!Value.getExpression()->isFragment() && "Fragments not supported."); + + ValueLoc = std::make_unique<DbgValueLoc>(Value); + if (auto *E = ValueLoc->getExpression()) + if (E->getNumElements()) + FrameIndexExprs.push_back({0, E}); + } + + /// Initialize from a DBG_VALUE instruction. + void initializeDbgValue(const MachineInstr *DbgValue); + + // Accessors. + const DILocalVariable *getVariable() const { + return cast<DILocalVariable>(getEntity()); + } + + const DIExpression *getSingleExpression() const { + assert(ValueLoc.get() && FrameIndexExprs.size() <= 1); + return FrameIndexExprs.size() ? FrameIndexExprs[0].Expr : nullptr; + } + + void setDebugLocListIndex(unsigned O) { DebugLocListIndex = O; } + unsigned getDebugLocListIndex() const { return DebugLocListIndex; } + StringRef getName() const { return getVariable()->getName(); } + const DbgValueLoc *getValueLoc() const { return ValueLoc.get(); } + /// Get the FI entries, sorted by fragment offset. + ArrayRef<FrameIndexExpr> getFrameIndexExprs() const; + bool hasFrameIndexExprs() const { return !FrameIndexExprs.empty(); } + void addMMIEntry(const DbgVariable &V); + + // Translate tag to proper Dwarf tag. + dwarf::Tag getTag() const { + // FIXME: Why don't we just infer this tag and store it all along? + if (getVariable()->isParameter()) + return dwarf::DW_TAG_formal_parameter; + + return dwarf::DW_TAG_variable; + } + + /// Return true if DbgVariable is artificial. + bool isArtificial() const { + if (getVariable()->isArtificial()) + return true; + if (getType()->isArtificial()) + return true; + return false; + } + + bool isObjectPointer() const { + if (getVariable()->isObjectPointer()) + return true; + if (getType()->isObjectPointer()) + return true; + return false; + } + + bool hasComplexAddress() const { + assert(ValueLoc.get() && "Expected DBG_VALUE, not MMI variable"); + assert((FrameIndexExprs.empty() || + (FrameIndexExprs.size() == 1 && + FrameIndexExprs[0].Expr->getNumElements())) && + "Invalid Expr for DBG_VALUE"); + return !FrameIndexExprs.empty(); + } + + const DIType *getType() const; + + static bool classof(const DbgEntity *N) { + return N->getDbgEntityID() == DbgVariableKind; + } +}; + +//===----------------------------------------------------------------------===// +/// This class is used to track label information. +/// +/// Labels are collected from \c DBG_LABEL instructions. +class DbgLabel : public DbgEntity { + const MCSymbol *Sym; /// Symbol before DBG_LABEL instruction. + +public: + /// We need MCSymbol information to generate DW_AT_low_pc. + DbgLabel(const DILabel *L, const DILocation *IA, const MCSymbol *Sym = nullptr) + : DbgEntity(L, IA, DbgLabelKind), Sym(Sym) {} + + /// Accessors. + /// @{ + const DILabel *getLabel() const { return cast<DILabel>(getEntity()); } + const MCSymbol *getSymbol() const { return Sym; } + + StringRef getName() const { return getLabel()->getName(); } + /// @} + + /// Translate tag to proper Dwarf tag. + dwarf::Tag getTag() const { + return dwarf::DW_TAG_label; + } + + static bool classof(const DbgEntity *N) { + return N->getDbgEntityID() == DbgLabelKind; + } +}; + +/// Used for tracking debug info about call site parameters. +class DbgCallSiteParam { +private: + unsigned Register; ///< Parameter register at the callee entry point. + DbgValueLoc Value; ///< Corresponding location for the parameter value at + ///< the call site. +public: + DbgCallSiteParam(unsigned Reg, DbgValueLoc Val) + : Register(Reg), Value(Val) { + assert(Reg && "Parameter register cannot be undef"); + } + + unsigned getRegister() const { return Register; } + DbgValueLoc getValue() const { return Value; } +}; + +/// Collection used for storing debug call site parameters. +using ParamSet = SmallVector<DbgCallSiteParam, 4>; + +/// Helper used to pair up a symbol and its DWARF compile unit. +struct SymbolCU { + SymbolCU(DwarfCompileUnit *CU, const MCSymbol *Sym) : Sym(Sym), CU(CU) {} + + const MCSymbol *Sym; + DwarfCompileUnit *CU; +}; + +/// The kind of accelerator tables we should emit. +enum class AccelTableKind { + Default, ///< Platform default. + None, ///< None. + Apple, ///< .apple_names, .apple_namespaces, .apple_types, .apple_objc. + Dwarf, ///< DWARF v5 .debug_names. +}; + +/// Collects and handles dwarf debug information. +class DwarfDebug : public DebugHandlerBase { + /// All DIEValues are allocated through this allocator. + BumpPtrAllocator DIEValueAllocator; + + /// Maps MDNode with its corresponding DwarfCompileUnit. + MapVector<const MDNode *, DwarfCompileUnit *> CUMap; + + /// Maps a CU DIE with its corresponding DwarfCompileUnit. + DenseMap<const DIE *, DwarfCompileUnit *> CUDieMap; + + /// List of all labels used in aranges generation. + std::vector<SymbolCU> ArangeLabels; + + /// Size of each symbol emitted (for those symbols that have a specific size). + DenseMap<const MCSymbol *, uint64_t> SymSize; + + /// Collection of abstract variables/labels. + SmallVector<std::unique_ptr<DbgEntity>, 64> ConcreteEntities; + + /// Collection of DebugLocEntry. Stored in a linked list so that DIELocLists + /// can refer to them in spite of insertions into this list. + DebugLocStream DebugLocs; + + /// This is a collection of subprogram MDNodes that are processed to + /// create DIEs. + SetVector<const DISubprogram *, SmallVector<const DISubprogram *, 16>, + SmallPtrSet<const DISubprogram *, 16>> + ProcessedSPNodes; + + /// If nonnull, stores the current machine function we're processing. + const MachineFunction *CurFn = nullptr; + + /// If nonnull, stores the CU in which the previous subprogram was contained. + const DwarfCompileUnit *PrevCU; + + /// As an optimization, there is no need to emit an entry in the directory + /// table for the same directory as DW_AT_comp_dir. + StringRef CompilationDir; + + /// Holder for the file specific debug information. + DwarfFile InfoHolder; + + /// Holders for the various debug information flags that we might need to + /// have exposed. See accessor functions below for description. + + /// Map from MDNodes for user-defined types to their type signatures. Also + /// used to keep track of which types we have emitted type units for. + DenseMap<const MDNode *, uint64_t> TypeSignatures; + + DenseMap<const MCSection *, const MCSymbol *> SectionLabels; + + SmallVector< + std::pair<std::unique_ptr<DwarfTypeUnit>, const DICompositeType *>, 1> + TypeUnitsUnderConstruction; + + /// Whether to use the GNU TLS opcode (instead of the standard opcode). + bool UseGNUTLSOpcode; + + /// Whether to use DWARF 2 bitfields (instead of the DWARF 4 format). + bool UseDWARF2Bitfields; + + /// Whether to emit all linkage names, or just abstract subprograms. + bool UseAllLinkageNames; + + /// Use inlined strings. + bool UseInlineStrings = false; + + /// Allow emission of .debug_ranges section. + bool UseRangesSection = true; + + /// True if the sections itself must be used as references and don't create + /// temp symbols inside DWARF sections. + bool UseSectionsAsReferences = false; + + ///Allow emission of the .debug_loc section. + bool UseLocSection = true; + + /// Generate DWARF v4 type units. + bool GenerateTypeUnits; + + /// DWARF5 Experimental Options + /// @{ + AccelTableKind TheAccelTableKind; + bool HasAppleExtensionAttributes; + bool HasSplitDwarf; + + /// Whether to generate the DWARF v5 string offsets table. + /// It consists of a series of contributions, each preceded by a header. + /// The pre-DWARF v5 string offsets table for split dwarf is, in contrast, + /// a monolithic sequence of string offsets. + bool UseSegmentedStringOffsetsTable; + + /// Separated Dwarf Variables + /// In general these will all be for bits that are left in the + /// original object file, rather than things that are meant + /// to be in the .dwo sections. + + /// Holder for the skeleton information. + DwarfFile SkeletonHolder; + + /// Store file names for type units under fission in a line table + /// header that will be emitted into debug_line.dwo. + // FIXME: replace this with a map from comp_dir to table so that we + // can emit multiple tables during LTO each of which uses directory + // 0, referencing the comp_dir of all the type units that use it. + MCDwarfDwoLineTable SplitTypeUnitFileTable; + /// @} + + /// True iff there are multiple CUs in this module. + bool SingleCU; + bool IsDarwin; + + AddressPool AddrPool; + + /// Accelerator tables. + AccelTable<DWARF5AccelTableData> AccelDebugNames; + AccelTable<AppleAccelTableOffsetData> AccelNames; + AccelTable<AppleAccelTableOffsetData> AccelObjC; + AccelTable<AppleAccelTableOffsetData> AccelNamespace; + AccelTable<AppleAccelTableTypeData> AccelTypes; + + // Identify a debugger for "tuning" the debug info. + DebuggerKind DebuggerTuning = DebuggerKind::Default; + + MCDwarfDwoLineTable *getDwoLineTable(const DwarfCompileUnit &); + + const SmallVectorImpl<std::unique_ptr<DwarfCompileUnit>> &getUnits() { + return InfoHolder.getUnits(); + } + + using InlinedEntity = DbgValueHistoryMap::InlinedEntity; + + void ensureAbstractEntityIsCreated(DwarfCompileUnit &CU, + const DINode *Node, + const MDNode *Scope); + void ensureAbstractEntityIsCreatedIfScoped(DwarfCompileUnit &CU, + const DINode *Node, + const MDNode *Scope); + + DbgEntity *createConcreteEntity(DwarfCompileUnit &TheCU, + LexicalScope &Scope, + const DINode *Node, + const DILocation *Location, + const MCSymbol *Sym = nullptr); + + /// Construct a DIE for this abstract scope. + void constructAbstractSubprogramScopeDIE(DwarfCompileUnit &SrcCU, LexicalScope *Scope); + + /// Construct DIEs for call site entries describing the calls in \p MF. + void constructCallSiteEntryDIEs(const DISubprogram &SP, DwarfCompileUnit &CU, + DIE &ScopeDIE, const MachineFunction &MF); + + template <typename DataT> + void addAccelNameImpl(const DICompileUnit &CU, AccelTable<DataT> &AppleAccel, + StringRef Name, const DIE &Die); + + void finishEntityDefinitions(); + + void finishSubprogramDefinitions(); + + /// Finish off debug information after all functions have been + /// processed. + void finalizeModuleInfo(); + + /// Emit the debug info section. + void emitDebugInfo(); + + /// Emit the abbreviation section. + void emitAbbreviations(); + + /// Emit the string offsets table header. + void emitStringOffsetsTableHeader(); + + /// Emit a specified accelerator table. + template <typename AccelTableT> + void emitAccel(AccelTableT &Accel, MCSection *Section, StringRef TableName); + + /// Emit DWARF v5 accelerator table. + void emitAccelDebugNames(); + + /// Emit visible names into a hashed accelerator table section. + void emitAccelNames(); + + /// Emit objective C classes and categories into a hashed + /// accelerator table section. + void emitAccelObjC(); + + /// Emit namespace dies into a hashed accelerator table. + void emitAccelNamespaces(); + + /// Emit type dies into a hashed accelerator table. + void emitAccelTypes(); + + /// Emit visible names and types into debug pubnames and pubtypes sections. + void emitDebugPubSections(); + + void emitDebugPubSection(bool GnuStyle, StringRef Name, + DwarfCompileUnit *TheU, + const StringMap<const DIE *> &Globals); + + /// Emit null-terminated strings into a debug str section. + void emitDebugStr(); + + /// Emit variable locations into a debug loc section. + void emitDebugLoc(); + + /// Emit variable locations into a debug loc dwo section. + void emitDebugLocDWO(); + + /// Emit address ranges into a debug aranges section. + void emitDebugARanges(); + + /// Emit address ranges into a debug ranges section. + void emitDebugRanges(); + void emitDebugRangesDWO(); + + /// Emit macros into a debug macinfo section. + void emitDebugMacinfo(); + void emitMacro(DIMacro &M); + void emitMacroFile(DIMacroFile &F, DwarfCompileUnit &U); + void handleMacroNodes(DIMacroNodeArray Nodes, DwarfCompileUnit &U); + + /// DWARF 5 Experimental Split Dwarf Emitters + + /// Initialize common features of skeleton units. + void initSkeletonUnit(const DwarfUnit &U, DIE &Die, + std::unique_ptr<DwarfCompileUnit> NewU); + + /// Construct the split debug info compile unit for the debug info section. + /// In DWARF v5, the skeleton unit DIE may have the following attributes: + /// DW_AT_addr_base, DW_AT_comp_dir, DW_AT_dwo_name, DW_AT_high_pc, + /// DW_AT_low_pc, DW_AT_ranges, DW_AT_stmt_list, and DW_AT_str_offsets_base. + /// Prior to DWARF v5 it may also have DW_AT_GNU_dwo_id. DW_AT_GNU_dwo_name + /// is used instead of DW_AT_dwo_name, Dw_AT_GNU_addr_base instead of + /// DW_AT_addr_base, and DW_AT_GNU_ranges_base instead of DW_AT_rnglists_base. + DwarfCompileUnit &constructSkeletonCU(const DwarfCompileUnit &CU); + + /// Emit the debug info dwo section. + void emitDebugInfoDWO(); + + /// Emit the debug abbrev dwo section. + void emitDebugAbbrevDWO(); + + /// Emit the debug line dwo section. + void emitDebugLineDWO(); + + /// Emit the dwo stringoffsets table header. + void emitStringOffsetsTableHeaderDWO(); + + /// Emit the debug str dwo section. + void emitDebugStrDWO(); + + /// Emit DWO addresses. + void emitDebugAddr(); + + /// Flags to let the linker know we have emitted new style pubnames. Only + /// emit it here if we don't have a skeleton CU for split dwarf. + void addGnuPubAttributes(DwarfCompileUnit &U, DIE &D) const; + + /// Create new DwarfCompileUnit for the given metadata node with tag + /// DW_TAG_compile_unit. + DwarfCompileUnit &getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit); + void finishUnitAttributes(const DICompileUnit *DIUnit, + DwarfCompileUnit &NewCU); + + /// Construct imported_module or imported_declaration DIE. + void constructAndAddImportedEntityDIE(DwarfCompileUnit &TheCU, + const DIImportedEntity *N); + + /// Register a source line with debug info. Returns the unique + /// label that was emitted and which provides correspondence to the + /// source line list. + void recordSourceLine(unsigned Line, unsigned Col, const MDNode *Scope, + unsigned Flags); + + /// Populate LexicalScope entries with variables' info. + void collectEntityInfo(DwarfCompileUnit &TheCU, const DISubprogram *SP, + DenseSet<InlinedEntity> &ProcessedVars); + + /// Build the location list for all DBG_VALUEs in the + /// function that describe the same variable. If the resulting + /// list has only one entry that is valid for entire variable's + /// scope return true. + bool buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc, + const DbgValueHistoryMap::Entries &Entries); + + /// Collect variable information from the side table maintained by MF. + void collectVariableInfoFromMFTable(DwarfCompileUnit &TheCU, + DenseSet<InlinedEntity> &P); + + /// Emit the reference to the section. + void emitSectionReference(const DwarfCompileUnit &CU); + +protected: + /// Gather pre-function debug information. + void beginFunctionImpl(const MachineFunction *MF) override; + + /// Gather and emit post-function debug information. + void endFunctionImpl(const MachineFunction *MF) override; + + void skippedNonDebugFunction() override; + +public: + //===--------------------------------------------------------------------===// + // Main entry points. + // + DwarfDebug(AsmPrinter *A, Module *M); + + ~DwarfDebug() override; + + /// Emit all Dwarf sections that should come prior to the + /// content. + void beginModule(); + + /// Emit all Dwarf sections that should come after the content. + void endModule() override; + + /// Emits inital debug location directive. + DebugLoc emitInitialLocDirective(const MachineFunction &MF, unsigned CUID); + + /// Process beginning of an instruction. + void beginInstruction(const MachineInstr *MI) override; + + /// Perform an MD5 checksum of \p Identifier and return the lower 64 bits. + static uint64_t makeTypeSignature(StringRef Identifier); + + /// Add a DIE to the set of types that we're going to pull into + /// type units. + void addDwarfTypeUnitType(DwarfCompileUnit &CU, StringRef Identifier, + DIE &Die, const DICompositeType *CTy); + + friend class NonTypeUnitContext; + class NonTypeUnitContext { + DwarfDebug *DD; + decltype(DwarfDebug::TypeUnitsUnderConstruction) TypeUnitsUnderConstruction; + friend class DwarfDebug; + NonTypeUnitContext(DwarfDebug *DD); + public: + NonTypeUnitContext(NonTypeUnitContext&&) = default; + ~NonTypeUnitContext(); + }; + + NonTypeUnitContext enterNonTypeUnitContext(); + + /// Add a label so that arange data can be generated for it. + void addArangeLabel(SymbolCU SCU) { ArangeLabels.push_back(SCU); } + + /// For symbols that have a size designated (e.g. common symbols), + /// this tracks that size. + void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override { + SymSize[Sym] = Size; + } + + /// Returns whether we should emit all DW_AT_[MIPS_]linkage_name. + /// If not, we still might emit certain cases. + bool useAllLinkageNames() const { return UseAllLinkageNames; } + + /// Returns whether to use DW_OP_GNU_push_tls_address, instead of the + /// standard DW_OP_form_tls_address opcode + bool useGNUTLSOpcode() const { return UseGNUTLSOpcode; } + + /// Returns whether to use the DWARF2 format for bitfields instyead of the + /// DWARF4 format. + bool useDWARF2Bitfields() const { return UseDWARF2Bitfields; } + + /// Returns whether to use inline strings. + bool useInlineStrings() const { return UseInlineStrings; } + + /// Returns whether ranges section should be emitted. + bool useRangesSection() const { return UseRangesSection; } + + /// Returns whether to use sections as labels rather than temp symbols. + bool useSectionsAsReferences() const { + return UseSectionsAsReferences; + } + + /// Returns whether .debug_loc section should be emitted. + bool useLocSection() const { return UseLocSection; } + + /// Returns whether to generate DWARF v4 type units. + bool generateTypeUnits() const { return GenerateTypeUnits; } + + // Experimental DWARF5 features. + + /// Returns what kind (if any) of accelerator tables to emit. + AccelTableKind getAccelTableKind() const { return TheAccelTableKind; } + + bool useAppleExtensionAttributes() const { + return HasAppleExtensionAttributes; + } + + /// Returns whether or not to change the current debug info for the + /// split dwarf proposal support. + bool useSplitDwarf() const { return HasSplitDwarf; } + + /// Returns whether to generate a string offsets table with (possibly shared) + /// contributions from each CU and type unit. This implies the use of + /// DW_FORM_strx* indirect references with DWARF v5 and beyond. Note that + /// DW_FORM_GNU_str_index is also an indirect reference, but it is used with + /// a pre-DWARF v5 implementation of split DWARF sections, which uses a + /// monolithic string offsets table. + bool useSegmentedStringOffsetsTable() const { + return UseSegmentedStringOffsetsTable; + } + + bool shareAcrossDWOCUs() const; + + /// Returns the Dwarf Version. + uint16_t getDwarfVersion() const; + + /// Returns the previous CU that was being updated + const DwarfCompileUnit *getPrevCU() const { return PrevCU; } + void setPrevCU(const DwarfCompileUnit *PrevCU) { this->PrevCU = PrevCU; } + + /// Returns the entries for the .debug_loc section. + const DebugLocStream &getDebugLocs() const { return DebugLocs; } + + /// Emit an entry for the debug loc section. This can be used to + /// handle an entry that's going to be emitted into the debug loc section. + void emitDebugLocEntry(ByteStreamer &Streamer, + const DebugLocStream::Entry &Entry, + const DwarfCompileUnit *CU); + + /// Emit the location for a debug loc entry, including the size header. + void emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry, + const DwarfCompileUnit *CU); + + void addSubprogramNames(const DICompileUnit &CU, const DISubprogram *SP, + DIE &Die); + + AddressPool &getAddressPool() { return AddrPool; } + + void addAccelName(const DICompileUnit &CU, StringRef Name, const DIE &Die); + + void addAccelObjC(const DICompileUnit &CU, StringRef Name, const DIE &Die); + + void addAccelNamespace(const DICompileUnit &CU, StringRef Name, + const DIE &Die); + + void addAccelType(const DICompileUnit &CU, StringRef Name, const DIE &Die, + char Flags); + + const MachineFunction *getCurrentFunction() const { return CurFn; } + + /// A helper function to check whether the DIE for a given Scope is + /// going to be null. + bool isLexicalScopeDIENull(LexicalScope *Scope); + + /// Find the matching DwarfCompileUnit for the given CU DIE. + DwarfCompileUnit *lookupCU(const DIE *Die) { return CUDieMap.lookup(Die); } + const DwarfCompileUnit *lookupCU(const DIE *Die) const { + return CUDieMap.lookup(Die); + } + + /// \defgroup DebuggerTuning Predicates to tune DWARF for a given debugger. + /// + /// Returns whether we are "tuning" for a given debugger. + /// @{ + bool tuneForGDB() const { return DebuggerTuning == DebuggerKind::GDB; } + bool tuneForLLDB() const { return DebuggerTuning == DebuggerKind::LLDB; } + bool tuneForSCE() const { return DebuggerTuning == DebuggerKind::SCE; } + /// @} + + void addSectionLabel(const MCSymbol *Sym); + const MCSymbol *getSectionLabel(const MCSection *S); + + static void emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT, + const DbgValueLoc &Value, + DwarfExpression &DwarfExpr); +}; + +} // end namespace llvm + +#endif // LLVM_LIB_CODEGEN_ASMPRINTER_DWARFDEBUG_H diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfException.h b/llvm/lib/CodeGen/AsmPrinter/DwarfException.h new file mode 100644 index 000000000000..24bbf58b91ec --- /dev/null +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfException.h @@ -0,0 +1,94 @@ +//===-- DwarfException.h - Dwarf Exception Framework -----------*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing dwarf exception info into asm files. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H + +#include "EHStreamer.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/MC/MCDwarf.h" + +namespace llvm { +class MachineFunction; +class ARMTargetStreamer; + +class LLVM_LIBRARY_VISIBILITY DwarfCFIExceptionBase : public EHStreamer { +protected: + DwarfCFIExceptionBase(AsmPrinter *A); + + /// Per-function flag to indicate if frame CFI info should be emitted. + bool shouldEmitCFI; + /// Per-module flag to indicate if .cfi_section has beeen emitted. + bool hasEmittedCFISections; + + void markFunctionEnd() override; + void endFragment() override; +}; + +class LLVM_LIBRARY_VISIBILITY DwarfCFIException : public DwarfCFIExceptionBase { + /// Per-function flag to indicate if .cfi_personality should be emitted. + bool shouldEmitPersonality; + + /// Per-function flag to indicate if .cfi_personality must be emitted. + bool forceEmitPersonality; + + /// Per-function flag to indicate if .cfi_lsda should be emitted. + bool shouldEmitLSDA; + + /// Per-function flag to indicate if frame moves info should be emitted. + bool shouldEmitMoves; + +public: + //===--------------------------------------------------------------------===// + // Main entry points. + // + DwarfCFIException(AsmPrinter *A); + ~DwarfCFIException() override; + + /// Emit all exception information that should come after the content. + void endModule() override; + + /// Gather pre-function exception information. Assumes being emitted + /// immediately after the function entry point. + void beginFunction(const MachineFunction *MF) override; + + /// Gather and emit post-function exception information. + void endFunction(const MachineFunction *) override; + + void beginFragment(const MachineBasicBlock *MBB, + ExceptionSymbolProvider ESP) override; +}; + +class LLVM_LIBRARY_VISIBILITY ARMException : public DwarfCFIExceptionBase { + void emitTypeInfos(unsigned TTypeEncoding, MCSymbol *TTBaseLabel) override; + ARMTargetStreamer &getTargetStreamer(); + +public: + //===--------------------------------------------------------------------===// + // Main entry points. + // + ARMException(AsmPrinter *A); + ~ARMException() override; + + /// Emit all exception information that should come after the content. + void endModule() override {} + + /// Gather pre-function exception information. Assumes being emitted + /// immediately after the function entry point. + void beginFunction(const MachineFunction *MF) override; + + /// Gather and emit post-function exception information. + void endFunction(const MachineFunction *) override; +}; +} // End of namespace llvm + +#endif diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp new file mode 100644 index 000000000000..1c5a244d7c5d --- /dev/null +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp @@ -0,0 +1,578 @@ +//===- llvm/CodeGen/DwarfExpression.cpp - Dwarf Debug Framework -----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing dwarf debug info into asm files. +// +//===----------------------------------------------------------------------===// + +#include "DwarfExpression.h" +#include "DwarfCompileUnit.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/SmallBitVector.h" +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/CodeGen/Register.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/Support/ErrorHandling.h" +#include <algorithm> +#include <cassert> +#include <cstdint> + +using namespace llvm; + +void DwarfExpression::emitConstu(uint64_t Value) { + if (Value < 32) + emitOp(dwarf::DW_OP_lit0 + Value); + else if (Value == std::numeric_limits<uint64_t>::max()) { + // Only do this for 64-bit values as the DWARF expression stack uses + // target-address-size values. + emitOp(dwarf::DW_OP_lit0); + emitOp(dwarf::DW_OP_not); + } else { + emitOp(dwarf::DW_OP_constu); + emitUnsigned(Value); + } +} + +void DwarfExpression::addReg(int DwarfReg, const char *Comment) { + assert(DwarfReg >= 0 && "invalid negative dwarf register number"); + assert((isUnknownLocation() || isRegisterLocation()) && + "location description already locked down"); + LocationKind = Register; + if (DwarfReg < 32) { + emitOp(dwarf::DW_OP_reg0 + DwarfReg, Comment); + } else { + emitOp(dwarf::DW_OP_regx, Comment); + emitUnsigned(DwarfReg); + } +} + +void DwarfExpression::addBReg(int DwarfReg, int Offset) { + assert(DwarfReg >= 0 && "invalid negative dwarf register number"); + assert(!isRegisterLocation() && "location description already locked down"); + if (DwarfReg < 32) { + emitOp(dwarf::DW_OP_breg0 + DwarfReg); + } else { + emitOp(dwarf::DW_OP_bregx); + emitUnsigned(DwarfReg); + } + emitSigned(Offset); +} + +void DwarfExpression::addFBReg(int Offset) { + emitOp(dwarf::DW_OP_fbreg); + emitSigned(Offset); +} + +void DwarfExpression::addOpPiece(unsigned SizeInBits, unsigned OffsetInBits) { + if (!SizeInBits) + return; + + const unsigned SizeOfByte = 8; + if (OffsetInBits > 0 || SizeInBits % SizeOfByte) { + emitOp(dwarf::DW_OP_bit_piece); + emitUnsigned(SizeInBits); + emitUnsigned(OffsetInBits); + } else { + emitOp(dwarf::DW_OP_piece); + unsigned ByteSize = SizeInBits / SizeOfByte; + emitUnsigned(ByteSize); + } + this->OffsetInBits += SizeInBits; +} + +void DwarfExpression::addShr(unsigned ShiftBy) { + emitConstu(ShiftBy); + emitOp(dwarf::DW_OP_shr); +} + +void DwarfExpression::addAnd(unsigned Mask) { + emitConstu(Mask); + emitOp(dwarf::DW_OP_and); +} + +bool DwarfExpression::addMachineReg(const TargetRegisterInfo &TRI, + unsigned MachineReg, unsigned MaxSize) { + if (!llvm::Register::isPhysicalRegister(MachineReg)) { + if (isFrameRegister(TRI, MachineReg)) { + DwarfRegs.push_back({-1, 0, nullptr}); + return true; + } + return false; + } + + int Reg = TRI.getDwarfRegNum(MachineReg, false); + + // If this is a valid register number, emit it. + if (Reg >= 0) { + DwarfRegs.push_back({Reg, 0, nullptr}); + return true; + } + + // Walk up the super-register chain until we find a valid number. + // For example, EAX on x86_64 is a 32-bit fragment of RAX with offset 0. + for (MCSuperRegIterator SR(MachineReg, &TRI); SR.isValid(); ++SR) { + Reg = TRI.getDwarfRegNum(*SR, false); + if (Reg >= 0) { + unsigned Idx = TRI.getSubRegIndex(*SR, MachineReg); + unsigned Size = TRI.getSubRegIdxSize(Idx); + unsigned RegOffset = TRI.getSubRegIdxOffset(Idx); + DwarfRegs.push_back({Reg, 0, "super-register"}); + // Use a DW_OP_bit_piece to describe the sub-register. + setSubRegisterPiece(Size, RegOffset); + return true; + } + } + + // Otherwise, attempt to find a covering set of sub-register numbers. + // For example, Q0 on ARM is a composition of D0+D1. + unsigned CurPos = 0; + // The size of the register in bits. + const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(MachineReg); + unsigned RegSize = TRI.getRegSizeInBits(*RC); + // Keep track of the bits in the register we already emitted, so we + // can avoid emitting redundant aliasing subregs. Because this is + // just doing a greedy scan of all subregisters, it is possible that + // this doesn't find a combination of subregisters that fully cover + // the register (even though one may exist). + SmallBitVector Coverage(RegSize, false); + for (MCSubRegIterator SR(MachineReg, &TRI); SR.isValid(); ++SR) { + unsigned Idx = TRI.getSubRegIndex(MachineReg, *SR); + unsigned Size = TRI.getSubRegIdxSize(Idx); + unsigned Offset = TRI.getSubRegIdxOffset(Idx); + Reg = TRI.getDwarfRegNum(*SR, false); + if (Reg < 0) + continue; + + // Intersection between the bits we already emitted and the bits + // covered by this subregister. + SmallBitVector CurSubReg(RegSize, false); + CurSubReg.set(Offset, Offset + Size); + + // If this sub-register has a DWARF number and we haven't covered + // its range, emit a DWARF piece for it. + if (CurSubReg.test(Coverage)) { + // Emit a piece for any gap in the coverage. + if (Offset > CurPos) + DwarfRegs.push_back({-1, Offset - CurPos, "no DWARF register encoding"}); + DwarfRegs.push_back( + {Reg, std::min<unsigned>(Size, MaxSize - Offset), "sub-register"}); + if (Offset >= MaxSize) + break; + + // Mark it as emitted. + Coverage.set(Offset, Offset + Size); + CurPos = Offset + Size; + } + } + // Failed to find any DWARF encoding. + if (CurPos == 0) + return false; + // Found a partial or complete DWARF encoding. + if (CurPos < RegSize) + DwarfRegs.push_back({-1, RegSize - CurPos, "no DWARF register encoding"}); + return true; +} + +void DwarfExpression::addStackValue() { + if (DwarfVersion >= 4) + emitOp(dwarf::DW_OP_stack_value); +} + +void DwarfExpression::addSignedConstant(int64_t Value) { + assert(isImplicitLocation() || isUnknownLocation()); + LocationKind = Implicit; + emitOp(dwarf::DW_OP_consts); + emitSigned(Value); +} + +void DwarfExpression::addUnsignedConstant(uint64_t Value) { + assert(isImplicitLocation() || isUnknownLocation()); + LocationKind = Implicit; + emitConstu(Value); +} + +void DwarfExpression::addUnsignedConstant(const APInt &Value) { + assert(isImplicitLocation() || isUnknownLocation()); + LocationKind = Implicit; + + unsigned Size = Value.getBitWidth(); + const uint64_t *Data = Value.getRawData(); + + // Chop it up into 64-bit pieces, because that's the maximum that + // addUnsignedConstant takes. + unsigned Offset = 0; + while (Offset < Size) { + addUnsignedConstant(*Data++); + if (Offset == 0 && Size <= 64) + break; + addStackValue(); + addOpPiece(std::min(Size - Offset, 64u), Offset); + Offset += 64; + } +} + +bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI, + DIExpressionCursor &ExprCursor, + unsigned MachineReg, + unsigned FragmentOffsetInBits) { + auto Fragment = ExprCursor.getFragmentInfo(); + if (!addMachineReg(TRI, MachineReg, Fragment ? Fragment->SizeInBits : ~1U)) { + LocationKind = Unknown; + return false; + } + + bool HasComplexExpression = false; + auto Op = ExprCursor.peek(); + if (Op && Op->getOp() != dwarf::DW_OP_LLVM_fragment) + HasComplexExpression = true; + + // If the register can only be described by a complex expression (i.e., + // multiple subregisters) it doesn't safely compose with another complex + // expression. For example, it is not possible to apply a DW_OP_deref + // operation to multiple DW_OP_pieces. + if (HasComplexExpression && DwarfRegs.size() > 1) { + DwarfRegs.clear(); + LocationKind = Unknown; + return false; + } + + // Handle simple register locations. If we are supposed to emit + // a call site parameter expression and if that expression is just a register + // location, emit it with addBReg and offset 0, because we should emit a DWARF + // expression representing a value, rather than a location. + if (!isMemoryLocation() && !HasComplexExpression && + (!isParameterValue() || isEntryValue())) { + for (auto &Reg : DwarfRegs) { + if (Reg.DwarfRegNo >= 0) + addReg(Reg.DwarfRegNo, Reg.Comment); + addOpPiece(Reg.Size); + } + + if (isEntryValue()) + finalizeEntryValue(); + + if (isEntryValue() && !isParameterValue() && DwarfVersion >= 4) + emitOp(dwarf::DW_OP_stack_value); + + DwarfRegs.clear(); + return true; + } + + // Don't emit locations that cannot be expressed without DW_OP_stack_value. + if (DwarfVersion < 4) + if (any_of(ExprCursor, [](DIExpression::ExprOperand Op) -> bool { + return Op.getOp() == dwarf::DW_OP_stack_value; + })) { + DwarfRegs.clear(); + LocationKind = Unknown; + return false; + } + + assert(DwarfRegs.size() == 1); + auto Reg = DwarfRegs[0]; + bool FBReg = isFrameRegister(TRI, MachineReg); + int SignedOffset = 0; + assert(Reg.Size == 0 && "subregister has same size as superregister"); + + // Pattern-match combinations for which more efficient representations exist. + // [Reg, DW_OP_plus_uconst, Offset] --> [DW_OP_breg, Offset]. + if (Op && (Op->getOp() == dwarf::DW_OP_plus_uconst)) { + uint64_t Offset = Op->getArg(0); + uint64_t IntMax = static_cast<uint64_t>(std::numeric_limits<int>::max()); + if (Offset <= IntMax) { + SignedOffset = Offset; + ExprCursor.take(); + } + } + + // [Reg, DW_OP_constu, Offset, DW_OP_plus] --> [DW_OP_breg, Offset] + // [Reg, DW_OP_constu, Offset, DW_OP_minus] --> [DW_OP_breg,-Offset] + // If Reg is a subregister we need to mask it out before subtracting. + if (Op && Op->getOp() == dwarf::DW_OP_constu) { + uint64_t Offset = Op->getArg(0); + uint64_t IntMax = static_cast<uint64_t>(std::numeric_limits<int>::max()); + auto N = ExprCursor.peekNext(); + if (N && N->getOp() == dwarf::DW_OP_plus && Offset <= IntMax) { + SignedOffset = Offset; + ExprCursor.consume(2); + } else if (N && N->getOp() == dwarf::DW_OP_minus && + !SubRegisterSizeInBits && Offset <= IntMax + 1) { + SignedOffset = -static_cast<int64_t>(Offset); + ExprCursor.consume(2); + } + } + + if (FBReg) + addFBReg(SignedOffset); + else + addBReg(Reg.DwarfRegNo, SignedOffset); + DwarfRegs.clear(); + return true; +} + +void DwarfExpression::beginEntryValueExpression( + DIExpressionCursor &ExprCursor) { + auto Op = ExprCursor.take(); + (void)Op; + assert(Op && Op->getOp() == dwarf::DW_OP_LLVM_entry_value); + assert(!isMemoryLocation() && + "We don't support entry values of memory locations yet"); + assert(!IsEmittingEntryValue && "Already emitting entry value?"); + assert(Op->getArg(0) == 1 && + "Can currently only emit entry values covering a single operation"); + + emitOp(CU.getDwarf5OrGNULocationAtom(dwarf::DW_OP_entry_value)); + IsEmittingEntryValue = true; + enableTemporaryBuffer(); +} + +void DwarfExpression::finalizeEntryValue() { + assert(IsEmittingEntryValue && "Entry value not open?"); + disableTemporaryBuffer(); + + // Emit the entry value's size operand. + unsigned Size = getTemporaryBufferSize(); + emitUnsigned(Size); + + // Emit the entry value's DWARF block operand. + commitTemporaryBuffer(); + + IsEmittingEntryValue = false; +} + +/// Assuming a well-formed expression, match "DW_OP_deref* DW_OP_LLVM_fragment?". +static bool isMemoryLocation(DIExpressionCursor ExprCursor) { + while (ExprCursor) { + auto Op = ExprCursor.take(); + switch (Op->getOp()) { + case dwarf::DW_OP_deref: + case dwarf::DW_OP_LLVM_fragment: + break; + default: + return false; + } + } + return true; +} + +void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor, + unsigned FragmentOffsetInBits) { + // If we need to mask out a subregister, do it now, unless the next + // operation would emit an OpPiece anyway. + auto N = ExprCursor.peek(); + if (SubRegisterSizeInBits && N && (N->getOp() != dwarf::DW_OP_LLVM_fragment)) + maskSubRegister(); + + Optional<DIExpression::ExprOperand> PrevConvertOp = None; + + while (ExprCursor) { + auto Op = ExprCursor.take(); + uint64_t OpNum = Op->getOp(); + + if (OpNum >= dwarf::DW_OP_reg0 && OpNum <= dwarf::DW_OP_reg31) { + emitOp(OpNum); + continue; + } else if (OpNum >= dwarf::DW_OP_breg0 && OpNum <= dwarf::DW_OP_breg31) { + addBReg(OpNum - dwarf::DW_OP_breg0, Op->getArg(0)); + continue; + } + + switch (OpNum) { + case dwarf::DW_OP_LLVM_fragment: { + unsigned SizeInBits = Op->getArg(1); + unsigned FragmentOffset = Op->getArg(0); + // The fragment offset must have already been adjusted by emitting an + // empty DW_OP_piece / DW_OP_bit_piece before we emitted the base + // location. + assert(OffsetInBits >= FragmentOffset && "fragment offset not added?"); + + // If addMachineReg already emitted DW_OP_piece operations to represent + // a super-register by splicing together sub-registers, subtract the size + // of the pieces that was already emitted. + SizeInBits -= OffsetInBits - FragmentOffset; + + // If addMachineReg requested a DW_OP_bit_piece to stencil out a + // sub-register that is smaller than the current fragment's size, use it. + if (SubRegisterSizeInBits) + SizeInBits = std::min<unsigned>(SizeInBits, SubRegisterSizeInBits); + + // Emit a DW_OP_stack_value for implicit location descriptions. + if (isImplicitLocation()) + addStackValue(); + + // Emit the DW_OP_piece. + addOpPiece(SizeInBits, SubRegisterOffsetInBits); + setSubRegisterPiece(0, 0); + // Reset the location description kind. + LocationKind = Unknown; + return; + } + case dwarf::DW_OP_plus_uconst: + assert(!isRegisterLocation()); + emitOp(dwarf::DW_OP_plus_uconst); + emitUnsigned(Op->getArg(0)); + break; + case dwarf::DW_OP_plus: + case dwarf::DW_OP_minus: + case dwarf::DW_OP_mul: + case dwarf::DW_OP_div: + case dwarf::DW_OP_mod: + case dwarf::DW_OP_or: + case dwarf::DW_OP_and: + case dwarf::DW_OP_xor: + case dwarf::DW_OP_shl: + case dwarf::DW_OP_shr: + case dwarf::DW_OP_shra: + case dwarf::DW_OP_lit0: + case dwarf::DW_OP_not: + case dwarf::DW_OP_dup: + emitOp(OpNum); + break; + case dwarf::DW_OP_deref: + assert(!isRegisterLocation()); + // For more detailed explanation see llvm.org/PR43343. + assert(!isParameterValue() && "Parameter entry values should not be " + "dereferenced due to safety reasons."); + if (!isMemoryLocation() && ::isMemoryLocation(ExprCursor)) + // Turning this into a memory location description makes the deref + // implicit. + LocationKind = Memory; + else + emitOp(dwarf::DW_OP_deref); + break; + case dwarf::DW_OP_constu: + assert(!isRegisterLocation()); + emitConstu(Op->getArg(0)); + break; + case dwarf::DW_OP_LLVM_convert: { + unsigned BitSize = Op->getArg(0); + dwarf::TypeKind Encoding = static_cast<dwarf::TypeKind>(Op->getArg(1)); + if (DwarfVersion >= 5) { + emitOp(dwarf::DW_OP_convert); + // Reuse the base_type if we already have one in this CU otherwise we + // create a new one. + unsigned I = 0, E = CU.ExprRefedBaseTypes.size(); + for (; I != E; ++I) + if (CU.ExprRefedBaseTypes[I].BitSize == BitSize && + CU.ExprRefedBaseTypes[I].Encoding == Encoding) + break; + + if (I == E) + CU.ExprRefedBaseTypes.emplace_back(BitSize, Encoding); + + // If targeting a location-list; simply emit the index into the raw + // byte stream as ULEB128, DwarfDebug::emitDebugLocEntry has been + // fitted with means to extract it later. + // If targeting a inlined DW_AT_location; insert a DIEBaseTypeRef + // (containing the index and a resolve mechanism during emit) into the + // DIE value list. + emitBaseTypeRef(I); + } else { + if (PrevConvertOp && PrevConvertOp->getArg(0) < BitSize) { + if (Encoding == dwarf::DW_ATE_signed) + emitLegacySExt(PrevConvertOp->getArg(0)); + else if (Encoding == dwarf::DW_ATE_unsigned) + emitLegacyZExt(PrevConvertOp->getArg(0)); + PrevConvertOp = None; + } else { + PrevConvertOp = Op; + } + } + break; + } + case dwarf::DW_OP_stack_value: + LocationKind = Implicit; + break; + case dwarf::DW_OP_swap: + assert(!isRegisterLocation()); + emitOp(dwarf::DW_OP_swap); + break; + case dwarf::DW_OP_xderef: + assert(!isRegisterLocation()); + emitOp(dwarf::DW_OP_xderef); + break; + case dwarf::DW_OP_deref_size: + emitOp(dwarf::DW_OP_deref_size); + emitData1(Op->getArg(0)); + break; + case dwarf::DW_OP_LLVM_tag_offset: + TagOffset = Op->getArg(0); + break; + case dwarf::DW_OP_regx: + emitOp(dwarf::DW_OP_regx); + emitUnsigned(Op->getArg(0)); + break; + case dwarf::DW_OP_bregx: + emitOp(dwarf::DW_OP_bregx); + emitUnsigned(Op->getArg(0)); + emitSigned(Op->getArg(1)); + break; + default: + llvm_unreachable("unhandled opcode found in expression"); + } + } + + if (isImplicitLocation() && !isParameterValue()) + // Turn this into an implicit location description. + addStackValue(); +} + +/// add masking operations to stencil out a subregister. +void DwarfExpression::maskSubRegister() { + assert(SubRegisterSizeInBits && "no subregister was registered"); + if (SubRegisterOffsetInBits > 0) + addShr(SubRegisterOffsetInBits); + uint64_t Mask = (1ULL << (uint64_t)SubRegisterSizeInBits) - 1ULL; + addAnd(Mask); +} + +void DwarfExpression::finalize() { + assert(DwarfRegs.size() == 0 && "dwarf registers not emitted"); + // Emit any outstanding DW_OP_piece operations to mask out subregisters. + if (SubRegisterSizeInBits == 0) + return; + // Don't emit a DW_OP_piece for a subregister at offset 0. + if (SubRegisterOffsetInBits == 0) + return; + addOpPiece(SubRegisterSizeInBits, SubRegisterOffsetInBits); +} + +void DwarfExpression::addFragmentOffset(const DIExpression *Expr) { + if (!Expr || !Expr->isFragment()) + return; + + uint64_t FragmentOffset = Expr->getFragmentInfo()->OffsetInBits; + assert(FragmentOffset >= OffsetInBits && + "overlapping or duplicate fragments"); + if (FragmentOffset > OffsetInBits) + addOpPiece(FragmentOffset - OffsetInBits); + OffsetInBits = FragmentOffset; +} + +void DwarfExpression::emitLegacySExt(unsigned FromBits) { + // (((X >> (FromBits - 1)) * (~0)) << FromBits) | X + emitOp(dwarf::DW_OP_dup); + emitOp(dwarf::DW_OP_constu); + emitUnsigned(FromBits - 1); + emitOp(dwarf::DW_OP_shr); + emitOp(dwarf::DW_OP_lit0); + emitOp(dwarf::DW_OP_not); + emitOp(dwarf::DW_OP_mul); + emitOp(dwarf::DW_OP_constu); + emitUnsigned(FromBits); + emitOp(dwarf::DW_OP_shl); + emitOp(dwarf::DW_OP_or); +} + +void DwarfExpression::emitLegacyZExt(unsigned FromBits) { + // (X & (1 << FromBits - 1)) + emitOp(dwarf::DW_OP_constu); + emitUnsigned((1ULL << FromBits) - 1); + emitOp(dwarf::DW_OP_and); +} diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h new file mode 100644 index 000000000000..1ad46669f9b2 --- /dev/null +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h @@ -0,0 +1,413 @@ +//===- llvm/CodeGen/DwarfExpression.h - Dwarf Compile Unit ------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing dwarf compile unit. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFEXPRESSION_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFEXPRESSION_H + +#include "ByteStreamer.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include <cassert> +#include <cstdint> +#include <iterator> + +namespace llvm { + +class AsmPrinter; +class APInt; +class DwarfCompileUnit; +class DIELoc; +class TargetRegisterInfo; + +/// Holds a DIExpression and keeps track of how many operands have been consumed +/// so far. +class DIExpressionCursor { + DIExpression::expr_op_iterator Start, End; + +public: + DIExpressionCursor(const DIExpression *Expr) { + if (!Expr) { + assert(Start == End); + return; + } + Start = Expr->expr_op_begin(); + End = Expr->expr_op_end(); + } + + DIExpressionCursor(ArrayRef<uint64_t> Expr) + : Start(Expr.begin()), End(Expr.end()) {} + + DIExpressionCursor(const DIExpressionCursor &) = default; + + /// Consume one operation. + Optional<DIExpression::ExprOperand> take() { + if (Start == End) + return None; + return *(Start++); + } + + /// Consume N operations. + void consume(unsigned N) { std::advance(Start, N); } + + /// Return the current operation. + Optional<DIExpression::ExprOperand> peek() const { + if (Start == End) + return None; + return *(Start); + } + + /// Return the next operation. + Optional<DIExpression::ExprOperand> peekNext() const { + if (Start == End) + return None; + + auto Next = Start.getNext(); + if (Next == End) + return None; + + return *Next; + } + + /// Determine whether there are any operations left in this expression. + operator bool() const { return Start != End; } + + DIExpression::expr_op_iterator begin() const { return Start; } + DIExpression::expr_op_iterator end() const { return End; } + + /// Retrieve the fragment information, if any. + Optional<DIExpression::FragmentInfo> getFragmentInfo() const { + return DIExpression::getFragmentInfo(Start, End); + } +}; + +/// Base class containing the logic for constructing DWARF expressions +/// independently of whether they are emitted into a DIE or into a .debug_loc +/// entry. +/// +/// Some DWARF operations, e.g. DW_OP_entry_value, need to calculate the size +/// of a succeeding DWARF block before the latter is emitted to the output. +/// To handle such cases, data can conditionally be emitted to a temporary +/// buffer, which can later on be committed to the main output. The size of the +/// temporary buffer is queryable, allowing for the size of the data to be +/// emitted before the data is committed. +class DwarfExpression { +protected: + /// Holds information about all subregisters comprising a register location. + struct Register { + int DwarfRegNo; + unsigned Size; + const char *Comment; + }; + + /// Whether we are currently emitting an entry value operation. + bool IsEmittingEntryValue = false; + + DwarfCompileUnit &CU; + + /// The register location, if any. + SmallVector<Register, 2> DwarfRegs; + + /// Current Fragment Offset in Bits. + uint64_t OffsetInBits = 0; + + /// Sometimes we need to add a DW_OP_bit_piece to describe a subregister. + unsigned SubRegisterSizeInBits : 16; + unsigned SubRegisterOffsetInBits : 16; + + /// The kind of location description being produced. + enum { Unknown = 0, Register, Memory, Implicit }; + + /// The flags of location description being produced. + enum { EntryValue = 1, CallSiteParamValue }; + + unsigned LocationKind : 3; + unsigned LocationFlags : 2; + unsigned DwarfVersion : 4; + +public: + bool isUnknownLocation() const { + return LocationKind == Unknown; + } + + bool isMemoryLocation() const { + return LocationKind == Memory; + } + + bool isRegisterLocation() const { + return LocationKind == Register; + } + + bool isImplicitLocation() const { + return LocationKind == Implicit; + } + + bool isEntryValue() const { + return LocationFlags & EntryValue; + } + + bool isParameterValue() { + return LocationFlags & CallSiteParamValue; + } + + Optional<uint8_t> TagOffset; + +protected: + /// Push a DW_OP_piece / DW_OP_bit_piece for emitting later, if one is needed + /// to represent a subregister. + void setSubRegisterPiece(unsigned SizeInBits, unsigned OffsetInBits) { + assert(SizeInBits < 65536 && OffsetInBits < 65536); + SubRegisterSizeInBits = SizeInBits; + SubRegisterOffsetInBits = OffsetInBits; + } + + /// Add masking operations to stencil out a subregister. + void maskSubRegister(); + + /// Output a dwarf operand and an optional assembler comment. + virtual void emitOp(uint8_t Op, const char *Comment = nullptr) = 0; + + /// Emit a raw signed value. + virtual void emitSigned(int64_t Value) = 0; + + /// Emit a raw unsigned value. + virtual void emitUnsigned(uint64_t Value) = 0; + + virtual void emitData1(uint8_t Value) = 0; + + virtual void emitBaseTypeRef(uint64_t Idx) = 0; + + /// Start emitting data to the temporary buffer. The data stored in the + /// temporary buffer can be committed to the main output using + /// commitTemporaryBuffer(). + virtual void enableTemporaryBuffer() = 0; + + /// Disable emission to the temporary buffer. This does not commit data + /// in the temporary buffer to the main output. + virtual void disableTemporaryBuffer() = 0; + + /// Return the emitted size, in number of bytes, for the data stored in the + /// temporary buffer. + virtual unsigned getTemporaryBufferSize() = 0; + + /// Commit the data stored in the temporary buffer to the main output. + virtual void commitTemporaryBuffer() = 0; + + /// Emit a normalized unsigned constant. + void emitConstu(uint64_t Value); + + /// Return whether the given machine register is the frame register in the + /// current function. + virtual bool isFrameRegister(const TargetRegisterInfo &TRI, unsigned MachineReg) = 0; + + /// Emit a DW_OP_reg operation. Note that this is only legal inside a DWARF + /// register location description. + void addReg(int DwarfReg, const char *Comment = nullptr); + + /// Emit a DW_OP_breg operation. + void addBReg(int DwarfReg, int Offset); + + /// Emit DW_OP_fbreg <Offset>. + void addFBReg(int Offset); + + /// Emit a partial DWARF register operation. + /// + /// \param MachineReg The register number. + /// \param MaxSize If the register must be composed from + /// sub-registers this is an upper bound + /// for how many bits the emitted DW_OP_piece + /// may cover. + /// + /// If size and offset is zero an operation for the entire register is + /// emitted: Some targets do not provide a DWARF register number for every + /// register. If this is the case, this function will attempt to emit a DWARF + /// register by emitting a fragment of a super-register or by piecing together + /// multiple subregisters that alias the register. + /// + /// \return false if no DWARF register exists for MachineReg. + bool addMachineReg(const TargetRegisterInfo &TRI, unsigned MachineReg, + unsigned MaxSize = ~1U); + + /// Emit a DW_OP_piece or DW_OP_bit_piece operation for a variable fragment. + /// \param OffsetInBits This is an optional offset into the location that + /// is at the top of the DWARF stack. + void addOpPiece(unsigned SizeInBits, unsigned OffsetInBits = 0); + + /// Emit a shift-right dwarf operation. + void addShr(unsigned ShiftBy); + + /// Emit a bitwise and dwarf operation. + void addAnd(unsigned Mask); + + /// Emit a DW_OP_stack_value, if supported. + /// + /// The proper way to describe a constant value is DW_OP_constu <const>, + /// DW_OP_stack_value. Unfortunately, DW_OP_stack_value was not available + /// until DWARF 4, so we will continue to generate DW_OP_constu <const> for + /// DWARF 2 and DWARF 3. Technically, this is incorrect since DW_OP_const + /// <const> actually describes a value at a constant address, not a constant + /// value. However, in the past there was no better way to describe a + /// constant value, so the producers and consumers started to rely on + /// heuristics to disambiguate the value vs. location status of the + /// expression. See PR21176 for more details. + void addStackValue(); + + /// Finalize an entry value by emitting its size operand, and committing the + /// DWARF block which has been emitted to the temporary buffer. + void finalizeEntryValue(); + + ~DwarfExpression() = default; + +public: + DwarfExpression(unsigned DwarfVersion, DwarfCompileUnit &CU) + : CU(CU), SubRegisterSizeInBits(0), SubRegisterOffsetInBits(0), + LocationKind(Unknown), LocationFlags(Unknown), + DwarfVersion(DwarfVersion) {} + + /// This needs to be called last to commit any pending changes. + void finalize(); + + /// Emit a signed constant. + void addSignedConstant(int64_t Value); + + /// Emit an unsigned constant. + void addUnsignedConstant(uint64_t Value); + + /// Emit an unsigned constant. + void addUnsignedConstant(const APInt &Value); + + /// Lock this down to become a memory location description. + void setMemoryLocationKind() { + assert(isUnknownLocation()); + LocationKind = Memory; + } + + /// Lock this down to become an entry value location. + void setEntryValueFlag() { + LocationFlags |= EntryValue; + } + + /// Lock this down to become a call site parameter location. + void setCallSiteParamValueFlag() { + LocationFlags |= CallSiteParamValue; + } + + /// Emit a machine register location. As an optimization this may also consume + /// the prefix of a DwarfExpression if a more efficient representation for + /// combining the register location and the first operation exists. + /// + /// \param FragmentOffsetInBits If this is one fragment out of a + /// fragmented + /// location, this is the offset of the + /// fragment inside the entire variable. + /// \return false if no DWARF register exists + /// for MachineReg. + bool addMachineRegExpression(const TargetRegisterInfo &TRI, + DIExpressionCursor &Expr, unsigned MachineReg, + unsigned FragmentOffsetInBits = 0); + + /// Begin emission of an entry value dwarf operation. The entry value's + /// first operand is the size of the DWARF block (its second operand), + /// which needs to be calculated at time of emission, so we don't emit + /// any operands here. + void beginEntryValueExpression(DIExpressionCursor &ExprCursor); + + /// Emit all remaining operations in the DIExpressionCursor. + /// + /// \param FragmentOffsetInBits If this is one fragment out of multiple + /// locations, this is the offset of the + /// fragment inside the entire variable. + void addExpression(DIExpressionCursor &&Expr, + unsigned FragmentOffsetInBits = 0); + + /// If applicable, emit an empty DW_OP_piece / DW_OP_bit_piece to advance to + /// the fragment described by \c Expr. + void addFragmentOffset(const DIExpression *Expr); + + void emitLegacySExt(unsigned FromBits); + void emitLegacyZExt(unsigned FromBits); +}; + +/// DwarfExpression implementation for .debug_loc entries. +class DebugLocDwarfExpression final : public DwarfExpression { + + struct TempBuffer { + SmallString<32> Bytes; + std::vector<std::string> Comments; + BufferByteStreamer BS; + + TempBuffer(bool GenerateComments) : BS(Bytes, Comments, GenerateComments) {} + }; + + std::unique_ptr<TempBuffer> TmpBuf; + BufferByteStreamer &OutBS; + bool IsBuffering = false; + + /// Return the byte streamer that currently is being emitted to. + ByteStreamer &getActiveStreamer() { return IsBuffering ? TmpBuf->BS : OutBS; } + + void emitOp(uint8_t Op, const char *Comment = nullptr) override; + void emitSigned(int64_t Value) override; + void emitUnsigned(uint64_t Value) override; + void emitData1(uint8_t Value) override; + void emitBaseTypeRef(uint64_t Idx) override; + + void enableTemporaryBuffer() override; + void disableTemporaryBuffer() override; + unsigned getTemporaryBufferSize() override; + void commitTemporaryBuffer() override; + + bool isFrameRegister(const TargetRegisterInfo &TRI, + unsigned MachineReg) override; +public: + DebugLocDwarfExpression(unsigned DwarfVersion, BufferByteStreamer &BS, + DwarfCompileUnit &CU) + : DwarfExpression(DwarfVersion, CU), OutBS(BS) {} +}; + +/// DwarfExpression implementation for singular DW_AT_location. +class DIEDwarfExpression final : public DwarfExpression { + const AsmPrinter &AP; + DIELoc &OutDIE; + DIELoc TmpDIE; + bool IsBuffering = false; + + /// Return the DIE that currently is being emitted to. + DIELoc &getActiveDIE() { return IsBuffering ? TmpDIE : OutDIE; } + + void emitOp(uint8_t Op, const char *Comment = nullptr) override; + void emitSigned(int64_t Value) override; + void emitUnsigned(uint64_t Value) override; + void emitData1(uint8_t Value) override; + void emitBaseTypeRef(uint64_t Idx) override; + + void enableTemporaryBuffer() override; + void disableTemporaryBuffer() override; + unsigned getTemporaryBufferSize() override; + void commitTemporaryBuffer() override; + + bool isFrameRegister(const TargetRegisterInfo &TRI, + unsigned MachineReg) override; +public: + DIEDwarfExpression(const AsmPrinter &AP, DwarfCompileUnit &CU, DIELoc &DIE); + + DIELoc *finalize() { + DwarfExpression::finalize(); + return &OutDIE; + } +}; + +} // end namespace llvm + +#endif // LLVM_LIB_CODEGEN_ASMPRINTER_DWARFEXPRESSION_H diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp new file mode 100644 index 000000000000..e3c9095d1343 --- /dev/null +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp @@ -0,0 +1,131 @@ +//===- llvm/CodeGen/DwarfFile.cpp - Dwarf Debug Framework -----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "DwarfFile.h" +#include "DwarfCompileUnit.h" +#include "DwarfDebug.h" +#include "DwarfUnit.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/DIE.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/MC/MCStreamer.h" +#include <algorithm> +#include <cstdint> + +using namespace llvm; + +DwarfFile::DwarfFile(AsmPrinter *AP, StringRef Pref, BumpPtrAllocator &DA) + : Asm(AP), Abbrevs(AbbrevAllocator), StrPool(DA, *Asm, Pref) {} + +void DwarfFile::addUnit(std::unique_ptr<DwarfCompileUnit> U) { + CUs.push_back(std::move(U)); +} + +// Emit the various dwarf units to the unit section USection with +// the abbreviations going into ASection. +void DwarfFile::emitUnits(bool UseOffsets) { + for (const auto &TheU : CUs) + emitUnit(TheU.get(), UseOffsets); +} + +void DwarfFile::emitUnit(DwarfUnit *TheU, bool UseOffsets) { + if (TheU->getCUNode()->isDebugDirectivesOnly()) + return; + + MCSection *S = TheU->getSection(); + + if (!S) + return; + + // Skip CUs that ended up not being needed (split CUs that were abandoned + // because they added no information beyond the non-split CU) + if (llvm::empty(TheU->getUnitDie().values())) + return; + + Asm->OutStreamer->SwitchSection(S); + TheU->emitHeader(UseOffsets); + Asm->emitDwarfDIE(TheU->getUnitDie()); + + if (MCSymbol *EndLabel = TheU->getEndLabel()) + Asm->OutStreamer->EmitLabel(EndLabel); +} + +// Compute the size and offset for each DIE. +void DwarfFile::computeSizeAndOffsets() { + // Offset from the first CU in the debug info section is 0 initially. + unsigned SecOffset = 0; + + // Iterate over each compile unit and set the size and offsets for each + // DIE within each compile unit. All offsets are CU relative. + for (const auto &TheU : CUs) { + if (TheU->getCUNode()->isDebugDirectivesOnly()) + continue; + + // Skip CUs that ended up not being needed (split CUs that were abandoned + // because they added no information beyond the non-split CU) + if (llvm::empty(TheU->getUnitDie().values())) + return; + + TheU->setDebugSectionOffset(SecOffset); + SecOffset += computeSizeAndOffsetsForUnit(TheU.get()); + } +} + +unsigned DwarfFile::computeSizeAndOffsetsForUnit(DwarfUnit *TheU) { + // CU-relative offset is reset to 0 here. + unsigned Offset = sizeof(int32_t) + // Length of Unit Info + TheU->getHeaderSize(); // Unit-specific headers + + // The return value here is CU-relative, after laying out + // all of the CU DIE. + return computeSizeAndOffset(TheU->getUnitDie(), Offset); +} + +// Compute the size and offset of a DIE. The offset is relative to start of the +// CU. It returns the offset after laying out the DIE. +unsigned DwarfFile::computeSizeAndOffset(DIE &Die, unsigned Offset) { + return Die.computeOffsetsAndAbbrevs(Asm, Abbrevs, Offset); +} + +void DwarfFile::emitAbbrevs(MCSection *Section) { Abbrevs.Emit(Asm, Section); } + +// Emit strings into a string section. +void DwarfFile::emitStrings(MCSection *StrSection, MCSection *OffsetSection, + bool UseRelativeOffsets) { + StrPool.emit(*Asm, StrSection, OffsetSection, UseRelativeOffsets); +} + +bool DwarfFile::addScopeVariable(LexicalScope *LS, DbgVariable *Var) { + auto &ScopeVars = ScopeVariables[LS]; + const DILocalVariable *DV = Var->getVariable(); + if (unsigned ArgNum = DV->getArg()) { + auto Cached = ScopeVars.Args.find(ArgNum); + if (Cached == ScopeVars.Args.end()) + ScopeVars.Args[ArgNum] = Var; + else { + Cached->second->addMMIEntry(*Var); + return false; + } + } else { + ScopeVars.Locals.push_back(Var); + } + return true; +} + +void DwarfFile::addScopeLabel(LexicalScope *LS, DbgLabel *Label) { + SmallVectorImpl<DbgLabel *> &Labels = ScopeLabels[LS]; + Labels.push_back(Label); +} + +std::pair<uint32_t, RangeSpanList *> +DwarfFile::addRange(const DwarfCompileUnit &CU, SmallVector<RangeSpan, 2> R) { + CURangeLists.push_back( + RangeSpanList(Asm->createTempSymbol("debug_ranges"), CU, std::move(R))); + return std::make_pair(CURangeLists.size() - 1, &CURangeLists.back()); +} diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h b/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h new file mode 100644 index 000000000000..35fa51fb24c4 --- /dev/null +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h @@ -0,0 +1,192 @@ +//===- llvm/CodeGen/DwarfFile.h - Dwarf Debug Framework ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFFILE_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFFILE_H + +#include "DwarfStringPool.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/DIE.h" +#include "llvm/IR/Metadata.h" +#include "llvm/Support/Allocator.h" +#include <map> +#include <memory> +#include <utility> + +namespace llvm { + +class AsmPrinter; +class DbgEntity; +class DbgVariable; +class DbgLabel; +class DwarfCompileUnit; +class DwarfUnit; +class LexicalScope; +class MCSection; + +// Data structure to hold a range for range lists. +struct RangeSpan { + const MCSymbol *Begin; + const MCSymbol *End; +}; + +class RangeSpanList { +private: + // Index for locating within the debug_range section this particular span. + MCSymbol *RangeSym; + const DwarfCompileUnit *CU; + // List of ranges. + SmallVector<RangeSpan, 2> Ranges; + +public: + RangeSpanList(MCSymbol *Sym, const DwarfCompileUnit &CU, + SmallVector<RangeSpan, 2> Ranges) + : RangeSym(Sym), CU(&CU), Ranges(std::move(Ranges)) {} + MCSymbol *getSym() const { return RangeSym; } + const DwarfCompileUnit &getCU() const { return *CU; } + const SmallVectorImpl<RangeSpan> &getRanges() const { return Ranges; } +}; + +class DwarfFile { + // Target of Dwarf emission, used for sizing of abbreviations. + AsmPrinter *Asm; + + BumpPtrAllocator AbbrevAllocator; + + // Used to uniquely define abbreviations. + DIEAbbrevSet Abbrevs; + + // A pointer to all units in the section. + SmallVector<std::unique_ptr<DwarfCompileUnit>, 1> CUs; + + DwarfStringPool StrPool; + + // List of range lists for a given compile unit, separate from the ranges for + // the CU itself. + SmallVector<RangeSpanList, 1> CURangeLists; + + /// DWARF v5: The symbol that designates the start of the contribution to + /// the string offsets table. The contribution is shared by all units. + MCSymbol *StringOffsetsStartSym = nullptr; + + /// DWARF v5: The symbol that designates the base of the range list table. + /// The table is shared by all units. + MCSymbol *RnglistsTableBaseSym = nullptr; + + /// The variables of a lexical scope. + struct ScopeVars { + /// We need to sort Args by ArgNo and check for duplicates. This could also + /// be implemented as a list or vector + std::lower_bound(). + std::map<unsigned, DbgVariable *> Args; + SmallVector<DbgVariable *, 8> Locals; + }; + /// Collection of DbgVariables of each lexical scope. + DenseMap<LexicalScope *, ScopeVars> ScopeVariables; + + /// Collection of DbgLabels of each lexical scope. + using LabelList = SmallVector<DbgLabel *, 4>; + DenseMap<LexicalScope *, LabelList> ScopeLabels; + + // Collection of abstract subprogram DIEs. + DenseMap<const MDNode *, DIE *> AbstractSPDies; + DenseMap<const DINode *, std::unique_ptr<DbgEntity>> AbstractEntities; + + /// Maps MDNodes for type system with the corresponding DIEs. These DIEs can + /// be shared across CUs, that is why we keep the map here instead + /// of in DwarfCompileUnit. + DenseMap<const MDNode *, DIE *> DITypeNodeToDieMap; + +public: + DwarfFile(AsmPrinter *AP, StringRef Pref, BumpPtrAllocator &DA); + + const SmallVectorImpl<std::unique_ptr<DwarfCompileUnit>> &getUnits() { + return CUs; + } + + std::pair<uint32_t, RangeSpanList *> addRange(const DwarfCompileUnit &CU, + SmallVector<RangeSpan, 2> R); + + /// getRangeLists - Get the vector of range lists. + const SmallVectorImpl<RangeSpanList> &getRangeLists() const { + return CURangeLists; + } + + /// Compute the size and offset of a DIE given an incoming Offset. + unsigned computeSizeAndOffset(DIE &Die, unsigned Offset); + + /// Compute the size and offset of all the DIEs. + void computeSizeAndOffsets(); + + /// Compute the size and offset of all the DIEs in the given unit. + /// \returns The size of the root DIE. + unsigned computeSizeAndOffsetsForUnit(DwarfUnit *TheU); + + /// Add a unit to the list of CUs. + void addUnit(std::unique_ptr<DwarfCompileUnit> U); + + /// Emit all of the units to the section listed with the given + /// abbreviation section. + void emitUnits(bool UseOffsets); + + /// Emit the given unit to its section. + void emitUnit(DwarfUnit *TheU, bool UseOffsets); + + /// Emit a set of abbreviations to the specific section. + void emitAbbrevs(MCSection *); + + /// Emit all of the strings to the section given. If OffsetSection is + /// non-null, emit a table of string offsets to it. If UseRelativeOffsets + /// is false, emit absolute offsets to the strings. Otherwise, emit + /// relocatable references to the strings if they are supported by the target. + void emitStrings(MCSection *StrSection, MCSection *OffsetSection = nullptr, + bool UseRelativeOffsets = false); + + /// Returns the string pool. + DwarfStringPool &getStringPool() { return StrPool; } + + MCSymbol *getStringOffsetsStartSym() const { return StringOffsetsStartSym; } + void setStringOffsetsStartSym(MCSymbol *Sym) { StringOffsetsStartSym = Sym; } + + MCSymbol *getRnglistsTableBaseSym() const { return RnglistsTableBaseSym; } + void setRnglistsTableBaseSym(MCSymbol *Sym) { RnglistsTableBaseSym = Sym; } + + /// \returns false if the variable was merged with a previous one. + bool addScopeVariable(LexicalScope *LS, DbgVariable *Var); + + void addScopeLabel(LexicalScope *LS, DbgLabel *Label); + + DenseMap<LexicalScope *, ScopeVars> &getScopeVariables() { + return ScopeVariables; + } + + DenseMap<LexicalScope *, LabelList> &getScopeLabels() { + return ScopeLabels; + } + + DenseMap<const MDNode *, DIE *> &getAbstractSPDies() { + return AbstractSPDies; + } + + DenseMap<const DINode *, std::unique_ptr<DbgEntity>> &getAbstractEntities() { + return AbstractEntities; + } + + void insertDIE(const MDNode *TypeMD, DIE *Die) { + DITypeNodeToDieMap.insert(std::make_pair(TypeMD, Die)); + } + + DIE *getDIE(const MDNode *TypeMD) { + return DITypeNodeToDieMap.lookup(TypeMD); + } +}; + +} // end namespace llvm + +#endif // LLVM_LIB_CODEGEN_ASMPRINTER_DWARFFILE_H diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp new file mode 100644 index 000000000000..2a76dcb1b082 --- /dev/null +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp @@ -0,0 +1,130 @@ +//===- llvm/CodeGen/DwarfStringPool.cpp - Dwarf Debug Framework -----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "DwarfStringPool.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCStreamer.h" +#include <cassert> +#include <utility> + +using namespace llvm; + +DwarfStringPool::DwarfStringPool(BumpPtrAllocator &A, AsmPrinter &Asm, + StringRef Prefix) + : Pool(A), Prefix(Prefix), + ShouldCreateSymbols(Asm.MAI->doesDwarfUseRelocationsAcrossSections()) {} + +StringMapEntry<DwarfStringPool::EntryTy> & +DwarfStringPool::getEntryImpl(AsmPrinter &Asm, StringRef Str) { + auto I = Pool.insert(std::make_pair(Str, EntryTy())); + auto &Entry = I.first->second; + if (I.second) { + Entry.Index = EntryTy::NotIndexed; + Entry.Offset = NumBytes; + Entry.Symbol = ShouldCreateSymbols ? Asm.createTempSymbol(Prefix) : nullptr; + + NumBytes += Str.size() + 1; + assert(NumBytes > Entry.Offset && "Unexpected overflow"); + } + return *I.first; +} + +DwarfStringPool::EntryRef DwarfStringPool::getEntry(AsmPrinter &Asm, + StringRef Str) { + auto &MapEntry = getEntryImpl(Asm, Str); + return EntryRef(MapEntry, false); +} + +DwarfStringPool::EntryRef DwarfStringPool::getIndexedEntry(AsmPrinter &Asm, + StringRef Str) { + auto &MapEntry = getEntryImpl(Asm, Str); + if (!MapEntry.getValue().isIndexed()) + MapEntry.getValue().Index = NumIndexedStrings++; + return EntryRef(MapEntry, true); +} + +void DwarfStringPool::emitStringOffsetsTableHeader(AsmPrinter &Asm, + MCSection *Section, + MCSymbol *StartSym) { + if (getNumIndexedStrings() == 0) + return; + Asm.OutStreamer->SwitchSection(Section); + unsigned EntrySize = 4; + // FIXME: DWARF64 + // We are emitting the header for a contribution to the string offsets + // table. The header consists of an entry with the contribution's + // size (not including the size of the length field), the DWARF version and + // 2 bytes of padding. + Asm.emitInt32(getNumIndexedStrings() * EntrySize + 4); + Asm.emitInt16(Asm.getDwarfVersion()); + Asm.emitInt16(0); + // Define the symbol that marks the start of the contribution. It is + // referenced by most unit headers via DW_AT_str_offsets_base. + // Split units do not use the attribute. + if (StartSym) + Asm.OutStreamer->EmitLabel(StartSym); +} + +void DwarfStringPool::emit(AsmPrinter &Asm, MCSection *StrSection, + MCSection *OffsetSection, bool UseRelativeOffsets) { + if (Pool.empty()) + return; + + // Start the dwarf str section. + Asm.OutStreamer->SwitchSection(StrSection); + + // Get all of the string pool entries and sort them by their offset. + SmallVector<const StringMapEntry<EntryTy> *, 64> Entries; + Entries.reserve(Pool.size()); + + for (const auto &E : Pool) + Entries.push_back(&E); + + llvm::sort(Entries, [](const StringMapEntry<EntryTy> *A, + const StringMapEntry<EntryTy> *B) { + return A->getValue().Offset < B->getValue().Offset; + }); + + for (const auto &Entry : Entries) { + assert(ShouldCreateSymbols == static_cast<bool>(Entry->getValue().Symbol) && + "Mismatch between setting and entry"); + + // Emit a label for reference from debug information entries. + if (ShouldCreateSymbols) + Asm.OutStreamer->EmitLabel(Entry->getValue().Symbol); + + // Emit the string itself with a terminating null byte. + Asm.OutStreamer->AddComment("string offset=" + + Twine(Entry->getValue().Offset)); + Asm.OutStreamer->EmitBytes( + StringRef(Entry->getKeyData(), Entry->getKeyLength() + 1)); + } + + // If we've got an offset section go ahead and emit that now as well. + if (OffsetSection) { + // Now only take the indexed entries and put them in an array by their ID so + // we can emit them in order. + Entries.resize(NumIndexedStrings); + for (const auto &Entry : Pool) { + if (Entry.getValue().isIndexed()) + Entries[Entry.getValue().Index] = &Entry; + } + + Asm.OutStreamer->SwitchSection(OffsetSection); + unsigned size = 4; // FIXME: DWARF64 is 8. + for (const auto &Entry : Entries) + if (UseRelativeOffsets) + Asm.emitDwarfStringOffset(Entry->getValue()); + else + Asm.OutStreamer->EmitIntValue(Entry->getValue().Offset, size); + } +} diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h b/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h new file mode 100644 index 000000000000..c5f5637fdae3 --- /dev/null +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h @@ -0,0 +1,66 @@ +//===- llvm/CodeGen/DwarfStringPool.h - Dwarf Debug Framework ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFSTRINGPOOL_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFSTRINGPOOL_H + +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/DwarfStringPoolEntry.h" +#include "llvm/Support/Allocator.h" + +namespace llvm { + +class AsmPrinter; +class MCSection; +class MCSymbol; + +// Collection of strings for this unit and assorted symbols. +// A String->Symbol mapping of strings used by indirect +// references. +class DwarfStringPool { + using EntryTy = DwarfStringPoolEntry; + + StringMap<EntryTy, BumpPtrAllocator &> Pool; + StringRef Prefix; + unsigned NumBytes = 0; + unsigned NumIndexedStrings = 0; + bool ShouldCreateSymbols; + + StringMapEntry<EntryTy> &getEntryImpl(AsmPrinter &Asm, StringRef Str); + +public: + using EntryRef = DwarfStringPoolEntryRef; + + DwarfStringPool(BumpPtrAllocator &A, AsmPrinter &Asm, StringRef Prefix); + + void emitStringOffsetsTableHeader(AsmPrinter &Asm, MCSection *OffsetSection, + MCSymbol *StartSym); + + void emit(AsmPrinter &Asm, MCSection *StrSection, + MCSection *OffsetSection = nullptr, + bool UseRelativeOffsets = false); + + bool empty() const { return Pool.empty(); } + + unsigned size() const { return Pool.size(); } + + unsigned getNumIndexedStrings() const { return NumIndexedStrings; } + + /// Get a reference to an entry in the string pool. + EntryRef getEntry(AsmPrinter &Asm, StringRef Str); + + /// Same as getEntry, except that you can use EntryRef::getIndex to obtain a + /// unique ID of this entry (e.g., for use in indexed forms like + /// DW_FORM_strx). + EntryRef getIndexedEntry(AsmPrinter &Asm, StringRef Str); +}; + +} // end namespace llvm + +#endif // LLVM_LIB_CODEGEN_ASMPRINTER_DWARFSTRINGPOOL_H diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp new file mode 100644 index 000000000000..37c68c085792 --- /dev/null +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -0,0 +1,1723 @@ +//===-- llvm/CodeGen/DwarfUnit.cpp - Dwarf Type and Compile Units ---------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains support for constructing a dwarf compile unit. +// +//===----------------------------------------------------------------------===// + +#include "DwarfUnit.h" +#include "AddressPool.h" +#include "DwarfCompileUnit.h" +#include "DwarfDebug.h" +#include "DwarfExpression.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Metadata.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDwarf.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MachineLocation.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include <cassert> +#include <cstdint> +#include <string> +#include <utility> + +using namespace llvm; + +#define DEBUG_TYPE "dwarfdebug" + +DIEDwarfExpression::DIEDwarfExpression(const AsmPrinter &AP, + DwarfCompileUnit &CU, DIELoc &DIE) + : DwarfExpression(AP.getDwarfVersion(), CU), AP(AP), OutDIE(DIE) {} + +void DIEDwarfExpression::emitOp(uint8_t Op, const char* Comment) { + CU.addUInt(getActiveDIE(), dwarf::DW_FORM_data1, Op); +} + +void DIEDwarfExpression::emitSigned(int64_t Value) { + CU.addSInt(getActiveDIE(), dwarf::DW_FORM_sdata, Value); +} + +void DIEDwarfExpression::emitUnsigned(uint64_t Value) { + CU.addUInt(getActiveDIE(), dwarf::DW_FORM_udata, Value); +} + +void DIEDwarfExpression::emitData1(uint8_t Value) { + CU.addUInt(getActiveDIE(), dwarf::DW_FORM_data1, Value); +} + +void DIEDwarfExpression::emitBaseTypeRef(uint64_t Idx) { + CU.addBaseTypeRef(getActiveDIE(), Idx); +} + +void DIEDwarfExpression::enableTemporaryBuffer() { + assert(!IsBuffering && "Already buffering?"); + IsBuffering = true; +} + +void DIEDwarfExpression::disableTemporaryBuffer() { IsBuffering = false; } + +unsigned DIEDwarfExpression::getTemporaryBufferSize() { + return TmpDIE.ComputeSize(&AP); +} + +void DIEDwarfExpression::commitTemporaryBuffer() { OutDIE.takeValues(TmpDIE); } + +bool DIEDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI, + unsigned MachineReg) { + return MachineReg == TRI.getFrameRegister(*AP.MF); +} + +DwarfUnit::DwarfUnit(dwarf::Tag UnitTag, const DICompileUnit *Node, + AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU) + : DIEUnit(A->getDwarfVersion(), A->MAI->getCodePointerSize(), UnitTag), + CUNode(Node), Asm(A), DD(DW), DU(DWU), IndexTyDie(nullptr) { +} + +DwarfTypeUnit::DwarfTypeUnit(DwarfCompileUnit &CU, AsmPrinter *A, + DwarfDebug *DW, DwarfFile *DWU, + MCDwarfDwoLineTable *SplitLineTable) + : DwarfUnit(dwarf::DW_TAG_type_unit, CU.getCUNode(), A, DW, DWU), CU(CU), + SplitLineTable(SplitLineTable) { +} + +DwarfUnit::~DwarfUnit() { + for (unsigned j = 0, M = DIEBlocks.size(); j < M; ++j) + DIEBlocks[j]->~DIEBlock(); + for (unsigned j = 0, M = DIELocs.size(); j < M; ++j) + DIELocs[j]->~DIELoc(); +} + +int64_t DwarfUnit::getDefaultLowerBound() const { + switch (getLanguage()) { + default: + break; + + // The languages below have valid values in all DWARF versions. + case dwarf::DW_LANG_C: + case dwarf::DW_LANG_C89: + case dwarf::DW_LANG_C_plus_plus: + return 0; + + case dwarf::DW_LANG_Fortran77: + case dwarf::DW_LANG_Fortran90: + return 1; + + // The languages below have valid values only if the DWARF version >= 3. + case dwarf::DW_LANG_C99: + case dwarf::DW_LANG_ObjC: + case dwarf::DW_LANG_ObjC_plus_plus: + if (DD->getDwarfVersion() >= 3) + return 0; + break; + + case dwarf::DW_LANG_Fortran95: + if (DD->getDwarfVersion() >= 3) + return 1; + break; + + // Starting with DWARF v4, all defined languages have valid values. + case dwarf::DW_LANG_D: + case dwarf::DW_LANG_Java: + case dwarf::DW_LANG_Python: + case dwarf::DW_LANG_UPC: + if (DD->getDwarfVersion() >= 4) + return 0; + break; + + case dwarf::DW_LANG_Ada83: + case dwarf::DW_LANG_Ada95: + case dwarf::DW_LANG_Cobol74: + case dwarf::DW_LANG_Cobol85: + case dwarf::DW_LANG_Modula2: + case dwarf::DW_LANG_Pascal83: + case dwarf::DW_LANG_PLI: + if (DD->getDwarfVersion() >= 4) + return 1; + break; + + // The languages below are new in DWARF v5. + case dwarf::DW_LANG_BLISS: + case dwarf::DW_LANG_C11: + case dwarf::DW_LANG_C_plus_plus_03: + case dwarf::DW_LANG_C_plus_plus_11: + case dwarf::DW_LANG_C_plus_plus_14: + case dwarf::DW_LANG_Dylan: + case dwarf::DW_LANG_Go: + case dwarf::DW_LANG_Haskell: + case dwarf::DW_LANG_OCaml: + case dwarf::DW_LANG_OpenCL: + case dwarf::DW_LANG_RenderScript: + case dwarf::DW_LANG_Rust: + case dwarf::DW_LANG_Swift: + if (DD->getDwarfVersion() >= 5) + return 0; + break; + + case dwarf::DW_LANG_Fortran03: + case dwarf::DW_LANG_Fortran08: + case dwarf::DW_LANG_Julia: + case dwarf::DW_LANG_Modula3: + if (DD->getDwarfVersion() >= 5) + return 1; + break; + } + + return -1; +} + +/// Check whether the DIE for this MDNode can be shared across CUs. +bool DwarfUnit::isShareableAcrossCUs(const DINode *D) const { + // When the MDNode can be part of the type system, the DIE can be shared + // across CUs. + // Combining type units and cross-CU DIE sharing is lower value (since + // cross-CU DIE sharing is used in LTO and removes type redundancy at that + // level already) but may be implementable for some value in projects + // building multiple independent libraries with LTO and then linking those + // together. + if (isDwoUnit() && !DD->shareAcrossDWOCUs()) + return false; + return (isa<DIType>(D) || + (isa<DISubprogram>(D) && !cast<DISubprogram>(D)->isDefinition())) && + !DD->generateTypeUnits(); +} + +DIE *DwarfUnit::getDIE(const DINode *D) const { + if (isShareableAcrossCUs(D)) + return DU->getDIE(D); + return MDNodeToDieMap.lookup(D); +} + +void DwarfUnit::insertDIE(const DINode *Desc, DIE *D) { + if (isShareableAcrossCUs(Desc)) { + DU->insertDIE(Desc, D); + return; + } + MDNodeToDieMap.insert(std::make_pair(Desc, D)); +} + +void DwarfUnit::insertDIE(DIE *D) { + MDNodeToDieMap.insert(std::make_pair(nullptr, D)); +} + +void DwarfUnit::addFlag(DIE &Die, dwarf::Attribute Attribute) { + if (DD->getDwarfVersion() >= 4) + Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_flag_present, + DIEInteger(1)); + else + Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_flag, + DIEInteger(1)); +} + +void DwarfUnit::addUInt(DIEValueList &Die, dwarf::Attribute Attribute, + Optional<dwarf::Form> Form, uint64_t Integer) { + if (!Form) + Form = DIEInteger::BestForm(false, Integer); + assert(Form != dwarf::DW_FORM_implicit_const && + "DW_FORM_implicit_const is used only for signed integers"); + Die.addValue(DIEValueAllocator, Attribute, *Form, DIEInteger(Integer)); +} + +void DwarfUnit::addUInt(DIEValueList &Block, dwarf::Form Form, + uint64_t Integer) { + addUInt(Block, (dwarf::Attribute)0, Form, Integer); +} + +void DwarfUnit::addSInt(DIEValueList &Die, dwarf::Attribute Attribute, + Optional<dwarf::Form> Form, int64_t Integer) { + if (!Form) + Form = DIEInteger::BestForm(true, Integer); + Die.addValue(DIEValueAllocator, Attribute, *Form, DIEInteger(Integer)); +} + +void DwarfUnit::addSInt(DIELoc &Die, Optional<dwarf::Form> Form, + int64_t Integer) { + addSInt(Die, (dwarf::Attribute)0, Form, Integer); +} + +void DwarfUnit::addString(DIE &Die, dwarf::Attribute Attribute, + StringRef String) { + if (CUNode->isDebugDirectivesOnly()) + return; + + if (DD->useInlineStrings()) { + Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_string, + new (DIEValueAllocator) + DIEInlineString(String, DIEValueAllocator)); + return; + } + dwarf::Form IxForm = + isDwoUnit() ? dwarf::DW_FORM_GNU_str_index : dwarf::DW_FORM_strp; + + auto StringPoolEntry = + useSegmentedStringOffsetsTable() || IxForm == dwarf::DW_FORM_GNU_str_index + ? DU->getStringPool().getIndexedEntry(*Asm, String) + : DU->getStringPool().getEntry(*Asm, String); + + // For DWARF v5 and beyond, use the smallest strx? form possible. + if (useSegmentedStringOffsetsTable()) { + IxForm = dwarf::DW_FORM_strx1; + unsigned Index = StringPoolEntry.getIndex(); + if (Index > 0xffffff) + IxForm = dwarf::DW_FORM_strx4; + else if (Index > 0xffff) + IxForm = dwarf::DW_FORM_strx3; + else if (Index > 0xff) + IxForm = dwarf::DW_FORM_strx2; + } + Die.addValue(DIEValueAllocator, Attribute, IxForm, + DIEString(StringPoolEntry)); +} + +DIEValueList::value_iterator DwarfUnit::addLabel(DIEValueList &Die, + dwarf::Attribute Attribute, + dwarf::Form Form, + const MCSymbol *Label) { + return Die.addValue(DIEValueAllocator, Attribute, Form, DIELabel(Label)); +} + +void DwarfUnit::addLabel(DIELoc &Die, dwarf::Form Form, const MCSymbol *Label) { + addLabel(Die, (dwarf::Attribute)0, Form, Label); +} + +void DwarfUnit::addSectionOffset(DIE &Die, dwarf::Attribute Attribute, + uint64_t Integer) { + if (DD->getDwarfVersion() >= 4) + addUInt(Die, Attribute, dwarf::DW_FORM_sec_offset, Integer); + else + addUInt(Die, Attribute, dwarf::DW_FORM_data4, Integer); +} + +Optional<MD5::MD5Result> DwarfUnit::getMD5AsBytes(const DIFile *File) const { + assert(File); + if (DD->getDwarfVersion() < 5) + return None; + Optional<DIFile::ChecksumInfo<StringRef>> Checksum = File->getChecksum(); + if (!Checksum || Checksum->Kind != DIFile::CSK_MD5) + return None; + + // Convert the string checksum to an MD5Result for the streamer. + // The verifier validates the checksum so we assume it's okay. + // An MD5 checksum is 16 bytes. + std::string ChecksumString = fromHex(Checksum->Value); + MD5::MD5Result CKMem; + std::copy(ChecksumString.begin(), ChecksumString.end(), CKMem.Bytes.data()); + return CKMem; +} + +unsigned DwarfTypeUnit::getOrCreateSourceID(const DIFile *File) { + if (!SplitLineTable) + return getCU().getOrCreateSourceID(File); + if (!UsedLineTable) { + UsedLineTable = true; + // This is a split type unit that needs a line table. + addSectionOffset(getUnitDie(), dwarf::DW_AT_stmt_list, 0); + } + return SplitLineTable->getFile(File->getDirectory(), File->getFilename(), + getMD5AsBytes(File), + Asm->OutContext.getDwarfVersion(), + File->getSource()); +} + +void DwarfUnit::addOpAddress(DIELoc &Die, const MCSymbol *Sym) { + if (DD->getDwarfVersion() >= 5) { + addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_addrx); + addUInt(Die, dwarf::DW_FORM_addrx, DD->getAddressPool().getIndex(Sym)); + return; + } + + if (DD->useSplitDwarf()) { + addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_addr_index); + addUInt(Die, dwarf::DW_FORM_GNU_addr_index, + DD->getAddressPool().getIndex(Sym)); + return; + } + + addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); + addLabel(Die, dwarf::DW_FORM_udata, Sym); +} + +void DwarfUnit::addLabelDelta(DIE &Die, dwarf::Attribute Attribute, + const MCSymbol *Hi, const MCSymbol *Lo) { + Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_data4, + new (DIEValueAllocator) DIEDelta(Hi, Lo)); +} + +void DwarfUnit::addDIEEntry(DIE &Die, dwarf::Attribute Attribute, DIE &Entry) { + addDIEEntry(Die, Attribute, DIEEntry(Entry)); +} + +void DwarfUnit::addDIETypeSignature(DIE &Die, uint64_t Signature) { + // Flag the type unit reference as a declaration so that if it contains + // members (implicit special members, static data member definitions, member + // declarations for definitions in this CU, etc) consumers don't get confused + // and think this is a full definition. + addFlag(Die, dwarf::DW_AT_declaration); + + Die.addValue(DIEValueAllocator, dwarf::DW_AT_signature, + dwarf::DW_FORM_ref_sig8, DIEInteger(Signature)); +} + +void DwarfUnit::addDIEEntry(DIE &Die, dwarf::Attribute Attribute, + DIEEntry Entry) { + const DIEUnit *CU = Die.getUnit(); + const DIEUnit *EntryCU = Entry.getEntry().getUnit(); + if (!CU) + // We assume that Die belongs to this CU, if it is not linked to any CU yet. + CU = getUnitDie().getUnit(); + if (!EntryCU) + EntryCU = getUnitDie().getUnit(); + Die.addValue(DIEValueAllocator, Attribute, + EntryCU == CU ? dwarf::DW_FORM_ref4 : dwarf::DW_FORM_ref_addr, + Entry); +} + +DIE &DwarfUnit::createAndAddDIE(unsigned Tag, DIE &Parent, const DINode *N) { + DIE &Die = Parent.addChild(DIE::get(DIEValueAllocator, (dwarf::Tag)Tag)); + if (N) + insertDIE(N, &Die); + return Die; +} + +void DwarfUnit::addBlock(DIE &Die, dwarf::Attribute Attribute, DIELoc *Loc) { + Loc->ComputeSize(Asm); + DIELocs.push_back(Loc); // Memoize so we can call the destructor later on. + Die.addValue(DIEValueAllocator, Attribute, + Loc->BestForm(DD->getDwarfVersion()), Loc); +} + +void DwarfUnit::addBlock(DIE &Die, dwarf::Attribute Attribute, + DIEBlock *Block) { + Block->ComputeSize(Asm); + DIEBlocks.push_back(Block); // Memoize so we can call the destructor later on. + Die.addValue(DIEValueAllocator, Attribute, Block->BestForm(), Block); +} + +void DwarfUnit::addSourceLine(DIE &Die, unsigned Line, const DIFile *File) { + if (Line == 0) + return; + + unsigned FileID = getOrCreateSourceID(File); + addUInt(Die, dwarf::DW_AT_decl_file, None, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, None, Line); +} + +void DwarfUnit::addSourceLine(DIE &Die, const DILocalVariable *V) { + assert(V); + + addSourceLine(Die, V->getLine(), V->getFile()); +} + +void DwarfUnit::addSourceLine(DIE &Die, const DIGlobalVariable *G) { + assert(G); + + addSourceLine(Die, G->getLine(), G->getFile()); +} + +void DwarfUnit::addSourceLine(DIE &Die, const DISubprogram *SP) { + assert(SP); + + addSourceLine(Die, SP->getLine(), SP->getFile()); +} + +void DwarfUnit::addSourceLine(DIE &Die, const DILabel *L) { + assert(L); + + addSourceLine(Die, L->getLine(), L->getFile()); +} + +void DwarfUnit::addSourceLine(DIE &Die, const DIType *Ty) { + assert(Ty); + + addSourceLine(Die, Ty->getLine(), Ty->getFile()); +} + +void DwarfUnit::addSourceLine(DIE &Die, const DIObjCProperty *Ty) { + assert(Ty); + + addSourceLine(Die, Ty->getLine(), Ty->getFile()); +} + +/// Return true if type encoding is unsigned. +static bool isUnsignedDIType(DwarfDebug *DD, const DIType *Ty) { + if (auto *CTy = dyn_cast<DICompositeType>(Ty)) { + // FIXME: Enums without a fixed underlying type have unknown signedness + // here, leading to incorrectly emitted constants. + if (CTy->getTag() == dwarf::DW_TAG_enumeration_type) + return false; + + // (Pieces of) aggregate types that get hacked apart by SROA may be + // represented by a constant. Encode them as unsigned bytes. + return true; + } + + if (auto *DTy = dyn_cast<DIDerivedType>(Ty)) { + dwarf::Tag T = (dwarf::Tag)Ty->getTag(); + // Encode pointer constants as unsigned bytes. This is used at least for + // null pointer constant emission. + // FIXME: reference and rvalue_reference /probably/ shouldn't be allowed + // here, but accept them for now due to a bug in SROA producing bogus + // dbg.values. + if (T == dwarf::DW_TAG_pointer_type || + T == dwarf::DW_TAG_ptr_to_member_type || + T == dwarf::DW_TAG_reference_type || + T == dwarf::DW_TAG_rvalue_reference_type) + return true; + assert(T == dwarf::DW_TAG_typedef || T == dwarf::DW_TAG_const_type || + T == dwarf::DW_TAG_volatile_type || + T == dwarf::DW_TAG_restrict_type || T == dwarf::DW_TAG_atomic_type); + assert(DTy->getBaseType() && "Expected valid base type"); + return isUnsignedDIType(DD, DTy->getBaseType()); + } + + auto *BTy = cast<DIBasicType>(Ty); + unsigned Encoding = BTy->getEncoding(); + assert((Encoding == dwarf::DW_ATE_unsigned || + Encoding == dwarf::DW_ATE_unsigned_char || + Encoding == dwarf::DW_ATE_signed || + Encoding == dwarf::DW_ATE_signed_char || + Encoding == dwarf::DW_ATE_float || Encoding == dwarf::DW_ATE_UTF || + Encoding == dwarf::DW_ATE_boolean || + (Ty->getTag() == dwarf::DW_TAG_unspecified_type && + Ty->getName() == "decltype(nullptr)")) && + "Unsupported encoding"); + return Encoding == dwarf::DW_ATE_unsigned || + Encoding == dwarf::DW_ATE_unsigned_char || + Encoding == dwarf::DW_ATE_UTF || Encoding == dwarf::DW_ATE_boolean || + Ty->getTag() == dwarf::DW_TAG_unspecified_type; +} + +void DwarfUnit::addConstantFPValue(DIE &Die, const MachineOperand &MO) { + assert(MO.isFPImm() && "Invalid machine operand!"); + DIEBlock *Block = new (DIEValueAllocator) DIEBlock; + APFloat FPImm = MO.getFPImm()->getValueAPF(); + + // Get the raw data form of the floating point. + const APInt FltVal = FPImm.bitcastToAPInt(); + const char *FltPtr = (const char *)FltVal.getRawData(); + + int NumBytes = FltVal.getBitWidth() / 8; // 8 bits per byte. + bool LittleEndian = Asm->getDataLayout().isLittleEndian(); + int Incr = (LittleEndian ? 1 : -1); + int Start = (LittleEndian ? 0 : NumBytes - 1); + int Stop = (LittleEndian ? NumBytes : -1); + + // Output the constant to DWARF one byte at a time. + for (; Start != Stop; Start += Incr) + addUInt(*Block, dwarf::DW_FORM_data1, (unsigned char)0xFF & FltPtr[Start]); + + addBlock(Die, dwarf::DW_AT_const_value, Block); +} + +void DwarfUnit::addConstantFPValue(DIE &Die, const ConstantFP *CFP) { + // Pass this down to addConstantValue as an unsigned bag of bits. + addConstantValue(Die, CFP->getValueAPF().bitcastToAPInt(), true); +} + +void DwarfUnit::addConstantValue(DIE &Die, const ConstantInt *CI, + const DIType *Ty) { + addConstantValue(Die, CI->getValue(), Ty); +} + +void DwarfUnit::addConstantValue(DIE &Die, const MachineOperand &MO, + const DIType *Ty) { + assert(MO.isImm() && "Invalid machine operand!"); + + addConstantValue(Die, isUnsignedDIType(DD, Ty), MO.getImm()); +} + +void DwarfUnit::addConstantValue(DIE &Die, uint64_t Val, const DIType *Ty) { + addConstantValue(Die, isUnsignedDIType(DD, Ty), Val); +} + +void DwarfUnit::addConstantValue(DIE &Die, bool Unsigned, uint64_t Val) { + // FIXME: This is a bit conservative/simple - it emits negative values always + // sign extended to 64 bits rather than minimizing the number of bytes. + addUInt(Die, dwarf::DW_AT_const_value, + Unsigned ? dwarf::DW_FORM_udata : dwarf::DW_FORM_sdata, Val); +} + +void DwarfUnit::addConstantValue(DIE &Die, const APInt &Val, const DIType *Ty) { + addConstantValue(Die, Val, isUnsignedDIType(DD, Ty)); +} + +void DwarfUnit::addConstantValue(DIE &Die, const APInt &Val, bool Unsigned) { + unsigned CIBitWidth = Val.getBitWidth(); + if (CIBitWidth <= 64) { + addConstantValue(Die, Unsigned, + Unsigned ? Val.getZExtValue() : Val.getSExtValue()); + return; + } + + DIEBlock *Block = new (DIEValueAllocator) DIEBlock; + + // Get the raw data form of the large APInt. + const uint64_t *Ptr64 = Val.getRawData(); + + int NumBytes = Val.getBitWidth() / 8; // 8 bits per byte. + bool LittleEndian = Asm->getDataLayout().isLittleEndian(); + + // Output the constant to DWARF one byte at a time. + for (int i = 0; i < NumBytes; i++) { + uint8_t c; + if (LittleEndian) + c = Ptr64[i / 8] >> (8 * (i & 7)); + else + c = Ptr64[(NumBytes - 1 - i) / 8] >> (8 * ((NumBytes - 1 - i) & 7)); + addUInt(*Block, dwarf::DW_FORM_data1, c); + } + + addBlock(Die, dwarf::DW_AT_const_value, Block); +} + +void DwarfUnit::addLinkageName(DIE &Die, StringRef LinkageName) { + if (!LinkageName.empty()) + addString(Die, + DD->getDwarfVersion() >= 4 ? dwarf::DW_AT_linkage_name + : dwarf::DW_AT_MIPS_linkage_name, + GlobalValue::dropLLVMManglingEscape(LinkageName)); +} + +void DwarfUnit::addTemplateParams(DIE &Buffer, DINodeArray TParams) { + // Add template parameters. + for (const auto *Element : TParams) { + if (auto *TTP = dyn_cast<DITemplateTypeParameter>(Element)) + constructTemplateTypeParameterDIE(Buffer, TTP); + else if (auto *TVP = dyn_cast<DITemplateValueParameter>(Element)) + constructTemplateValueParameterDIE(Buffer, TVP); + } +} + +/// Add thrown types. +void DwarfUnit::addThrownTypes(DIE &Die, DINodeArray ThrownTypes) { + for (const auto *Ty : ThrownTypes) { + DIE &TT = createAndAddDIE(dwarf::DW_TAG_thrown_type, Die); + addType(TT, cast<DIType>(Ty)); + } +} + +DIE *DwarfUnit::getOrCreateContextDIE(const DIScope *Context) { + if (!Context || isa<DIFile>(Context)) + return &getUnitDie(); + if (auto *T = dyn_cast<DIType>(Context)) + return getOrCreateTypeDIE(T); + if (auto *NS = dyn_cast<DINamespace>(Context)) + return getOrCreateNameSpace(NS); + if (auto *SP = dyn_cast<DISubprogram>(Context)) + return getOrCreateSubprogramDIE(SP); + if (auto *M = dyn_cast<DIModule>(Context)) + return getOrCreateModule(M); + return getDIE(Context); +} + +DIE *DwarfUnit::createTypeDIE(const DICompositeType *Ty) { + auto *Context = Ty->getScope(); + DIE *ContextDIE = getOrCreateContextDIE(Context); + + if (DIE *TyDIE = getDIE(Ty)) + return TyDIE; + + // Create new type. + DIE &TyDIE = createAndAddDIE(Ty->getTag(), *ContextDIE, Ty); + + constructTypeDIE(TyDIE, cast<DICompositeType>(Ty)); + + updateAcceleratorTables(Context, Ty, TyDIE); + return &TyDIE; +} + +DIE *DwarfUnit::createTypeDIE(const DIScope *Context, DIE &ContextDIE, + const DIType *Ty) { + // Create new type. + DIE &TyDIE = createAndAddDIE(Ty->getTag(), ContextDIE, Ty); + + updateAcceleratorTables(Context, Ty, TyDIE); + + if (auto *BT = dyn_cast<DIBasicType>(Ty)) + constructTypeDIE(TyDIE, BT); + else if (auto *STy = dyn_cast<DISubroutineType>(Ty)) + constructTypeDIE(TyDIE, STy); + else if (auto *CTy = dyn_cast<DICompositeType>(Ty)) { + if (DD->generateTypeUnits() && !Ty->isForwardDecl() && + (Ty->getRawName() || CTy->getRawIdentifier())) { + // Skip updating the accelerator tables since this is not the full type. + if (MDString *TypeId = CTy->getRawIdentifier()) + DD->addDwarfTypeUnitType(getCU(), TypeId->getString(), TyDIE, CTy); + else { + auto X = DD->enterNonTypeUnitContext(); + finishNonUnitTypeDIE(TyDIE, CTy); + } + return &TyDIE; + } + constructTypeDIE(TyDIE, CTy); + } else { + constructTypeDIE(TyDIE, cast<DIDerivedType>(Ty)); + } + + return &TyDIE; +} + +DIE *DwarfUnit::getOrCreateTypeDIE(const MDNode *TyNode) { + if (!TyNode) + return nullptr; + + auto *Ty = cast<DIType>(TyNode); + + // DW_TAG_restrict_type is not supported in DWARF2 + if (Ty->getTag() == dwarf::DW_TAG_restrict_type && DD->getDwarfVersion() <= 2) + return getOrCreateTypeDIE(cast<DIDerivedType>(Ty)->getBaseType()); + + // DW_TAG_atomic_type is not supported in DWARF < 5 + if (Ty->getTag() == dwarf::DW_TAG_atomic_type && DD->getDwarfVersion() < 5) + return getOrCreateTypeDIE(cast<DIDerivedType>(Ty)->getBaseType()); + + // Construct the context before querying for the existence of the DIE in case + // such construction creates the DIE. + auto *Context = Ty->getScope(); + DIE *ContextDIE = getOrCreateContextDIE(Context); + assert(ContextDIE); + + if (DIE *TyDIE = getDIE(Ty)) + return TyDIE; + + return static_cast<DwarfUnit *>(ContextDIE->getUnit()) + ->createTypeDIE(Context, *ContextDIE, Ty); +} + +void DwarfUnit::updateAcceleratorTables(const DIScope *Context, + const DIType *Ty, const DIE &TyDIE) { + if (!Ty->getName().empty() && !Ty->isForwardDecl()) { + bool IsImplementation = false; + if (auto *CT = dyn_cast<DICompositeType>(Ty)) { + // A runtime language of 0 actually means C/C++ and that any + // non-negative value is some version of Objective-C/C++. + IsImplementation = CT->getRuntimeLang() == 0 || CT->isObjcClassComplete(); + } + unsigned Flags = IsImplementation ? dwarf::DW_FLAG_type_implementation : 0; + DD->addAccelType(*CUNode, Ty->getName(), TyDIE, Flags); + + if (!Context || isa<DICompileUnit>(Context) || isa<DIFile>(Context) || + isa<DINamespace>(Context) || isa<DICommonBlock>(Context)) + addGlobalType(Ty, TyDIE, Context); + } +} + +void DwarfUnit::addType(DIE &Entity, const DIType *Ty, + dwarf::Attribute Attribute) { + assert(Ty && "Trying to add a type that doesn't exist?"); + addDIEEntry(Entity, Attribute, DIEEntry(*getOrCreateTypeDIE(Ty))); +} + +std::string DwarfUnit::getParentContextString(const DIScope *Context) const { + if (!Context) + return ""; + + // FIXME: Decide whether to implement this for non-C++ languages. + if (!dwarf::isCPlusPlus((dwarf::SourceLanguage)getLanguage())) + return ""; + + std::string CS; + SmallVector<const DIScope *, 1> Parents; + while (!isa<DICompileUnit>(Context)) { + Parents.push_back(Context); + if (const DIScope *S = Context->getScope()) + Context = S; + else + // Structure, etc types will have a NULL context if they're at the top + // level. + break; + } + + // Reverse iterate over our list to go from the outermost construct to the + // innermost. + for (const DIScope *Ctx : make_range(Parents.rbegin(), Parents.rend())) { + StringRef Name = Ctx->getName(); + if (Name.empty() && isa<DINamespace>(Ctx)) + Name = "(anonymous namespace)"; + if (!Name.empty()) { + CS += Name; + CS += "::"; + } + } + return CS; +} + +void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIBasicType *BTy) { + // Get core information. + StringRef Name = BTy->getName(); + // Add name if not anonymous or intermediate type. + if (!Name.empty()) + addString(Buffer, dwarf::DW_AT_name, Name); + + // An unspecified type only has a name attribute. + if (BTy->getTag() == dwarf::DW_TAG_unspecified_type) + return; + + addUInt(Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, + BTy->getEncoding()); + + uint64_t Size = BTy->getSizeInBits() >> 3; + addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size); + + if (BTy->isBigEndian()) + addUInt(Buffer, dwarf::DW_AT_endianity, None, dwarf::DW_END_big); + else if (BTy->isLittleEndian()) + addUInt(Buffer, dwarf::DW_AT_endianity, None, dwarf::DW_END_little); +} + +void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIDerivedType *DTy) { + // Get core information. + StringRef Name = DTy->getName(); + uint64_t Size = DTy->getSizeInBits() >> 3; + uint16_t Tag = Buffer.getTag(); + + // Map to main type, void will not have a type. + const DIType *FromTy = DTy->getBaseType(); + if (FromTy) + addType(Buffer, FromTy); + + // Add name if not anonymous or intermediate type. + if (!Name.empty()) + addString(Buffer, dwarf::DW_AT_name, Name); + + // Add size if non-zero (derived types might be zero-sized.) + if (Size && Tag != dwarf::DW_TAG_pointer_type + && Tag != dwarf::DW_TAG_ptr_to_member_type + && Tag != dwarf::DW_TAG_reference_type + && Tag != dwarf::DW_TAG_rvalue_reference_type) + addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size); + + if (Tag == dwarf::DW_TAG_ptr_to_member_type) + addDIEEntry(Buffer, dwarf::DW_AT_containing_type, + *getOrCreateTypeDIE(cast<DIDerivedType>(DTy)->getClassType())); + // Add source line info if available and TyDesc is not a forward declaration. + if (!DTy->isForwardDecl()) + addSourceLine(Buffer, DTy); + + // If DWARF address space value is other than None, add it. The IR + // verifier checks that DWARF address space only exists for pointer + // or reference types. + if (DTy->getDWARFAddressSpace()) + addUInt(Buffer, dwarf::DW_AT_address_class, dwarf::DW_FORM_data4, + DTy->getDWARFAddressSpace().getValue()); +} + +void DwarfUnit::constructSubprogramArguments(DIE &Buffer, DITypeRefArray Args) { + for (unsigned i = 1, N = Args.size(); i < N; ++i) { + const DIType *Ty = Args[i]; + if (!Ty) { + assert(i == N-1 && "Unspecified parameter must be the last argument"); + createAndAddDIE(dwarf::DW_TAG_unspecified_parameters, Buffer); + } else { + DIE &Arg = createAndAddDIE(dwarf::DW_TAG_formal_parameter, Buffer); + addType(Arg, Ty); + if (Ty->isArtificial()) + addFlag(Arg, dwarf::DW_AT_artificial); + } + } +} + +void DwarfUnit::constructTypeDIE(DIE &Buffer, const DISubroutineType *CTy) { + // Add return type. A void return won't have a type. + auto Elements = cast<DISubroutineType>(CTy)->getTypeArray(); + if (Elements.size()) + if (auto RTy = Elements[0]) + addType(Buffer, RTy); + + bool isPrototyped = true; + if (Elements.size() == 2 && !Elements[1]) + isPrototyped = false; + + constructSubprogramArguments(Buffer, Elements); + + // Add prototype flag if we're dealing with a C language and the function has + // been prototyped. + uint16_t Language = getLanguage(); + if (isPrototyped && + (Language == dwarf::DW_LANG_C89 || Language == dwarf::DW_LANG_C99 || + Language == dwarf::DW_LANG_ObjC)) + addFlag(Buffer, dwarf::DW_AT_prototyped); + + // Add a DW_AT_calling_convention if this has an explicit convention. + if (CTy->getCC() && CTy->getCC() != dwarf::DW_CC_normal) + addUInt(Buffer, dwarf::DW_AT_calling_convention, dwarf::DW_FORM_data1, + CTy->getCC()); + + if (CTy->isLValueReference()) + addFlag(Buffer, dwarf::DW_AT_reference); + + if (CTy->isRValueReference()) + addFlag(Buffer, dwarf::DW_AT_rvalue_reference); +} + +void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) { + // Add name if not anonymous or intermediate type. + StringRef Name = CTy->getName(); + + uint64_t Size = CTy->getSizeInBits() >> 3; + uint16_t Tag = Buffer.getTag(); + + switch (Tag) { + case dwarf::DW_TAG_array_type: + constructArrayTypeDIE(Buffer, CTy); + break; + case dwarf::DW_TAG_enumeration_type: + constructEnumTypeDIE(Buffer, CTy); + break; + case dwarf::DW_TAG_variant_part: + case dwarf::DW_TAG_structure_type: + case dwarf::DW_TAG_union_type: + case dwarf::DW_TAG_class_type: { + // Emit the discriminator for a variant part. + DIDerivedType *Discriminator = nullptr; + if (Tag == dwarf::DW_TAG_variant_part) { + Discriminator = CTy->getDiscriminator(); + if (Discriminator) { + // DWARF says: + // If the variant part has a discriminant, the discriminant is + // represented by a separate debugging information entry which is + // a child of the variant part entry. + DIE &DiscMember = constructMemberDIE(Buffer, Discriminator); + addDIEEntry(Buffer, dwarf::DW_AT_discr, DiscMember); + } + } + + // Add elements to structure type. + DINodeArray Elements = CTy->getElements(); + for (const auto *Element : Elements) { + if (!Element) + continue; + if (auto *SP = dyn_cast<DISubprogram>(Element)) + getOrCreateSubprogramDIE(SP); + else if (auto *DDTy = dyn_cast<DIDerivedType>(Element)) { + if (DDTy->getTag() == dwarf::DW_TAG_friend) { + DIE &ElemDie = createAndAddDIE(dwarf::DW_TAG_friend, Buffer); + addType(ElemDie, DDTy->getBaseType(), dwarf::DW_AT_friend); + } else if (DDTy->isStaticMember()) { + getOrCreateStaticMemberDIE(DDTy); + } else if (Tag == dwarf::DW_TAG_variant_part) { + // When emitting a variant part, wrap each member in + // DW_TAG_variant. + DIE &Variant = createAndAddDIE(dwarf::DW_TAG_variant, Buffer); + if (const ConstantInt *CI = + dyn_cast_or_null<ConstantInt>(DDTy->getDiscriminantValue())) { + if (isUnsignedDIType(DD, Discriminator->getBaseType())) + addUInt(Variant, dwarf::DW_AT_discr_value, None, CI->getZExtValue()); + else + addSInt(Variant, dwarf::DW_AT_discr_value, None, CI->getSExtValue()); + } + constructMemberDIE(Variant, DDTy); + } else { + constructMemberDIE(Buffer, DDTy); + } + } else if (auto *Property = dyn_cast<DIObjCProperty>(Element)) { + DIE &ElemDie = createAndAddDIE(Property->getTag(), Buffer); + StringRef PropertyName = Property->getName(); + addString(ElemDie, dwarf::DW_AT_APPLE_property_name, PropertyName); + if (Property->getType()) + addType(ElemDie, Property->getType()); + addSourceLine(ElemDie, Property); + StringRef GetterName = Property->getGetterName(); + if (!GetterName.empty()) + addString(ElemDie, dwarf::DW_AT_APPLE_property_getter, GetterName); + StringRef SetterName = Property->getSetterName(); + if (!SetterName.empty()) + addString(ElemDie, dwarf::DW_AT_APPLE_property_setter, SetterName); + if (unsigned PropertyAttributes = Property->getAttributes()) + addUInt(ElemDie, dwarf::DW_AT_APPLE_property_attribute, None, + PropertyAttributes); + } else if (auto *Composite = dyn_cast<DICompositeType>(Element)) { + if (Composite->getTag() == dwarf::DW_TAG_variant_part) { + DIE &VariantPart = createAndAddDIE(Composite->getTag(), Buffer); + constructTypeDIE(VariantPart, Composite); + } + } + } + + if (CTy->isAppleBlockExtension()) + addFlag(Buffer, dwarf::DW_AT_APPLE_block); + + if (CTy->getExportSymbols()) + addFlag(Buffer, dwarf::DW_AT_export_symbols); + + // This is outside the DWARF spec, but GDB expects a DW_AT_containing_type + // inside C++ composite types to point to the base class with the vtable. + // Rust uses DW_AT_containing_type to link a vtable to the type + // for which it was created. + if (auto *ContainingType = CTy->getVTableHolder()) + addDIEEntry(Buffer, dwarf::DW_AT_containing_type, + *getOrCreateTypeDIE(ContainingType)); + + if (CTy->isObjcClassComplete()) + addFlag(Buffer, dwarf::DW_AT_APPLE_objc_complete_type); + + // Add template parameters to a class, structure or union types. + // FIXME: The support isn't in the metadata for this yet. + if (Tag == dwarf::DW_TAG_class_type || + Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type) + addTemplateParams(Buffer, CTy->getTemplateParams()); + + // Add the type's non-standard calling convention. + uint8_t CC = 0; + if (CTy->isTypePassByValue()) + CC = dwarf::DW_CC_pass_by_value; + else if (CTy->isTypePassByReference()) + CC = dwarf::DW_CC_pass_by_reference; + if (CC) + addUInt(Buffer, dwarf::DW_AT_calling_convention, dwarf::DW_FORM_data1, + CC); + break; + } + default: + break; + } + + // Add name if not anonymous or intermediate type. + if (!Name.empty()) + addString(Buffer, dwarf::DW_AT_name, Name); + + if (Tag == dwarf::DW_TAG_enumeration_type || + Tag == dwarf::DW_TAG_class_type || Tag == dwarf::DW_TAG_structure_type || + Tag == dwarf::DW_TAG_union_type) { + // Add size if non-zero (derived types might be zero-sized.) + // TODO: Do we care about size for enum forward declarations? + if (Size) + addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size); + else if (!CTy->isForwardDecl()) + // Add zero size if it is not a forward declaration. + addUInt(Buffer, dwarf::DW_AT_byte_size, None, 0); + + // If we're a forward decl, say so. + if (CTy->isForwardDecl()) + addFlag(Buffer, dwarf::DW_AT_declaration); + + // Add source line info if available. + if (!CTy->isForwardDecl()) + addSourceLine(Buffer, CTy); + + // No harm in adding the runtime language to the declaration. + unsigned RLang = CTy->getRuntimeLang(); + if (RLang) + addUInt(Buffer, dwarf::DW_AT_APPLE_runtime_class, dwarf::DW_FORM_data1, + RLang); + + // Add align info if available. + if (uint32_t AlignInBytes = CTy->getAlignInBytes()) + addUInt(Buffer, dwarf::DW_AT_alignment, dwarf::DW_FORM_udata, + AlignInBytes); + } +} + +void DwarfUnit::constructTemplateTypeParameterDIE( + DIE &Buffer, const DITemplateTypeParameter *TP) { + DIE &ParamDIE = + createAndAddDIE(dwarf::DW_TAG_template_type_parameter, Buffer); + // Add the type if it exists, it could be void and therefore no type. + if (TP->getType()) + addType(ParamDIE, TP->getType()); + if (!TP->getName().empty()) + addString(ParamDIE, dwarf::DW_AT_name, TP->getName()); +} + +void DwarfUnit::constructTemplateValueParameterDIE( + DIE &Buffer, const DITemplateValueParameter *VP) { + DIE &ParamDIE = createAndAddDIE(VP->getTag(), Buffer); + + // Add the type if there is one, template template and template parameter + // packs will not have a type. + if (VP->getTag() == dwarf::DW_TAG_template_value_parameter) + addType(ParamDIE, VP->getType()); + if (!VP->getName().empty()) + addString(ParamDIE, dwarf::DW_AT_name, VP->getName()); + if (Metadata *Val = VP->getValue()) { + if (ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Val)) + addConstantValue(ParamDIE, CI, VP->getType()); + else if (GlobalValue *GV = mdconst::dyn_extract<GlobalValue>(Val)) { + // We cannot describe the location of dllimport'd entities: the + // computation of their address requires loads from the IAT. + if (!GV->hasDLLImportStorageClass()) { + // For declaration non-type template parameters (such as global values + // and functions) + DIELoc *Loc = new (DIEValueAllocator) DIELoc; + addOpAddress(*Loc, Asm->getSymbol(GV)); + // Emit DW_OP_stack_value to use the address as the immediate value of + // the parameter, rather than a pointer to it. + addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_stack_value); + addBlock(ParamDIE, dwarf::DW_AT_location, Loc); + } + } else if (VP->getTag() == dwarf::DW_TAG_GNU_template_template_param) { + assert(isa<MDString>(Val)); + addString(ParamDIE, dwarf::DW_AT_GNU_template_name, + cast<MDString>(Val)->getString()); + } else if (VP->getTag() == dwarf::DW_TAG_GNU_template_parameter_pack) { + addTemplateParams(ParamDIE, cast<MDTuple>(Val)); + } + } +} + +DIE *DwarfUnit::getOrCreateNameSpace(const DINamespace *NS) { + // Construct the context before querying for the existence of the DIE in case + // such construction creates the DIE. + DIE *ContextDIE = getOrCreateContextDIE(NS->getScope()); + + if (DIE *NDie = getDIE(NS)) + return NDie; + DIE &NDie = createAndAddDIE(dwarf::DW_TAG_namespace, *ContextDIE, NS); + + StringRef Name = NS->getName(); + if (!Name.empty()) + addString(NDie, dwarf::DW_AT_name, NS->getName()); + else + Name = "(anonymous namespace)"; + DD->addAccelNamespace(*CUNode, Name, NDie); + addGlobalName(Name, NDie, NS->getScope()); + if (NS->getExportSymbols()) + addFlag(NDie, dwarf::DW_AT_export_symbols); + return &NDie; +} + +DIE *DwarfUnit::getOrCreateModule(const DIModule *M) { + // Construct the context before querying for the existence of the DIE in case + // such construction creates the DIE. + DIE *ContextDIE = getOrCreateContextDIE(M->getScope()); + + if (DIE *MDie = getDIE(M)) + return MDie; + DIE &MDie = createAndAddDIE(dwarf::DW_TAG_module, *ContextDIE, M); + + if (!M->getName().empty()) { + addString(MDie, dwarf::DW_AT_name, M->getName()); + addGlobalName(M->getName(), MDie, M->getScope()); + } + if (!M->getConfigurationMacros().empty()) + addString(MDie, dwarf::DW_AT_LLVM_config_macros, + M->getConfigurationMacros()); + if (!M->getIncludePath().empty()) + addString(MDie, dwarf::DW_AT_LLVM_include_path, M->getIncludePath()); + if (!M->getISysRoot().empty()) + addString(MDie, dwarf::DW_AT_LLVM_isysroot, M->getISysRoot()); + + return &MDie; +} + +DIE *DwarfUnit::getOrCreateSubprogramDIE(const DISubprogram *SP, bool Minimal) { + // Construct the context before querying for the existence of the DIE in case + // such construction creates the DIE (as is the case for member function + // declarations). + DIE *ContextDIE = + Minimal ? &getUnitDie() : getOrCreateContextDIE(SP->getScope()); + + if (DIE *SPDie = getDIE(SP)) + return SPDie; + + if (auto *SPDecl = SP->getDeclaration()) { + if (!Minimal) { + // Add subprogram definitions to the CU die directly. + ContextDIE = &getUnitDie(); + // Build the decl now to ensure it precedes the definition. + getOrCreateSubprogramDIE(SPDecl); + } + } + + // DW_TAG_inlined_subroutine may refer to this DIE. + DIE &SPDie = createAndAddDIE(dwarf::DW_TAG_subprogram, *ContextDIE, SP); + + // Stop here and fill this in later, depending on whether or not this + // subprogram turns out to have inlined instances or not. + if (SP->isDefinition()) + return &SPDie; + + static_cast<DwarfUnit *>(SPDie.getUnit()) + ->applySubprogramAttributes(SP, SPDie); + return &SPDie; +} + +bool DwarfUnit::applySubprogramDefinitionAttributes(const DISubprogram *SP, + DIE &SPDie) { + DIE *DeclDie = nullptr; + StringRef DeclLinkageName; + if (auto *SPDecl = SP->getDeclaration()) { + DeclDie = getDIE(SPDecl); + assert(DeclDie && "This DIE should've already been constructed when the " + "definition DIE was created in " + "getOrCreateSubprogramDIE"); + // Look at the Decl's linkage name only if we emitted it. + if (DD->useAllLinkageNames()) + DeclLinkageName = SPDecl->getLinkageName(); + unsigned DeclID = getOrCreateSourceID(SPDecl->getFile()); + unsigned DefID = getOrCreateSourceID(SP->getFile()); + if (DeclID != DefID) + addUInt(SPDie, dwarf::DW_AT_decl_file, None, DefID); + + if (SP->getLine() != SPDecl->getLine()) + addUInt(SPDie, dwarf::DW_AT_decl_line, None, SP->getLine()); + } + + // Add function template parameters. + addTemplateParams(SPDie, SP->getTemplateParams()); + + // Add the linkage name if we have one and it isn't in the Decl. + StringRef LinkageName = SP->getLinkageName(); + assert(((LinkageName.empty() || DeclLinkageName.empty()) || + LinkageName == DeclLinkageName) && + "decl has a linkage name and it is different"); + if (DeclLinkageName.empty() && + // Always emit it for abstract subprograms. + (DD->useAllLinkageNames() || DU->getAbstractSPDies().lookup(SP))) + addLinkageName(SPDie, LinkageName); + + if (!DeclDie) + return false; + + // Refer to the function declaration where all the other attributes will be + // found. + addDIEEntry(SPDie, dwarf::DW_AT_specification, *DeclDie); + return true; +} + +void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie, + bool SkipSPAttributes) { + // If -fdebug-info-for-profiling is enabled, need to emit the subprogram + // and its source location. + bool SkipSPSourceLocation = SkipSPAttributes && + !CUNode->getDebugInfoForProfiling(); + if (!SkipSPSourceLocation) + if (applySubprogramDefinitionAttributes(SP, SPDie)) + return; + + // Constructors and operators for anonymous aggregates do not have names. + if (!SP->getName().empty()) + addString(SPDie, dwarf::DW_AT_name, SP->getName()); + + if (!SkipSPSourceLocation) + addSourceLine(SPDie, SP); + + // Skip the rest of the attributes under -gmlt to save space. + if (SkipSPAttributes) + return; + + // Add the prototype if we have a prototype and we have a C like + // language. + uint16_t Language = getLanguage(); + if (SP->isPrototyped() && + (Language == dwarf::DW_LANG_C89 || Language == dwarf::DW_LANG_C99 || + Language == dwarf::DW_LANG_ObjC)) + addFlag(SPDie, dwarf::DW_AT_prototyped); + + unsigned CC = 0; + DITypeRefArray Args; + if (const DISubroutineType *SPTy = SP->getType()) { + Args = SPTy->getTypeArray(); + CC = SPTy->getCC(); + } + + // Add a DW_AT_calling_convention if this has an explicit convention. + if (CC && CC != dwarf::DW_CC_normal) + addUInt(SPDie, dwarf::DW_AT_calling_convention, dwarf::DW_FORM_data1, CC); + + // Add a return type. If this is a type like a C/C++ void type we don't add a + // return type. + if (Args.size()) + if (auto Ty = Args[0]) + addType(SPDie, Ty); + + unsigned VK = SP->getVirtuality(); + if (VK) { + addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_data1, VK); + if (SP->getVirtualIndex() != -1u) { + DIELoc *Block = getDIELoc(); + addUInt(*Block, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); + addUInt(*Block, dwarf::DW_FORM_udata, SP->getVirtualIndex()); + addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, Block); + } + ContainingTypeMap.insert(std::make_pair(&SPDie, SP->getContainingType())); + } + + if (!SP->isDefinition()) { + addFlag(SPDie, dwarf::DW_AT_declaration); + + // Add arguments. Do not add arguments for subprogram definition. They will + // be handled while processing variables. + constructSubprogramArguments(SPDie, Args); + } + + addThrownTypes(SPDie, SP->getThrownTypes()); + + if (SP->isArtificial()) + addFlag(SPDie, dwarf::DW_AT_artificial); + + if (!SP->isLocalToUnit()) + addFlag(SPDie, dwarf::DW_AT_external); + + if (DD->useAppleExtensionAttributes()) { + if (SP->isOptimized()) + addFlag(SPDie, dwarf::DW_AT_APPLE_optimized); + + if (unsigned isa = Asm->getISAEncoding()) + addUInt(SPDie, dwarf::DW_AT_APPLE_isa, dwarf::DW_FORM_flag, isa); + } + + if (SP->isLValueReference()) + addFlag(SPDie, dwarf::DW_AT_reference); + + if (SP->isRValueReference()) + addFlag(SPDie, dwarf::DW_AT_rvalue_reference); + + if (SP->isNoReturn()) + addFlag(SPDie, dwarf::DW_AT_noreturn); + + if (SP->isProtected()) + addUInt(SPDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, + dwarf::DW_ACCESS_protected); + else if (SP->isPrivate()) + addUInt(SPDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, + dwarf::DW_ACCESS_private); + else if (SP->isPublic()) + addUInt(SPDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, + dwarf::DW_ACCESS_public); + + if (SP->isExplicit()) + addFlag(SPDie, dwarf::DW_AT_explicit); + + if (SP->isMainSubprogram()) + addFlag(SPDie, dwarf::DW_AT_main_subprogram); + if (SP->isPure()) + addFlag(SPDie, dwarf::DW_AT_pure); + if (SP->isElemental()) + addFlag(SPDie, dwarf::DW_AT_elemental); + if (SP->isRecursive()) + addFlag(SPDie, dwarf::DW_AT_recursive); +} + +void DwarfUnit::constructSubrangeDIE(DIE &Buffer, const DISubrange *SR, + DIE *IndexTy) { + DIE &DW_Subrange = createAndAddDIE(dwarf::DW_TAG_subrange_type, Buffer); + addDIEEntry(DW_Subrange, dwarf::DW_AT_type, *IndexTy); + + // The LowerBound value defines the lower bounds which is typically zero for + // C/C++. The Count value is the number of elements. Values are 64 bit. If + // Count == -1 then the array is unbounded and we do not emit + // DW_AT_lower_bound and DW_AT_count attributes. + int64_t LowerBound = SR->getLowerBound(); + int64_t DefaultLowerBound = getDefaultLowerBound(); + int64_t Count = -1; + if (auto *CI = SR->getCount().dyn_cast<ConstantInt*>()) + Count = CI->getSExtValue(); + + if (DefaultLowerBound == -1 || LowerBound != DefaultLowerBound) + addUInt(DW_Subrange, dwarf::DW_AT_lower_bound, None, LowerBound); + + if (auto *CV = SR->getCount().dyn_cast<DIVariable*>()) { + if (auto *CountVarDIE = getDIE(CV)) + addDIEEntry(DW_Subrange, dwarf::DW_AT_count, *CountVarDIE); + } else if (Count != -1) + addUInt(DW_Subrange, dwarf::DW_AT_count, None, Count); +} + +DIE *DwarfUnit::getIndexTyDie() { + if (IndexTyDie) + return IndexTyDie; + // Construct an integer type to use for indexes. + IndexTyDie = &createAndAddDIE(dwarf::DW_TAG_base_type, getUnitDie()); + StringRef Name = "__ARRAY_SIZE_TYPE__"; + addString(*IndexTyDie, dwarf::DW_AT_name, Name); + addUInt(*IndexTyDie, dwarf::DW_AT_byte_size, None, sizeof(int64_t)); + addUInt(*IndexTyDie, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, + dwarf::DW_ATE_unsigned); + DD->addAccelType(*CUNode, Name, *IndexTyDie, /*Flags*/ 0); + return IndexTyDie; +} + +/// Returns true if the vector's size differs from the sum of sizes of elements +/// the user specified. This can occur if the vector has been rounded up to +/// fit memory alignment constraints. +static bool hasVectorBeenPadded(const DICompositeType *CTy) { + assert(CTy && CTy->isVector() && "Composite type is not a vector"); + const uint64_t ActualSize = CTy->getSizeInBits(); + + // Obtain the size of each element in the vector. + DIType *BaseTy = CTy->getBaseType(); + assert(BaseTy && "Unknown vector element type."); + const uint64_t ElementSize = BaseTy->getSizeInBits(); + + // Locate the number of elements in the vector. + const DINodeArray Elements = CTy->getElements(); + assert(Elements.size() == 1 && + Elements[0]->getTag() == dwarf::DW_TAG_subrange_type && + "Invalid vector element array, expected one element of type subrange"); + const auto Subrange = cast<DISubrange>(Elements[0]); + const auto CI = Subrange->getCount().get<ConstantInt *>(); + const int32_t NumVecElements = CI->getSExtValue(); + + // Ensure we found the element count and that the actual size is wide + // enough to contain the requested size. + assert(ActualSize >= (NumVecElements * ElementSize) && "Invalid vector size"); + return ActualSize != (NumVecElements * ElementSize); +} + +void DwarfUnit::constructArrayTypeDIE(DIE &Buffer, const DICompositeType *CTy) { + if (CTy->isVector()) { + addFlag(Buffer, dwarf::DW_AT_GNU_vector); + if (hasVectorBeenPadded(CTy)) + addUInt(Buffer, dwarf::DW_AT_byte_size, None, + CTy->getSizeInBits() / CHAR_BIT); + } + + // Emit the element type. + addType(Buffer, CTy->getBaseType()); + + // Get an anonymous type for index type. + // FIXME: This type should be passed down from the front end + // as different languages may have different sizes for indexes. + DIE *IdxTy = getIndexTyDie(); + + // Add subranges to array type. + DINodeArray Elements = CTy->getElements(); + for (unsigned i = 0, N = Elements.size(); i < N; ++i) { + // FIXME: Should this really be such a loose cast? + if (auto *Element = dyn_cast_or_null<DINode>(Elements[i])) + if (Element->getTag() == dwarf::DW_TAG_subrange_type) + constructSubrangeDIE(Buffer, cast<DISubrange>(Element), IdxTy); + } +} + +void DwarfUnit::constructEnumTypeDIE(DIE &Buffer, const DICompositeType *CTy) { + const DIType *DTy = CTy->getBaseType(); + bool IsUnsigned = DTy && isUnsignedDIType(DD, DTy); + if (DTy) { + if (DD->getDwarfVersion() >= 3) + addType(Buffer, DTy); + if (DD->getDwarfVersion() >= 4 && (CTy->getFlags() & DINode::FlagEnumClass)) + addFlag(Buffer, dwarf::DW_AT_enum_class); + } + + auto *Context = CTy->getScope(); + bool IndexEnumerators = !Context || isa<DICompileUnit>(Context) || isa<DIFile>(Context) || + isa<DINamespace>(Context) || isa<DICommonBlock>(Context); + DINodeArray Elements = CTy->getElements(); + + // Add enumerators to enumeration type. + for (unsigned i = 0, N = Elements.size(); i < N; ++i) { + auto *Enum = dyn_cast_or_null<DIEnumerator>(Elements[i]); + if (Enum) { + DIE &Enumerator = createAndAddDIE(dwarf::DW_TAG_enumerator, Buffer); + StringRef Name = Enum->getName(); + addString(Enumerator, dwarf::DW_AT_name, Name); + auto Value = static_cast<uint64_t>(Enum->getValue()); + addConstantValue(Enumerator, IsUnsigned, Value); + if (IndexEnumerators) + addGlobalName(Name, Enumerator, Context); + } + } +} + +void DwarfUnit::constructContainingTypeDIEs() { + for (auto CI = ContainingTypeMap.begin(), CE = ContainingTypeMap.end(); + CI != CE; ++CI) { + DIE &SPDie = *CI->first; + const DINode *D = CI->second; + if (!D) + continue; + DIE *NDie = getDIE(D); + if (!NDie) + continue; + addDIEEntry(SPDie, dwarf::DW_AT_containing_type, *NDie); + } +} + +DIE &DwarfUnit::constructMemberDIE(DIE &Buffer, const DIDerivedType *DT) { + DIE &MemberDie = createAndAddDIE(DT->getTag(), Buffer); + StringRef Name = DT->getName(); + if (!Name.empty()) + addString(MemberDie, dwarf::DW_AT_name, Name); + + if (DIType *Resolved = DT->getBaseType()) + addType(MemberDie, Resolved); + + addSourceLine(MemberDie, DT); + + if (DT->getTag() == dwarf::DW_TAG_inheritance && DT->isVirtual()) { + + // For C++, virtual base classes are not at fixed offset. Use following + // expression to extract appropriate offset from vtable. + // BaseAddr = ObAddr + *((*ObAddr) - Offset) + + DIELoc *VBaseLocationDie = new (DIEValueAllocator) DIELoc; + addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_dup); + addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); + addUInt(*VBaseLocationDie, dwarf::DW_FORM_udata, DT->getOffsetInBits()); + addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_minus); + addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_plus); + + addBlock(MemberDie, dwarf::DW_AT_data_member_location, VBaseLocationDie); + } else { + uint64_t Size = DT->getSizeInBits(); + uint64_t FieldSize = DD->getBaseTypeSize(DT); + uint32_t AlignInBytes = DT->getAlignInBytes(); + uint64_t OffsetInBytes; + + bool IsBitfield = FieldSize && Size != FieldSize; + if (IsBitfield) { + // Handle bitfield, assume bytes are 8 bits. + if (DD->useDWARF2Bitfields()) + addUInt(MemberDie, dwarf::DW_AT_byte_size, None, FieldSize/8); + addUInt(MemberDie, dwarf::DW_AT_bit_size, None, Size); + + uint64_t Offset = DT->getOffsetInBits(); + // We can't use DT->getAlignInBits() here: AlignInBits for member type + // is non-zero if and only if alignment was forced (e.g. _Alignas()), + // which can't be done with bitfields. Thus we use FieldSize here. + uint32_t AlignInBits = FieldSize; + uint32_t AlignMask = ~(AlignInBits - 1); + // The bits from the start of the storage unit to the start of the field. + uint64_t StartBitOffset = Offset - (Offset & AlignMask); + // The byte offset of the field's aligned storage unit inside the struct. + OffsetInBytes = (Offset - StartBitOffset) / 8; + + if (DD->useDWARF2Bitfields()) { + uint64_t HiMark = (Offset + FieldSize) & AlignMask; + uint64_t FieldOffset = (HiMark - FieldSize); + Offset -= FieldOffset; + + // Maybe we need to work from the other end. + if (Asm->getDataLayout().isLittleEndian()) + Offset = FieldSize - (Offset + Size); + + addUInt(MemberDie, dwarf::DW_AT_bit_offset, None, Offset); + OffsetInBytes = FieldOffset >> 3; + } else { + addUInt(MemberDie, dwarf::DW_AT_data_bit_offset, None, Offset); + } + } else { + // This is not a bitfield. + OffsetInBytes = DT->getOffsetInBits() / 8; + if (AlignInBytes) + addUInt(MemberDie, dwarf::DW_AT_alignment, dwarf::DW_FORM_udata, + AlignInBytes); + } + + if (DD->getDwarfVersion() <= 2) { + DIELoc *MemLocationDie = new (DIEValueAllocator) DIELoc; + addUInt(*MemLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); + addUInt(*MemLocationDie, dwarf::DW_FORM_udata, OffsetInBytes); + addBlock(MemberDie, dwarf::DW_AT_data_member_location, MemLocationDie); + } else if (!IsBitfield || DD->useDWARF2Bitfields()) + addUInt(MemberDie, dwarf::DW_AT_data_member_location, None, + OffsetInBytes); + } + + if (DT->isProtected()) + addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, + dwarf::DW_ACCESS_protected); + else if (DT->isPrivate()) + addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, + dwarf::DW_ACCESS_private); + // Otherwise C++ member and base classes are considered public. + else if (DT->isPublic()) + addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, + dwarf::DW_ACCESS_public); + if (DT->isVirtual()) + addUInt(MemberDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_data1, + dwarf::DW_VIRTUALITY_virtual); + + // Objective-C properties. + if (DINode *PNode = DT->getObjCProperty()) + if (DIE *PDie = getDIE(PNode)) + MemberDie.addValue(DIEValueAllocator, dwarf::DW_AT_APPLE_property, + dwarf::DW_FORM_ref4, DIEEntry(*PDie)); + + if (DT->isArtificial()) + addFlag(MemberDie, dwarf::DW_AT_artificial); + + return MemberDie; +} + +DIE *DwarfUnit::getOrCreateStaticMemberDIE(const DIDerivedType *DT) { + if (!DT) + return nullptr; + + // Construct the context before querying for the existence of the DIE in case + // such construction creates the DIE. + DIE *ContextDIE = getOrCreateContextDIE(DT->getScope()); + assert(dwarf::isType(ContextDIE->getTag()) && + "Static member should belong to a type."); + + if (DIE *StaticMemberDIE = getDIE(DT)) + return StaticMemberDIE; + + DIE &StaticMemberDIE = createAndAddDIE(DT->getTag(), *ContextDIE, DT); + + const DIType *Ty = DT->getBaseType(); + + addString(StaticMemberDIE, dwarf::DW_AT_name, DT->getName()); + addType(StaticMemberDIE, Ty); + addSourceLine(StaticMemberDIE, DT); + addFlag(StaticMemberDIE, dwarf::DW_AT_external); + addFlag(StaticMemberDIE, dwarf::DW_AT_declaration); + + // FIXME: We could omit private if the parent is a class_type, and + // public if the parent is something else. + if (DT->isProtected()) + addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, + dwarf::DW_ACCESS_protected); + else if (DT->isPrivate()) + addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, + dwarf::DW_ACCESS_private); + else if (DT->isPublic()) + addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, + dwarf::DW_ACCESS_public); + + if (const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(DT->getConstant())) + addConstantValue(StaticMemberDIE, CI, Ty); + if (const ConstantFP *CFP = dyn_cast_or_null<ConstantFP>(DT->getConstant())) + addConstantFPValue(StaticMemberDIE, CFP); + + if (uint32_t AlignInBytes = DT->getAlignInBytes()) + addUInt(StaticMemberDIE, dwarf::DW_AT_alignment, dwarf::DW_FORM_udata, + AlignInBytes); + + return &StaticMemberDIE; +} + +void DwarfUnit::emitCommonHeader(bool UseOffsets, dwarf::UnitType UT) { + // Emit size of content not including length itself + Asm->OutStreamer->AddComment("Length of Unit"); + if (!DD->useSectionsAsReferences()) { + StringRef Prefix = isDwoUnit() ? "debug_info_dwo_" : "debug_info_"; + MCSymbol *BeginLabel = Asm->createTempSymbol(Prefix + "start"); + EndLabel = Asm->createTempSymbol(Prefix + "end"); + Asm->EmitLabelDifference(EndLabel, BeginLabel, 4); + Asm->OutStreamer->EmitLabel(BeginLabel); + } else + Asm->emitInt32(getHeaderSize() + getUnitDie().getSize()); + + Asm->OutStreamer->AddComment("DWARF version number"); + unsigned Version = DD->getDwarfVersion(); + Asm->emitInt16(Version); + + // DWARF v5 reorders the address size and adds a unit type. + if (Version >= 5) { + Asm->OutStreamer->AddComment("DWARF Unit Type"); + Asm->emitInt8(UT); + Asm->OutStreamer->AddComment("Address Size (in bytes)"); + Asm->emitInt8(Asm->MAI->getCodePointerSize()); + } + + // We share one abbreviations table across all units so it's always at the + // start of the section. Use a relocatable offset where needed to ensure + // linking doesn't invalidate that offset. + Asm->OutStreamer->AddComment("Offset Into Abbrev. Section"); + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + if (UseOffsets) + Asm->emitInt32(0); + else + Asm->emitDwarfSymbolReference( + TLOF.getDwarfAbbrevSection()->getBeginSymbol(), false); + + if (Version <= 4) { + Asm->OutStreamer->AddComment("Address Size (in bytes)"); + Asm->emitInt8(Asm->MAI->getCodePointerSize()); + } +} + +void DwarfTypeUnit::emitHeader(bool UseOffsets) { + DwarfUnit::emitCommonHeader(UseOffsets, + DD->useSplitDwarf() ? dwarf::DW_UT_split_type + : dwarf::DW_UT_type); + Asm->OutStreamer->AddComment("Type Signature"); + Asm->OutStreamer->EmitIntValue(TypeSignature, sizeof(TypeSignature)); + Asm->OutStreamer->AddComment("Type DIE Offset"); + // In a skeleton type unit there is no type DIE so emit a zero offset. + Asm->OutStreamer->EmitIntValue(Ty ? Ty->getOffset() : 0, + sizeof(Ty->getOffset())); +} + +DIE::value_iterator +DwarfUnit::addSectionDelta(DIE &Die, dwarf::Attribute Attribute, + const MCSymbol *Hi, const MCSymbol *Lo) { + return Die.addValue(DIEValueAllocator, Attribute, + DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset + : dwarf::DW_FORM_data4, + new (DIEValueAllocator) DIEDelta(Hi, Lo)); +} + +DIE::value_iterator +DwarfUnit::addSectionLabel(DIE &Die, dwarf::Attribute Attribute, + const MCSymbol *Label, const MCSymbol *Sec) { + if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) + return addLabel(Die, Attribute, + DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset + : dwarf::DW_FORM_data4, + Label); + return addSectionDelta(Die, Attribute, Label, Sec); +} + +bool DwarfTypeUnit::isDwoUnit() const { + // Since there are no skeleton type units, all type units are dwo type units + // when split DWARF is being used. + return DD->useSplitDwarf(); +} + +void DwarfTypeUnit::addGlobalName(StringRef Name, const DIE &Die, + const DIScope *Context) { + getCU().addGlobalNameForTypeUnit(Name, Context); +} + +void DwarfTypeUnit::addGlobalType(const DIType *Ty, const DIE &Die, + const DIScope *Context) { + getCU().addGlobalTypeUnitType(Ty, Context); +} + +const MCSymbol *DwarfUnit::getCrossSectionRelativeBaseAddress() const { + if (!Asm->MAI->doesDwarfUseRelocationsAcrossSections()) + return nullptr; + if (isDwoUnit()) + return nullptr; + return getSection()->getBeginSymbol(); +} + +void DwarfUnit::addStringOffsetsStart() { + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + addSectionLabel(getUnitDie(), dwarf::DW_AT_str_offsets_base, + DU->getStringOffsetsStartSym(), + TLOF.getDwarfStrOffSection()->getBeginSymbol()); +} + +void DwarfUnit::addRnglistsBase() { + assert(DD->getDwarfVersion() >= 5 && + "DW_AT_rnglists_base requires DWARF version 5 or later"); + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + addSectionLabel(getUnitDie(), dwarf::DW_AT_rnglists_base, + DU->getRnglistsTableBaseSym(), + TLOF.getDwarfRnglistsSection()->getBeginSymbol()); +} + +void DwarfTypeUnit::finishNonUnitTypeDIE(DIE& D, const DICompositeType *CTy) { + addFlag(D, dwarf::DW_AT_declaration); + StringRef Name = CTy->getName(); + if (!Name.empty()) + addString(D, dwarf::DW_AT_name, Name); + getCU().createTypeDIE(CTy); +} diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h new file mode 100644 index 000000000000..46c52a1faf4b --- /dev/null +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h @@ -0,0 +1,374 @@ +//===-- llvm/CodeGen/DwarfUnit.h - Dwarf Compile Unit ---*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing dwarf compile unit. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFUNIT_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFUNIT_H + +#include "DwarfDebug.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/DIE.h" +#include "llvm/IR/DIBuilder.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/MC/MCDwarf.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCSection.h" + +namespace llvm { + +class MachineLocation; +class MachineOperand; +class ConstantInt; +class ConstantFP; +class DbgVariable; +class DwarfCompileUnit; + +//===----------------------------------------------------------------------===// +/// This dwarf writer support class manages information associated with a +/// source file. +class DwarfUnit : public DIEUnit { +protected: + /// MDNode for the compile unit. + const DICompileUnit *CUNode; + + // All DIEValues are allocated through this allocator. + BumpPtrAllocator DIEValueAllocator; + + /// Target of Dwarf emission. + AsmPrinter *Asm; + + /// Emitted at the end of the CU and used to compute the CU Length field. + MCSymbol *EndLabel = nullptr; + + // Holders for some common dwarf information. + DwarfDebug *DD; + DwarfFile *DU; + + /// An anonymous type for index type. Owned by DIEUnit. + DIE *IndexTyDie; + + /// Tracks the mapping of unit level debug information variables to debug + /// information entries. + DenseMap<const MDNode *, DIE *> MDNodeToDieMap; + + /// A list of all the DIEBlocks in use. + std::vector<DIEBlock *> DIEBlocks; + + /// A list of all the DIELocs in use. + std::vector<DIELoc *> DIELocs; + + /// This map is used to keep track of subprogram DIEs that need + /// DW_AT_containing_type attribute. This attribute points to a DIE that + /// corresponds to the MDNode mapped with the subprogram DIE. + DenseMap<DIE *, const DINode *> ContainingTypeMap; + + DwarfUnit(dwarf::Tag, const DICompileUnit *Node, AsmPrinter *A, DwarfDebug *DW, + DwarfFile *DWU); + + bool applySubprogramDefinitionAttributes(const DISubprogram *SP, DIE &SPDie); + + bool shareAcrossDWOCUs() const; + bool isShareableAcrossCUs(const DINode *D) const; + +public: + // Accessors. + AsmPrinter* getAsmPrinter() const { return Asm; } + MCSymbol *getEndLabel() const { return EndLabel; } + uint16_t getLanguage() const { return CUNode->getSourceLanguage(); } + const DICompileUnit *getCUNode() const { return CUNode; } + + uint16_t getDwarfVersion() const { return DD->getDwarfVersion(); } + + /// Return true if this compile unit has something to write out. + bool hasContent() const { return getUnitDie().hasChildren(); } + + /// Get string containing language specific context for a global name. + /// + /// Walks the metadata parent chain in a language specific manner (using the + /// compile unit language) and returns it as a string. This is done at the + /// metadata level because DIEs may not currently have been added to the + /// parent context and walking the DIEs looking for names is more expensive + /// than walking the metadata. + std::string getParentContextString(const DIScope *Context) const; + + /// Add a new global name to the compile unit. + virtual void addGlobalName(StringRef Name, const DIE &Die, + const DIScope *Context) = 0; + + /// Add a new global type to the compile unit. + virtual void addGlobalType(const DIType *Ty, const DIE &Die, + const DIScope *Context) = 0; + + /// Returns the DIE map slot for the specified debug variable. + /// + /// We delegate the request to DwarfDebug when the MDNode can be part of the + /// type system, since DIEs for the type system can be shared across CUs and + /// the mappings are kept in DwarfDebug. + DIE *getDIE(const DINode *D) const; + + /// Returns a fresh newly allocated DIELoc. + DIELoc *getDIELoc() { return new (DIEValueAllocator) DIELoc; } + + /// Insert DIE into the map. + /// + /// We delegate the request to DwarfDebug when the MDNode can be part of the + /// type system, since DIEs for the type system can be shared across CUs and + /// the mappings are kept in DwarfDebug. + void insertDIE(const DINode *Desc, DIE *D); + + void insertDIE(DIE *D); + + /// Add a flag that is true to the DIE. + void addFlag(DIE &Die, dwarf::Attribute Attribute); + + /// Add an unsigned integer attribute data and value. + void addUInt(DIEValueList &Die, dwarf::Attribute Attribute, + Optional<dwarf::Form> Form, uint64_t Integer); + + void addUInt(DIEValueList &Block, dwarf::Form Form, uint64_t Integer); + + /// Add an signed integer attribute data and value. + void addSInt(DIEValueList &Die, dwarf::Attribute Attribute, + Optional<dwarf::Form> Form, int64_t Integer); + + void addSInt(DIELoc &Die, Optional<dwarf::Form> Form, int64_t Integer); + + /// Add a string attribute data and value. + /// + /// We always emit a reference to the string pool instead of immediate + /// strings so that DIEs have more predictable sizes. In the case of split + /// dwarf we emit an index into another table which gets us the static offset + /// into the string table. + void addString(DIE &Die, dwarf::Attribute Attribute, StringRef Str); + + /// Add a Dwarf label attribute data and value. + DIEValueList::value_iterator addLabel(DIEValueList &Die, + dwarf::Attribute Attribute, + dwarf::Form Form, + const MCSymbol *Label); + + void addLabel(DIELoc &Die, dwarf::Form Form, const MCSymbol *Label); + + /// Add an offset into a section attribute data and value. + void addSectionOffset(DIE &Die, dwarf::Attribute Attribute, uint64_t Integer); + + /// Add a dwarf op address data and value using the form given and an + /// op of either DW_FORM_addr or DW_FORM_GNU_addr_index. + void addOpAddress(DIELoc &Die, const MCSymbol *Sym); + + /// Add a label delta attribute data and value. + void addLabelDelta(DIE &Die, dwarf::Attribute Attribute, const MCSymbol *Hi, + const MCSymbol *Lo); + + /// Add a DIE attribute data and value. + void addDIEEntry(DIE &Die, dwarf::Attribute Attribute, DIE &Entry); + + /// Add a DIE attribute data and value. + void addDIEEntry(DIE &Die, dwarf::Attribute Attribute, DIEEntry Entry); + + /// Add a type's DW_AT_signature and set the declaration flag. + void addDIETypeSignature(DIE &Die, uint64_t Signature); + + /// Add block data. + void addBlock(DIE &Die, dwarf::Attribute Attribute, DIELoc *Loc); + + /// Add block data. + void addBlock(DIE &Die, dwarf::Attribute Attribute, DIEBlock *Block); + + /// Add location information to specified debug information entry. + void addSourceLine(DIE &Die, unsigned Line, const DIFile *File); + void addSourceLine(DIE &Die, const DILocalVariable *V); + void addSourceLine(DIE &Die, const DIGlobalVariable *G); + void addSourceLine(DIE &Die, const DISubprogram *SP); + void addSourceLine(DIE &Die, const DILabel *L); + void addSourceLine(DIE &Die, const DIType *Ty); + void addSourceLine(DIE &Die, const DIObjCProperty *Ty); + + /// Add constant value entry in variable DIE. + void addConstantValue(DIE &Die, const MachineOperand &MO, const DIType *Ty); + void addConstantValue(DIE &Die, const ConstantInt *CI, const DIType *Ty); + void addConstantValue(DIE &Die, const APInt &Val, const DIType *Ty); + void addConstantValue(DIE &Die, const APInt &Val, bool Unsigned); + void addConstantValue(DIE &Die, uint64_t Val, const DIType *Ty); + void addConstantValue(DIE &Die, bool Unsigned, uint64_t Val); + + /// Add constant value entry in variable DIE. + void addConstantFPValue(DIE &Die, const MachineOperand &MO); + void addConstantFPValue(DIE &Die, const ConstantFP *CFP); + + /// Add a linkage name, if it isn't empty. + void addLinkageName(DIE &Die, StringRef LinkageName); + + /// Add template parameters in buffer. + void addTemplateParams(DIE &Buffer, DINodeArray TParams); + + /// Add thrown types. + void addThrownTypes(DIE &Die, DINodeArray ThrownTypes); + + /// Add a new type attribute to the specified entity. + /// + /// This takes and attribute parameter because DW_AT_friend attributes are + /// also type references. + void addType(DIE &Entity, const DIType *Ty, + dwarf::Attribute Attribute = dwarf::DW_AT_type); + + DIE *getOrCreateNameSpace(const DINamespace *NS); + DIE *getOrCreateModule(const DIModule *M); + DIE *getOrCreateSubprogramDIE(const DISubprogram *SP, bool Minimal = false); + + void applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie, + bool SkipSPAttributes = false); + + /// Creates type DIE with specific context. + DIE *createTypeDIE(const DIScope *Context, DIE &ContextDIE, const DIType *Ty); + + /// Find existing DIE or create new DIE for the given type. + DIE *getOrCreateTypeDIE(const MDNode *TyNode); + + /// Get context owner's DIE. + DIE *getOrCreateContextDIE(const DIScope *Context); + + /// Construct DIEs for types that contain vtables. + void constructContainingTypeDIEs(); + + /// Construct function argument DIEs. + void constructSubprogramArguments(DIE &Buffer, DITypeRefArray Args); + + /// Create a DIE with the given Tag, add the DIE to its parent, and + /// call insertDIE if MD is not null. + DIE &createAndAddDIE(unsigned Tag, DIE &Parent, const DINode *N = nullptr); + + bool useSegmentedStringOffsetsTable() const { + return DD->useSegmentedStringOffsetsTable(); + } + + /// Compute the size of a header for this unit, not including the initial + /// length field. + virtual unsigned getHeaderSize() const { + return sizeof(int16_t) + // DWARF version number + sizeof(int32_t) + // Offset Into Abbrev. Section + sizeof(int8_t) + // Pointer Size (in bytes) + (DD->getDwarfVersion() >= 5 ? sizeof(int8_t) + : 0); // DWARF v5 unit type + } + + /// Emit the header for this unit, not including the initial length field. + virtual void emitHeader(bool UseOffsets) = 0; + + /// Add the DW_AT_str_offsets_base attribute to the unit DIE. + void addStringOffsetsStart(); + + /// Add the DW_AT_rnglists_base attribute to the unit DIE. + void addRnglistsBase(); + + virtual DwarfCompileUnit &getCU() = 0; + + void constructTypeDIE(DIE &Buffer, const DICompositeType *CTy); + + /// addSectionDelta - Add a label delta attribute data and value. + DIE::value_iterator addSectionDelta(DIE &Die, dwarf::Attribute Attribute, + const MCSymbol *Hi, const MCSymbol *Lo); + + /// Add a Dwarf section label attribute data and value. + DIE::value_iterator addSectionLabel(DIE &Die, dwarf::Attribute Attribute, + const MCSymbol *Label, + const MCSymbol *Sec); + + /// If the \p File has an MD5 checksum, return it as an MD5Result + /// allocated in the MCContext. + Optional<MD5::MD5Result> getMD5AsBytes(const DIFile *File) const; + + /// Get context owner's DIE. + DIE *createTypeDIE(const DICompositeType *Ty); + +protected: + ~DwarfUnit(); + + /// Create new static data member DIE. + DIE *getOrCreateStaticMemberDIE(const DIDerivedType *DT); + + /// Look up the source ID for the given file. If none currently exists, + /// create a new ID and insert it in the line table. + virtual unsigned getOrCreateSourceID(const DIFile *File) = 0; + + /// Emit the common part of the header for this unit. + void emitCommonHeader(bool UseOffsets, dwarf::UnitType UT); + +private: + void constructTypeDIE(DIE &Buffer, const DIBasicType *BTy); + void constructTypeDIE(DIE &Buffer, const DIDerivedType *DTy); + void constructTypeDIE(DIE &Buffer, const DISubroutineType *CTy); + void constructSubrangeDIE(DIE &Buffer, const DISubrange *SR, DIE *IndexTy); + void constructArrayTypeDIE(DIE &Buffer, const DICompositeType *CTy); + void constructEnumTypeDIE(DIE &Buffer, const DICompositeType *CTy); + DIE &constructMemberDIE(DIE &Buffer, const DIDerivedType *DT); + void constructTemplateTypeParameterDIE(DIE &Buffer, + const DITemplateTypeParameter *TP); + void constructTemplateValueParameterDIE(DIE &Buffer, + const DITemplateValueParameter *TVP); + + /// Return the default lower bound for an array. + /// + /// If the DWARF version doesn't handle the language, return -1. + int64_t getDefaultLowerBound() const; + + /// Get an anonymous type for index type. + DIE *getIndexTyDie(); + + /// Set D as anonymous type for index which can be reused later. + void setIndexTyDie(DIE *D) { IndexTyDie = D; } + + virtual void finishNonUnitTypeDIE(DIE& D, const DICompositeType *CTy) = 0; + + /// If this is a named finished type then include it in the list of types for + /// the accelerator tables. + void updateAcceleratorTables(const DIScope *Context, const DIType *Ty, + const DIE &TyDIE); + + virtual bool isDwoUnit() const = 0; + const MCSymbol *getCrossSectionRelativeBaseAddress() const override; +}; + +class DwarfTypeUnit final : public DwarfUnit { + uint64_t TypeSignature; + const DIE *Ty; + DwarfCompileUnit &CU; + MCDwarfDwoLineTable *SplitLineTable; + bool UsedLineTable = false; + + unsigned getOrCreateSourceID(const DIFile *File) override; + void finishNonUnitTypeDIE(DIE& D, const DICompositeType *CTy) override; + bool isDwoUnit() const override; + +public: + DwarfTypeUnit(DwarfCompileUnit &CU, AsmPrinter *A, DwarfDebug *DW, + DwarfFile *DWU, MCDwarfDwoLineTable *SplitLineTable = nullptr); + + void setTypeSignature(uint64_t Signature) { TypeSignature = Signature; } + void setType(const DIE *Ty) { this->Ty = Ty; } + + /// Emit the header for this unit, not including the initial length field. + void emitHeader(bool UseOffsets) override; + unsigned getHeaderSize() const override { + return DwarfUnit::getHeaderSize() + sizeof(uint64_t) + // Type Signature + sizeof(uint32_t); // Type DIE Offset + } + void addGlobalName(StringRef Name, const DIE &Die, + const DIScope *Context) override; + void addGlobalType(const DIType *Ty, const DIE &Die, + const DIScope *Context) override; + DwarfCompileUnit &getCU() override { return CU; } +}; +} // end llvm namespace +#endif diff --git a/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp new file mode 100644 index 000000000000..31dfaaac836e --- /dev/null +++ b/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp @@ -0,0 +1,654 @@ +//===- CodeGen/AsmPrinter/EHStreamer.cpp - Exception Directive Streamer ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing exception info into assembly files. +// +//===----------------------------------------------------------------------===// + +#include "EHStreamer.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Twine.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCTargetOptions.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/LEB128.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <vector> + +using namespace llvm; + +EHStreamer::EHStreamer(AsmPrinter *A) : Asm(A), MMI(Asm->MMI) {} + +EHStreamer::~EHStreamer() = default; + +/// How many leading type ids two landing pads have in common. +unsigned EHStreamer::sharedTypeIDs(const LandingPadInfo *L, + const LandingPadInfo *R) { + const std::vector<int> &LIds = L->TypeIds, &RIds = R->TypeIds; + unsigned LSize = LIds.size(), RSize = RIds.size(); + unsigned MinSize = LSize < RSize ? LSize : RSize; + unsigned Count = 0; + + for (; Count != MinSize; ++Count) + if (LIds[Count] != RIds[Count]) + return Count; + + return Count; +} + +/// Compute the actions table and gather the first action index for each landing +/// pad site. +void EHStreamer::computeActionsTable( + const SmallVectorImpl<const LandingPadInfo *> &LandingPads, + SmallVectorImpl<ActionEntry> &Actions, + SmallVectorImpl<unsigned> &FirstActions) { + // The action table follows the call-site table in the LSDA. The individual + // records are of two types: + // + // * Catch clause + // * Exception specification + // + // The two record kinds have the same format, with only small differences. + // They are distinguished by the "switch value" field: Catch clauses + // (TypeInfos) have strictly positive switch values, and exception + // specifications (FilterIds) have strictly negative switch values. Value 0 + // indicates a catch-all clause. + // + // Negative type IDs index into FilterIds. Positive type IDs index into + // TypeInfos. The value written for a positive type ID is just the type ID + // itself. For a negative type ID, however, the value written is the + // (negative) byte offset of the corresponding FilterIds entry. The byte + // offset is usually equal to the type ID (because the FilterIds entries are + // written using a variable width encoding, which outputs one byte per entry + // as long as the value written is not too large) but can differ. This kind + // of complication does not occur for positive type IDs because type infos are + // output using a fixed width encoding. FilterOffsets[i] holds the byte + // offset corresponding to FilterIds[i]. + + const std::vector<unsigned> &FilterIds = Asm->MF->getFilterIds(); + SmallVector<int, 16> FilterOffsets; + FilterOffsets.reserve(FilterIds.size()); + int Offset = -1; + + for (std::vector<unsigned>::const_iterator + I = FilterIds.begin(), E = FilterIds.end(); I != E; ++I) { + FilterOffsets.push_back(Offset); + Offset -= getULEB128Size(*I); + } + + FirstActions.reserve(LandingPads.size()); + + int FirstAction = 0; + unsigned SizeActions = 0; // Total size of all action entries for a function + const LandingPadInfo *PrevLPI = nullptr; + + for (SmallVectorImpl<const LandingPadInfo *>::const_iterator + I = LandingPads.begin(), E = LandingPads.end(); I != E; ++I) { + const LandingPadInfo *LPI = *I; + const std::vector<int> &TypeIds = LPI->TypeIds; + unsigned NumShared = PrevLPI ? sharedTypeIDs(LPI, PrevLPI) : 0; + unsigned SizeSiteActions = 0; // Total size of all entries for a landingpad + + if (NumShared < TypeIds.size()) { + // Size of one action entry (typeid + next action) + unsigned SizeActionEntry = 0; + unsigned PrevAction = (unsigned)-1; + + if (NumShared) { + unsigned SizePrevIds = PrevLPI->TypeIds.size(); + assert(Actions.size()); + PrevAction = Actions.size() - 1; + SizeActionEntry = getSLEB128Size(Actions[PrevAction].NextAction) + + getSLEB128Size(Actions[PrevAction].ValueForTypeID); + + for (unsigned j = NumShared; j != SizePrevIds; ++j) { + assert(PrevAction != (unsigned)-1 && "PrevAction is invalid!"); + SizeActionEntry -= getSLEB128Size(Actions[PrevAction].ValueForTypeID); + SizeActionEntry += -Actions[PrevAction].NextAction; + PrevAction = Actions[PrevAction].Previous; + } + } + + // Compute the actions. + for (unsigned J = NumShared, M = TypeIds.size(); J != M; ++J) { + int TypeID = TypeIds[J]; + assert(-1 - TypeID < (int)FilterOffsets.size() && "Unknown filter id!"); + int ValueForTypeID = + isFilterEHSelector(TypeID) ? FilterOffsets[-1 - TypeID] : TypeID; + unsigned SizeTypeID = getSLEB128Size(ValueForTypeID); + + int NextAction = SizeActionEntry ? -(SizeActionEntry + SizeTypeID) : 0; + SizeActionEntry = SizeTypeID + getSLEB128Size(NextAction); + SizeSiteActions += SizeActionEntry; + + ActionEntry Action = { ValueForTypeID, NextAction, PrevAction }; + Actions.push_back(Action); + PrevAction = Actions.size() - 1; + } + + // Record the first action of the landing pad site. + FirstAction = SizeActions + SizeSiteActions - SizeActionEntry + 1; + } // else identical - re-use previous FirstAction + + // Information used when creating the call-site table. The action record + // field of the call site record is the offset of the first associated + // action record, relative to the start of the actions table. This value is + // biased by 1 (1 indicating the start of the actions table), and 0 + // indicates that there are no actions. + FirstActions.push_back(FirstAction); + + // Compute this sites contribution to size. + SizeActions += SizeSiteActions; + + PrevLPI = LPI; + } +} + +/// Return `true' if this is a call to a function marked `nounwind'. Return +/// `false' otherwise. +bool EHStreamer::callToNoUnwindFunction(const MachineInstr *MI) { + assert(MI->isCall() && "This should be a call instruction!"); + + bool MarkedNoUnwind = false; + bool SawFunc = false; + + for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) { + const MachineOperand &MO = MI->getOperand(I); + + if (!MO.isGlobal()) continue; + + const Function *F = dyn_cast<Function>(MO.getGlobal()); + if (!F) continue; + + if (SawFunc) { + // Be conservative. If we have more than one function operand for this + // call, then we can't make the assumption that it's the callee and + // not a parameter to the call. + // + // FIXME: Determine if there's a way to say that `F' is the callee or + // parameter. + MarkedNoUnwind = false; + break; + } + + MarkedNoUnwind = F->doesNotThrow(); + SawFunc = true; + } + + return MarkedNoUnwind; +} + +void EHStreamer::computePadMap( + const SmallVectorImpl<const LandingPadInfo *> &LandingPads, + RangeMapType &PadMap) { + // Invokes and nounwind calls have entries in PadMap (due to being bracketed + // by try-range labels when lowered). Ordinary calls do not, so appropriate + // try-ranges for them need be deduced so we can put them in the LSDA. + for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) { + const LandingPadInfo *LandingPad = LandingPads[i]; + for (unsigned j = 0, E = LandingPad->BeginLabels.size(); j != E; ++j) { + MCSymbol *BeginLabel = LandingPad->BeginLabels[j]; + assert(!PadMap.count(BeginLabel) && "Duplicate landing pad labels!"); + PadRange P = { i, j }; + PadMap[BeginLabel] = P; + } + } +} + +/// Compute the call-site table. The entry for an invoke has a try-range +/// containing the call, a non-zero landing pad, and an appropriate action. The +/// entry for an ordinary call has a try-range containing the call and zero for +/// the landing pad and the action. Calls marked 'nounwind' have no entry and +/// must not be contained in the try-range of any entry - they form gaps in the +/// table. Entries must be ordered by try-range address. +void EHStreamer:: +computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, + const SmallVectorImpl<const LandingPadInfo *> &LandingPads, + const SmallVectorImpl<unsigned> &FirstActions) { + RangeMapType PadMap; + computePadMap(LandingPads, PadMap); + + // The end label of the previous invoke or nounwind try-range. + MCSymbol *LastLabel = nullptr; + + // Whether there is a potentially throwing instruction (currently this means + // an ordinary call) between the end of the previous try-range and now. + bool SawPotentiallyThrowing = false; + + // Whether the last CallSite entry was for an invoke. + bool PreviousIsInvoke = false; + + bool IsSJLJ = Asm->MAI->getExceptionHandlingType() == ExceptionHandling::SjLj; + + // Visit all instructions in order of address. + for (const auto &MBB : *Asm->MF) { + for (const auto &MI : MBB) { + if (!MI.isEHLabel()) { + if (MI.isCall()) + SawPotentiallyThrowing |= !callToNoUnwindFunction(&MI); + continue; + } + + // End of the previous try-range? + MCSymbol *BeginLabel = MI.getOperand(0).getMCSymbol(); + if (BeginLabel == LastLabel) + SawPotentiallyThrowing = false; + + // Beginning of a new try-range? + RangeMapType::const_iterator L = PadMap.find(BeginLabel); + if (L == PadMap.end()) + // Nope, it was just some random label. + continue; + + const PadRange &P = L->second; + const LandingPadInfo *LandingPad = LandingPads[P.PadIndex]; + assert(BeginLabel == LandingPad->BeginLabels[P.RangeIndex] && + "Inconsistent landing pad map!"); + + // For Dwarf exception handling (SjLj handling doesn't use this). If some + // instruction between the previous try-range and this one may throw, + // create a call-site entry with no landing pad for the region between the + // try-ranges. + if (SawPotentiallyThrowing && Asm->MAI->usesCFIForEH()) { + CallSiteEntry Site = { LastLabel, BeginLabel, nullptr, 0 }; + CallSites.push_back(Site); + PreviousIsInvoke = false; + } + + LastLabel = LandingPad->EndLabels[P.RangeIndex]; + assert(BeginLabel && LastLabel && "Invalid landing pad!"); + + if (!LandingPad->LandingPadLabel) { + // Create a gap. + PreviousIsInvoke = false; + } else { + // This try-range is for an invoke. + CallSiteEntry Site = { + BeginLabel, + LastLabel, + LandingPad, + FirstActions[P.PadIndex] + }; + + // Try to merge with the previous call-site. SJLJ doesn't do this + if (PreviousIsInvoke && !IsSJLJ) { + CallSiteEntry &Prev = CallSites.back(); + if (Site.LPad == Prev.LPad && Site.Action == Prev.Action) { + // Extend the range of the previous entry. + Prev.EndLabel = Site.EndLabel; + continue; + } + } + + // Otherwise, create a new call-site. + if (!IsSJLJ) + CallSites.push_back(Site); + else { + // SjLj EH must maintain the call sites in the order assigned + // to them by the SjLjPrepare pass. + unsigned SiteNo = Asm->MF->getCallSiteBeginLabel(BeginLabel); + if (CallSites.size() < SiteNo) + CallSites.resize(SiteNo); + CallSites[SiteNo - 1] = Site; + } + PreviousIsInvoke = true; + } + } + } + + // If some instruction between the previous try-range and the end of the + // function may throw, create a call-site entry with no landing pad for the + // region following the try-range. + if (SawPotentiallyThrowing && !IsSJLJ) { + CallSiteEntry Site = { LastLabel, nullptr, nullptr, 0 }; + CallSites.push_back(Site); + } +} + +/// Emit landing pads and actions. +/// +/// The general organization of the table is complex, but the basic concepts are +/// easy. First there is a header which describes the location and organization +/// of the three components that follow. +/// +/// 1. The landing pad site information describes the range of code covered by +/// the try. In our case it's an accumulation of the ranges covered by the +/// invokes in the try. There is also a reference to the landing pad that +/// handles the exception once processed. Finally an index into the actions +/// table. +/// 2. The action table, in our case, is composed of pairs of type IDs and next +/// action offset. Starting with the action index from the landing pad +/// site, each type ID is checked for a match to the current exception. If +/// it matches then the exception and type id are passed on to the landing +/// pad. Otherwise the next action is looked up. This chain is terminated +/// with a next action of zero. If no type id is found then the frame is +/// unwound and handling continues. +/// 3. Type ID table contains references to all the C++ typeinfo for all +/// catches in the function. This tables is reverse indexed base 1. +/// +/// Returns the starting symbol of an exception table. +MCSymbol *EHStreamer::emitExceptionTable() { + const MachineFunction *MF = Asm->MF; + const std::vector<const GlobalValue *> &TypeInfos = MF->getTypeInfos(); + const std::vector<unsigned> &FilterIds = MF->getFilterIds(); + const std::vector<LandingPadInfo> &PadInfos = MF->getLandingPads(); + + // Sort the landing pads in order of their type ids. This is used to fold + // duplicate actions. + SmallVector<const LandingPadInfo *, 64> LandingPads; + LandingPads.reserve(PadInfos.size()); + + for (unsigned i = 0, N = PadInfos.size(); i != N; ++i) + LandingPads.push_back(&PadInfos[i]); + + // Order landing pads lexicographically by type id. + llvm::sort(LandingPads, [](const LandingPadInfo *L, const LandingPadInfo *R) { + return L->TypeIds < R->TypeIds; + }); + + // Compute the actions table and gather the first action index for each + // landing pad site. + SmallVector<ActionEntry, 32> Actions; + SmallVector<unsigned, 64> FirstActions; + computeActionsTable(LandingPads, Actions, FirstActions); + + // Compute the call-site table. + SmallVector<CallSiteEntry, 64> CallSites; + computeCallSiteTable(CallSites, LandingPads, FirstActions); + + bool IsSJLJ = Asm->MAI->getExceptionHandlingType() == ExceptionHandling::SjLj; + bool IsWasm = Asm->MAI->getExceptionHandlingType() == ExceptionHandling::Wasm; + unsigned CallSiteEncoding = + IsSJLJ ? static_cast<unsigned>(dwarf::DW_EH_PE_udata4) : + Asm->getObjFileLowering().getCallSiteEncoding(); + bool HaveTTData = !TypeInfos.empty() || !FilterIds.empty(); + + // Type infos. + MCSection *LSDASection = Asm->getObjFileLowering().getLSDASection(); + unsigned TTypeEncoding; + + if (!HaveTTData) { + // If there is no TypeInfo, then we just explicitly say that we're omitting + // that bit. + TTypeEncoding = dwarf::DW_EH_PE_omit; + } else { + // Okay, we have actual filters or typeinfos to emit. As such, we need to + // pick a type encoding for them. We're about to emit a list of pointers to + // typeinfo objects at the end of the LSDA. However, unless we're in static + // mode, this reference will require a relocation by the dynamic linker. + // + // Because of this, we have a couple of options: + // + // 1) If we are in -static mode, we can always use an absolute reference + // from the LSDA, because the static linker will resolve it. + // + // 2) Otherwise, if the LSDA section is writable, we can output the direct + // reference to the typeinfo and allow the dynamic linker to relocate + // it. Since it is in a writable section, the dynamic linker won't + // have a problem. + // + // 3) Finally, if we're in PIC mode and the LDSA section isn't writable, + // we need to use some form of indirection. For example, on Darwin, + // we can output a statically-relocatable reference to a dyld stub. The + // offset to the stub is constant, but the contents are in a section + // that is updated by the dynamic linker. This is easy enough, but we + // need to tell the personality function of the unwinder to indirect + // through the dyld stub. + // + // FIXME: When (3) is actually implemented, we'll have to emit the stubs + // somewhere. This predicate should be moved to a shared location that is + // in target-independent code. + // + TTypeEncoding = Asm->getObjFileLowering().getTTypeEncoding(); + } + + // Begin the exception table. + // Sometimes we want not to emit the data into separate section (e.g. ARM + // EHABI). In this case LSDASection will be NULL. + if (LSDASection) + Asm->OutStreamer->SwitchSection(LSDASection); + Asm->EmitAlignment(Align(4)); + + // Emit the LSDA. + MCSymbol *GCCETSym = + Asm->OutContext.getOrCreateSymbol(Twine("GCC_except_table")+ + Twine(Asm->getFunctionNumber())); + Asm->OutStreamer->EmitLabel(GCCETSym); + Asm->OutStreamer->EmitLabel(Asm->getCurExceptionSym()); + + // Emit the LSDA header. + Asm->EmitEncodingByte(dwarf::DW_EH_PE_omit, "@LPStart"); + Asm->EmitEncodingByte(TTypeEncoding, "@TType"); + + MCSymbol *TTBaseLabel = nullptr; + if (HaveTTData) { + // N.B.: There is a dependency loop between the size of the TTBase uleb128 + // here and the amount of padding before the aligned type table. The + // assembler must sometimes pad this uleb128 or insert extra padding before + // the type table. See PR35809 or GNU as bug 4029. + MCSymbol *TTBaseRefLabel = Asm->createTempSymbol("ttbaseref"); + TTBaseLabel = Asm->createTempSymbol("ttbase"); + Asm->EmitLabelDifferenceAsULEB128(TTBaseLabel, TTBaseRefLabel); + Asm->OutStreamer->EmitLabel(TTBaseRefLabel); + } + + bool VerboseAsm = Asm->OutStreamer->isVerboseAsm(); + + // Emit the landing pad call site table. + MCSymbol *CstBeginLabel = Asm->createTempSymbol("cst_begin"); + MCSymbol *CstEndLabel = Asm->createTempSymbol("cst_end"); + Asm->EmitEncodingByte(CallSiteEncoding, "Call site"); + Asm->EmitLabelDifferenceAsULEB128(CstEndLabel, CstBeginLabel); + Asm->OutStreamer->EmitLabel(CstBeginLabel); + + // SjLj / Wasm Exception handling + if (IsSJLJ || IsWasm) { + unsigned idx = 0; + for (SmallVectorImpl<CallSiteEntry>::const_iterator + I = CallSites.begin(), E = CallSites.end(); I != E; ++I, ++idx) { + const CallSiteEntry &S = *I; + + // Index of the call site entry. + if (VerboseAsm) { + Asm->OutStreamer->AddComment(">> Call Site " + Twine(idx) + " <<"); + Asm->OutStreamer->AddComment(" On exception at call site "+Twine(idx)); + } + Asm->EmitULEB128(idx); + + // Offset of the first associated action record, relative to the start of + // the action table. This value is biased by 1 (1 indicates the start of + // the action table), and 0 indicates that there are no actions. + if (VerboseAsm) { + if (S.Action == 0) + Asm->OutStreamer->AddComment(" Action: cleanup"); + else + Asm->OutStreamer->AddComment(" Action: " + + Twine((S.Action - 1) / 2 + 1)); + } + Asm->EmitULEB128(S.Action); + } + } else { + // Itanium LSDA exception handling + + // The call-site table is a list of all call sites that may throw an + // exception (including C++ 'throw' statements) in the procedure + // fragment. It immediately follows the LSDA header. Each entry indicates, + // for a given call, the first corresponding action record and corresponding + // landing pad. + // + // The table begins with the number of bytes, stored as an LEB128 + // compressed, unsigned integer. The records immediately follow the record + // count. They are sorted in increasing call-site address. Each record + // indicates: + // + // * The position of the call-site. + // * The position of the landing pad. + // * The first action record for that call site. + // + // A missing entry in the call-site table indicates that a call is not + // supposed to throw. + + unsigned Entry = 0; + for (SmallVectorImpl<CallSiteEntry>::const_iterator + I = CallSites.begin(), E = CallSites.end(); I != E; ++I) { + const CallSiteEntry &S = *I; + + MCSymbol *EHFuncBeginSym = Asm->getFunctionBegin(); + + MCSymbol *BeginLabel = S.BeginLabel; + if (!BeginLabel) + BeginLabel = EHFuncBeginSym; + MCSymbol *EndLabel = S.EndLabel; + if (!EndLabel) + EndLabel = Asm->getFunctionEnd(); + + // Offset of the call site relative to the start of the procedure. + if (VerboseAsm) + Asm->OutStreamer->AddComment(">> Call Site " + Twine(++Entry) + " <<"); + Asm->EmitCallSiteOffset(BeginLabel, EHFuncBeginSym, CallSiteEncoding); + if (VerboseAsm) + Asm->OutStreamer->AddComment(Twine(" Call between ") + + BeginLabel->getName() + " and " + + EndLabel->getName()); + Asm->EmitCallSiteOffset(EndLabel, BeginLabel, CallSiteEncoding); + + // Offset of the landing pad relative to the start of the procedure. + if (!S.LPad) { + if (VerboseAsm) + Asm->OutStreamer->AddComment(" has no landing pad"); + Asm->EmitCallSiteValue(0, CallSiteEncoding); + } else { + if (VerboseAsm) + Asm->OutStreamer->AddComment(Twine(" jumps to ") + + S.LPad->LandingPadLabel->getName()); + Asm->EmitCallSiteOffset(S.LPad->LandingPadLabel, EHFuncBeginSym, + CallSiteEncoding); + } + + // Offset of the first associated action record, relative to the start of + // the action table. This value is biased by 1 (1 indicates the start of + // the action table), and 0 indicates that there are no actions. + if (VerboseAsm) { + if (S.Action == 0) + Asm->OutStreamer->AddComment(" On action: cleanup"); + else + Asm->OutStreamer->AddComment(" On action: " + + Twine((S.Action - 1) / 2 + 1)); + } + Asm->EmitULEB128(S.Action); + } + } + Asm->OutStreamer->EmitLabel(CstEndLabel); + + // Emit the Action Table. + int Entry = 0; + for (SmallVectorImpl<ActionEntry>::const_iterator + I = Actions.begin(), E = Actions.end(); I != E; ++I) { + const ActionEntry &Action = *I; + + if (VerboseAsm) { + // Emit comments that decode the action table. + Asm->OutStreamer->AddComment(">> Action Record " + Twine(++Entry) + " <<"); + } + + // Type Filter + // + // Used by the runtime to match the type of the thrown exception to the + // type of the catch clauses or the types in the exception specification. + if (VerboseAsm) { + if (Action.ValueForTypeID > 0) + Asm->OutStreamer->AddComment(" Catch TypeInfo " + + Twine(Action.ValueForTypeID)); + else if (Action.ValueForTypeID < 0) + Asm->OutStreamer->AddComment(" Filter TypeInfo " + + Twine(Action.ValueForTypeID)); + else + Asm->OutStreamer->AddComment(" Cleanup"); + } + Asm->EmitSLEB128(Action.ValueForTypeID); + + // Action Record + // + // Self-relative signed displacement in bytes of the next action record, + // or 0 if there is no next action record. + if (VerboseAsm) { + if (Action.NextAction == 0) { + Asm->OutStreamer->AddComment(" No further actions"); + } else { + unsigned NextAction = Entry + (Action.NextAction + 1) / 2; + Asm->OutStreamer->AddComment(" Continue to action "+Twine(NextAction)); + } + } + Asm->EmitSLEB128(Action.NextAction); + } + + if (HaveTTData) { + Asm->EmitAlignment(Align(4)); + emitTypeInfos(TTypeEncoding, TTBaseLabel); + } + + Asm->EmitAlignment(Align(4)); + return GCCETSym; +} + +void EHStreamer::emitTypeInfos(unsigned TTypeEncoding, MCSymbol *TTBaseLabel) { + const MachineFunction *MF = Asm->MF; + const std::vector<const GlobalValue *> &TypeInfos = MF->getTypeInfos(); + const std::vector<unsigned> &FilterIds = MF->getFilterIds(); + + bool VerboseAsm = Asm->OutStreamer->isVerboseAsm(); + + int Entry = 0; + // Emit the Catch TypeInfos. + if (VerboseAsm && !TypeInfos.empty()) { + Asm->OutStreamer->AddComment(">> Catch TypeInfos <<"); + Asm->OutStreamer->AddBlankLine(); + Entry = TypeInfos.size(); + } + + for (const GlobalValue *GV : make_range(TypeInfos.rbegin(), + TypeInfos.rend())) { + if (VerboseAsm) + Asm->OutStreamer->AddComment("TypeInfo " + Twine(Entry--)); + Asm->EmitTTypeReference(GV, TTypeEncoding); + } + + Asm->OutStreamer->EmitLabel(TTBaseLabel); + + // Emit the Exception Specifications. + if (VerboseAsm && !FilterIds.empty()) { + Asm->OutStreamer->AddComment(">> Filter TypeInfos <<"); + Asm->OutStreamer->AddBlankLine(); + Entry = 0; + } + for (std::vector<unsigned>::const_iterator + I = FilterIds.begin(), E = FilterIds.end(); I < E; ++I) { + unsigned TypeID = *I; + if (VerboseAsm) { + --Entry; + if (isFilterEHSelector(TypeID)) + Asm->OutStreamer->AddComment("FilterInfo " + Twine(Entry)); + } + + Asm->EmitULEB128(TypeID); + } +} diff --git a/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h b/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h new file mode 100644 index 000000000000..e62cf17a05d4 --- /dev/null +++ b/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h @@ -0,0 +1,140 @@ +//===- EHStreamer.h - Exception Handling Directive Streamer -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing exception info into assembly files. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_EHSTREAMER_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_EHSTREAMER_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/CodeGen/AsmPrinterHandler.h" +#include "llvm/Support/Compiler.h" + +namespace llvm { + +class AsmPrinter; +struct LandingPadInfo; +class MachineInstr; +class MachineModuleInfo; +class MCSymbol; +template <typename T> class SmallVectorImpl; + +/// Emits exception handling directives. +class LLVM_LIBRARY_VISIBILITY EHStreamer : public AsmPrinterHandler { +protected: + /// Target of directive emission. + AsmPrinter *Asm; + + /// Collected machine module information. + MachineModuleInfo *MMI; + + /// How many leading type ids two landing pads have in common. + static unsigned sharedTypeIDs(const LandingPadInfo *L, + const LandingPadInfo *R); + + /// Structure holding a try-range and the associated landing pad. + struct PadRange { + // The index of the landing pad. + unsigned PadIndex; + + // The index of the begin and end labels in the landing pad's label lists. + unsigned RangeIndex; + }; + + using RangeMapType = DenseMap<MCSymbol *, PadRange>; + + /// Structure describing an entry in the actions table. + struct ActionEntry { + int ValueForTypeID; // The value to write - may not be equal to the type id. + int NextAction; + unsigned Previous; + }; + + /// Structure describing an entry in the call-site table. + struct CallSiteEntry { + // The 'try-range' is BeginLabel .. EndLabel. + MCSymbol *BeginLabel; // Null indicates the start of the function. + MCSymbol *EndLabel; // Null indicates the end of the function. + + // LPad contains the landing pad start labels. + const LandingPadInfo *LPad; // Null indicates that there is no landing pad. + + unsigned Action; + }; + + /// Compute the actions table and gather the first action index for each + /// landing pad site. + void computeActionsTable(const SmallVectorImpl<const LandingPadInfo *> &LandingPads, + SmallVectorImpl<ActionEntry> &Actions, + SmallVectorImpl<unsigned> &FirstActions); + + void computePadMap(const SmallVectorImpl<const LandingPadInfo *> &LandingPads, + RangeMapType &PadMap); + + /// Compute the call-site table. The entry for an invoke has a try-range + /// containing the call, a non-zero landing pad and an appropriate action. + /// The entry for an ordinary call has a try-range containing the call and + /// zero for the landing pad and the action. Calls marked 'nounwind' have + /// no entry and must not be contained in the try-range of any entry - they + /// form gaps in the table. Entries must be ordered by try-range address. + virtual void computeCallSiteTable( + SmallVectorImpl<CallSiteEntry> &CallSites, + const SmallVectorImpl<const LandingPadInfo *> &LandingPads, + const SmallVectorImpl<unsigned> &FirstActions); + + /// Emit landing pads and actions. + /// + /// The general organization of the table is complex, but the basic concepts + /// are easy. First there is a header which describes the location and + /// organization of the three components that follow. + /// 1. The landing pad site information describes the range of code covered + /// by the try. In our case it's an accumulation of the ranges covered + /// by the invokes in the try. There is also a reference to the landing + /// pad that handles the exception once processed. Finally an index into + /// the actions table. + /// 2. The action table, in our case, is composed of pairs of type ids + /// and next action offset. Starting with the action index from the + /// landing pad site, each type Id is checked for a match to the current + /// exception. If it matches then the exception and type id are passed + /// on to the landing pad. Otherwise the next action is looked up. This + /// chain is terminated with a next action of zero. If no type id is + /// found the frame is unwound and handling continues. + /// 3. Type id table contains references to all the C++ typeinfo for all + /// catches in the function. This tables is reversed indexed base 1. + /// + /// Returns the starting symbol of an exception table. + MCSymbol *emitExceptionTable(); + + virtual void emitTypeInfos(unsigned TTypeEncoding, MCSymbol *TTBaseLabel); + + // Helpers for identifying what kind of clause an EH typeid or selector + // corresponds to. Negative selectors are for filter clauses, the zero + // selector is for cleanups, and positive selectors are for catch clauses. + static bool isFilterEHSelector(int Selector) { return Selector < 0; } + static bool isCleanupEHSelector(int Selector) { return Selector == 0; } + static bool isCatchEHSelector(int Selector) { return Selector > 0; } + +public: + EHStreamer(AsmPrinter *A); + ~EHStreamer() override; + + // Unused. + void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override {} + void beginInstruction(const MachineInstr *MI) override {} + void endInstruction() override {} + + /// Return `true' if this is a call to a function marked `nounwind'. Return + /// `false' otherwise. + static bool callToNoUnwindFunction(const MachineInstr *MI); +}; + +} // end namespace llvm + +#endif // LLVM_LIB_CODEGEN_ASMPRINTER_EHSTREAMER_H diff --git a/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp new file mode 100644 index 000000000000..3849644d1584 --- /dev/null +++ b/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp @@ -0,0 +1,121 @@ +//===- ErlangGCPrinter.cpp - Erlang/OTP frametable emitter ----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the compiler plugin that is used in order to emit +// garbage collection information in a convenient layout for parsing and +// loading in the Erlang/OTP runtime. +// +//===----------------------------------------------------------------------===// + +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/BuiltinGCs.h" +#include "llvm/CodeGen/GCMetadata.h" +#include "llvm/CodeGen/GCMetadataPrinter.h" +#include "llvm/CodeGen/GCStrategy.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Target/TargetLoweringObjectFile.h" + +using namespace llvm; + +namespace { + +class ErlangGCPrinter : public GCMetadataPrinter { +public: + void finishAssembly(Module &M, GCModuleInfo &Info, AsmPrinter &AP) override; +}; + +} // end anonymous namespace + +static GCMetadataPrinterRegistry::Add<ErlangGCPrinter> + X("erlang", "erlang-compatible garbage collector"); + +void ErlangGCPrinter::finishAssembly(Module &M, GCModuleInfo &Info, + AsmPrinter &AP) { + MCStreamer &OS = *AP.OutStreamer; + unsigned IntPtrSize = M.getDataLayout().getPointerSize(); + + // Put this in a custom .note section. + OS.SwitchSection( + AP.getObjFileLowering().getContext().getELFSection(".note.gc", + ELF::SHT_PROGBITS, 0)); + + // For each function... + for (GCModuleInfo::FuncInfoVec::iterator FI = Info.funcinfo_begin(), + IE = Info.funcinfo_end(); + FI != IE; ++FI) { + GCFunctionInfo &MD = **FI; + if (MD.getStrategy().getName() != getStrategy().getName()) + // this function is managed by some other GC + continue; + /** A compact GC layout. Emit this data structure: + * + * struct { + * int16_t PointCount; + * void *SafePointAddress[PointCount]; + * int16_t StackFrameSize; (in words) + * int16_t StackArity; + * int16_t LiveCount; + * int16_t LiveOffsets[LiveCount]; + * } __gcmap_<FUNCTIONNAME>; + **/ + + // Align to address width. + AP.EmitAlignment(IntPtrSize == 4 ? Align(4) : Align(8)); + + // Emit PointCount. + OS.AddComment("safe point count"); + AP.emitInt16(MD.size()); + + // And each safe point... + for (GCFunctionInfo::iterator PI = MD.begin(), PE = MD.end(); PI != PE; + ++PI) { + // Emit the address of the safe point. + OS.AddComment("safe point address"); + MCSymbol *Label = PI->Label; + AP.EmitLabelPlusOffset(Label /*Hi*/, 0 /*Offset*/, 4 /*Size*/); + } + + // Stack information never change in safe points! Only print info from the + // first call-site. + GCFunctionInfo::iterator PI = MD.begin(); + + // Emit the stack frame size. + OS.AddComment("stack frame size (in words)"); + AP.emitInt16(MD.getFrameSize() / IntPtrSize); + + // Emit stack arity, i.e. the number of stacked arguments. + unsigned RegisteredArgs = IntPtrSize == 4 ? 5 : 6; + unsigned StackArity = MD.getFunction().arg_size() > RegisteredArgs + ? MD.getFunction().arg_size() - RegisteredArgs + : 0; + OS.AddComment("stack arity"); + AP.emitInt16(StackArity); + + // Emit the number of live roots in the function. + OS.AddComment("live root count"); + AP.emitInt16(MD.live_size(PI)); + + // And for each live root... + for (GCFunctionInfo::live_iterator LI = MD.live_begin(PI), + LE = MD.live_end(PI); + LI != LE; ++LI) { + // Emit live root's offset within the stack frame. + OS.AddComment("stack index (offset / wordsize)"); + AP.emitInt16(LI->StackOffset / IntPtrSize); + } + } +} + +void llvm::linkErlangGCPrinter() {} diff --git a/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp new file mode 100644 index 000000000000..b4eda5fa8c58 --- /dev/null +++ b/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp @@ -0,0 +1,186 @@ +//===- OcamlGCPrinter.cpp - Ocaml frametable emitter ----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements printing the assembly code for an Ocaml frametable. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/Twine.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/BuiltinGCs.h" +#include "llvm/CodeGen/GCMetadata.h" +#include "llvm/CodeGen/GCMetadataPrinter.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Mangler.h" +#include "llvm/IR/Module.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDirectives.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include <cctype> +#include <cstddef> +#include <cstdint> +#include <string> + +using namespace llvm; + +namespace { + +class OcamlGCMetadataPrinter : public GCMetadataPrinter { +public: + void beginAssembly(Module &M, GCModuleInfo &Info, AsmPrinter &AP) override; + void finishAssembly(Module &M, GCModuleInfo &Info, AsmPrinter &AP) override; +}; + +} // end anonymous namespace + +static GCMetadataPrinterRegistry::Add<OcamlGCMetadataPrinter> + Y("ocaml", "ocaml 3.10-compatible collector"); + +void llvm::linkOcamlGCPrinter() {} + +static void EmitCamlGlobal(const Module &M, AsmPrinter &AP, const char *Id) { + const std::string &MId = M.getModuleIdentifier(); + + std::string SymName; + SymName += "caml"; + size_t Letter = SymName.size(); + SymName.append(MId.begin(), llvm::find(MId, '.')); + SymName += "__"; + SymName += Id; + + // Capitalize the first letter of the module name. + SymName[Letter] = toupper(SymName[Letter]); + + SmallString<128> TmpStr; + Mangler::getNameWithPrefix(TmpStr, SymName, M.getDataLayout()); + + MCSymbol *Sym = AP.OutContext.getOrCreateSymbol(TmpStr); + + AP.OutStreamer->EmitSymbolAttribute(Sym, MCSA_Global); + AP.OutStreamer->EmitLabel(Sym); +} + +void OcamlGCMetadataPrinter::beginAssembly(Module &M, GCModuleInfo &Info, + AsmPrinter &AP) { + AP.OutStreamer->SwitchSection(AP.getObjFileLowering().getTextSection()); + EmitCamlGlobal(M, AP, "code_begin"); + + AP.OutStreamer->SwitchSection(AP.getObjFileLowering().getDataSection()); + EmitCamlGlobal(M, AP, "data_begin"); +} + +/// emitAssembly - Print the frametable. The ocaml frametable format is thus: +/// +/// extern "C" struct align(sizeof(intptr_t)) { +/// uint16_t NumDescriptors; +/// struct align(sizeof(intptr_t)) { +/// void *ReturnAddress; +/// uint16_t FrameSize; +/// uint16_t NumLiveOffsets; +/// uint16_t LiveOffsets[NumLiveOffsets]; +/// } Descriptors[NumDescriptors]; +/// } caml${module}__frametable; +/// +/// Note that this precludes programs from stack frames larger than 64K +/// (FrameSize and LiveOffsets would overflow). FrameTablePrinter will abort if +/// either condition is detected in a function which uses the GC. +/// +void OcamlGCMetadataPrinter::finishAssembly(Module &M, GCModuleInfo &Info, + AsmPrinter &AP) { + unsigned IntPtrSize = M.getDataLayout().getPointerSize(); + + AP.OutStreamer->SwitchSection(AP.getObjFileLowering().getTextSection()); + EmitCamlGlobal(M, AP, "code_end"); + + AP.OutStreamer->SwitchSection(AP.getObjFileLowering().getDataSection()); + EmitCamlGlobal(M, AP, "data_end"); + + // FIXME: Why does ocaml emit this?? + AP.OutStreamer->EmitIntValue(0, IntPtrSize); + + AP.OutStreamer->SwitchSection(AP.getObjFileLowering().getDataSection()); + EmitCamlGlobal(M, AP, "frametable"); + + int NumDescriptors = 0; + for (GCModuleInfo::FuncInfoVec::iterator I = Info.funcinfo_begin(), + IE = Info.funcinfo_end(); + I != IE; ++I) { + GCFunctionInfo &FI = **I; + if (FI.getStrategy().getName() != getStrategy().getName()) + // this function is managed by some other GC + continue; + for (GCFunctionInfo::iterator J = FI.begin(), JE = FI.end(); J != JE; ++J) { + NumDescriptors++; + } + } + + if (NumDescriptors >= 1 << 16) { + // Very rude! + report_fatal_error(" Too much descriptor for ocaml GC"); + } + AP.emitInt16(NumDescriptors); + AP.EmitAlignment(IntPtrSize == 4 ? Align(4) : Align(8)); + + for (GCModuleInfo::FuncInfoVec::iterator I = Info.funcinfo_begin(), + IE = Info.funcinfo_end(); + I != IE; ++I) { + GCFunctionInfo &FI = **I; + if (FI.getStrategy().getName() != getStrategy().getName()) + // this function is managed by some other GC + continue; + + uint64_t FrameSize = FI.getFrameSize(); + if (FrameSize >= 1 << 16) { + // Very rude! + report_fatal_error("Function '" + FI.getFunction().getName() + + "' is too large for the ocaml GC! " + "Frame size " + + Twine(FrameSize) + ">= 65536.\n" + "(" + + Twine(uintptr_t(&FI)) + ")"); + } + + AP.OutStreamer->AddComment("live roots for " + + Twine(FI.getFunction().getName())); + AP.OutStreamer->AddBlankLine(); + + for (GCFunctionInfo::iterator J = FI.begin(), JE = FI.end(); J != JE; ++J) { + size_t LiveCount = FI.live_size(J); + if (LiveCount >= 1 << 16) { + // Very rude! + report_fatal_error("Function '" + FI.getFunction().getName() + + "' is too large for the ocaml GC! " + "Live root count " + + Twine(LiveCount) + " >= 65536."); + } + + AP.OutStreamer->EmitSymbolValue(J->Label, IntPtrSize); + AP.emitInt16(FrameSize); + AP.emitInt16(LiveCount); + + for (GCFunctionInfo::live_iterator K = FI.live_begin(J), + KE = FI.live_end(J); + K != KE; ++K) { + if (K->StackOffset >= 1 << 16) { + // Very rude! + report_fatal_error( + "GC root stack offset is outside of fixed stack frame and out " + "of range for ocaml GC!"); + } + AP.emitInt16(K->StackOffset); + } + + AP.EmitAlignment(IntPtrSize == 4 ? Align(4) : Align(8)); + } + } +} diff --git a/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp b/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp new file mode 100644 index 000000000000..444b0ed17b6d --- /dev/null +++ b/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp @@ -0,0 +1,96 @@ +//===-- CodeGen/AsmPrinter/WasmException.cpp - Wasm Exception Impl --------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing WebAssembly exception info into asm +// files. +// +//===----------------------------------------------------------------------===// + +#include "WasmException.h" +#include "llvm/IR/Mangler.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCStreamer.h" +using namespace llvm; + +void WasmException::endModule() { + // This is the symbol used in 'throw' and 'br_on_exn' instruction to denote + // this is a C++ exception. This symbol has to be emitted somewhere once in + // the module. Check if the symbol has already been created, i.e., we have at + // least one 'throw' or 'br_on_exn' instruction in the module, and emit the + // symbol only if so. + SmallString<60> NameStr; + Mangler::getNameWithPrefix(NameStr, "__cpp_exception", Asm->getDataLayout()); + if (Asm->OutContext.lookupSymbol(NameStr)) { + MCSymbol *ExceptionSym = Asm->GetExternalSymbolSymbol("__cpp_exception"); + Asm->OutStreamer->EmitLabel(ExceptionSym); + } +} + +void WasmException::markFunctionEnd() { + // Get rid of any dead landing pads. + if (!Asm->MF->getLandingPads().empty()) { + auto *NonConstMF = const_cast<MachineFunction *>(Asm->MF); + // Wasm does not set BeginLabel and EndLabel information for landing pads, + // so we should set the second argument false. + NonConstMF->tidyLandingPads(nullptr, /* TidyIfNoBeginLabels */ false); + } +} + +void WasmException::endFunction(const MachineFunction *MF) { + bool ShouldEmitExceptionTable = false; + for (const LandingPadInfo &Info : MF->getLandingPads()) { + if (MF->hasWasmLandingPadIndex(Info.LandingPadBlock)) { + ShouldEmitExceptionTable = true; + break; + } + } + if (!ShouldEmitExceptionTable) + return; + MCSymbol *LSDALabel = emitExceptionTable(); + assert(LSDALabel && ".GCC_exception_table has not been emitted!"); + + // Wasm requires every data section symbol to have a .size set. So we emit an + // end marker and set the size as the difference between the start end the end + // marker. + MCSymbol *LSDAEndLabel = Asm->createTempSymbol("GCC_except_table_end"); + Asm->OutStreamer->EmitLabel(LSDAEndLabel); + MCContext &OutContext = Asm->OutStreamer->getContext(); + const MCExpr *SizeExp = MCBinaryExpr::createSub( + MCSymbolRefExpr::create(LSDAEndLabel, OutContext), + MCSymbolRefExpr::create(LSDALabel, OutContext), OutContext); + Asm->OutStreamer->emitELFSize(LSDALabel, SizeExp); +} + +// Compute the call-site table for wasm EH. Even though we use the same function +// name to share the common routines, a call site entry in the table corresponds +// to not a call site for possibly-throwing functions but a landing pad. In wasm +// EH the VM is responsible for stack unwinding. After an exception occurs and +// the stack is unwound, the control flow is transferred to wasm 'catch' +// instruction by the VM, after which the personality function is called from +// the compiler-generated code. Refer to WasmEHPrepare pass for more +// information. +void WasmException::computeCallSiteTable( + SmallVectorImpl<CallSiteEntry> &CallSites, + const SmallVectorImpl<const LandingPadInfo *> &LandingPads, + const SmallVectorImpl<unsigned> &FirstActions) { + MachineFunction &MF = *Asm->MF; + for (unsigned I = 0, N = LandingPads.size(); I < N; ++I) { + const LandingPadInfo *Info = LandingPads[I]; + MachineBasicBlock *LPad = Info->LandingPadBlock; + // We don't emit LSDA for single catch (...). + if (!MF.hasWasmLandingPadIndex(LPad)) + continue; + // Wasm EH must maintain the EH pads in the order assigned to them by the + // WasmEHPrepare pass. + unsigned LPadIndex = MF.getWasmLandingPadIndex(LPad); + CallSiteEntry Site = {nullptr, nullptr, Info, FirstActions[I]}; + if (CallSites.size() < LPadIndex + 1) + CallSites.resize(LPadIndex + 1); + CallSites[LPadIndex] = Site; + } +} diff --git a/llvm/lib/CodeGen/AsmPrinter/WasmException.h b/llvm/lib/CodeGen/AsmPrinter/WasmException.h new file mode 100644 index 000000000000..1893b6b2df43 --- /dev/null +++ b/llvm/lib/CodeGen/AsmPrinter/WasmException.h @@ -0,0 +1,41 @@ +//===-- WasmException.h - Wasm Exception Framework -------------*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing WebAssembly exception info into asm +// files. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_WASMEXCEPTION_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_WASMEXCEPTION_H + +#include "EHStreamer.h" +#include "llvm/CodeGen/AsmPrinter.h" + +namespace llvm { + +class LLVM_LIBRARY_VISIBILITY WasmException : public EHStreamer { +public: + WasmException(AsmPrinter *A) : EHStreamer(A) {} + + void endModule() override; + void beginFunction(const MachineFunction *MF) override {} + virtual void markFunctionEnd() override; + void endFunction(const MachineFunction *MF) override; + +protected: + // Compute the call site table for wasm EH. + void computeCallSiteTable( + SmallVectorImpl<CallSiteEntry> &CallSites, + const SmallVectorImpl<const LandingPadInfo *> &LandingPads, + const SmallVectorImpl<unsigned> &FirstActions) override; +}; + +} // End of namespace llvm + +#endif diff --git a/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp b/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp new file mode 100644 index 000000000000..290be81c6baa --- /dev/null +++ b/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp @@ -0,0 +1,44 @@ +//===-- CodeGen/AsmPrinter/WinCFGuard.cpp - Control Flow Guard Impl ------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing Win64 exception info into asm files. +// +//===----------------------------------------------------------------------===// + +#include "WinCFGuard.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Metadata.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCObjectFileInfo.h" +#include "llvm/MC/MCStreamer.h" + +#include <vector> + +using namespace llvm; + +WinCFGuard::WinCFGuard(AsmPrinter *A) : AsmPrinterHandler(), Asm(A) {} + +WinCFGuard::~WinCFGuard() {} + +void WinCFGuard::endModule() { + const Module *M = Asm->MMI->getModule(); + std::vector<const Function *> Functions; + for (const Function &F : *M) + if (F.hasAddressTaken()) + Functions.push_back(&F); + if (Functions.empty()) + return; + auto &OS = *Asm->OutStreamer; + OS.SwitchSection(Asm->OutContext.getObjectFileInfo()->getGFIDsSection()); + for (const Function *F : Functions) + OS.EmitCOFFSymbolIndex(Asm->getSymbol(F)); +} diff --git a/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h b/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h new file mode 100644 index 000000000000..def0a59ab007 --- /dev/null +++ b/llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h @@ -0,0 +1,53 @@ +//===-- WinCFGuard.h - Windows Control Flow Guard Handling ----*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing windows exception info into asm files. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_WINCFGUARD_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_WINCFGUARD_H + +#include "llvm/CodeGen/AsmPrinterHandler.h" +#include "llvm/Support/Compiler.h" + +namespace llvm { + +class LLVM_LIBRARY_VISIBILITY WinCFGuard : public AsmPrinterHandler { + /// Target of directive emission. + AsmPrinter *Asm; + +public: + WinCFGuard(AsmPrinter *A); + ~WinCFGuard() override; + + void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override {} + + /// Emit the Control Flow Guard function ID table + void endModule() override; + + /// Gather pre-function debug information. + /// Every beginFunction(MF) call should be followed by an endFunction(MF) + /// call. + void beginFunction(const MachineFunction *MF) override {} + + /// Gather post-function debug information. + /// Please note that some AsmPrinter implementations may not call + /// beginFunction at all. + void endFunction(const MachineFunction *MF) override {} + + /// Process beginning of an instruction. + void beginInstruction(const MachineInstr *MI) override {} + + /// Process end of an instruction. + void endInstruction() override {} +}; + +} // namespace llvm + +#endif diff --git a/llvm/lib/CodeGen/AsmPrinter/WinException.cpp b/llvm/lib/CodeGen/AsmPrinter/WinException.cpp new file mode 100644 index 000000000000..0398675577cd --- /dev/null +++ b/llvm/lib/CodeGen/AsmPrinter/WinException.cpp @@ -0,0 +1,1316 @@ +//===-- CodeGen/AsmPrinter/WinException.cpp - Dwarf Exception Impl ------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing Win64 exception info into asm files. +// +//===----------------------------------------------------------------------===// + +#include "WinException.h" +#include "llvm/ADT/Twine.h" +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/CodeGen/WinEHFuncInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Mangler.h" +#include "llvm/IR/Module.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetOptions.h" +using namespace llvm; + +WinException::WinException(AsmPrinter *A) : EHStreamer(A) { + // MSVC's EH tables are always composed of 32-bit words. All known 64-bit + // platforms use an imagerel32 relocation to refer to symbols. + useImageRel32 = (A->getDataLayout().getPointerSizeInBits() == 64); + isAArch64 = Asm->TM.getTargetTriple().isAArch64(); +} + +WinException::~WinException() {} + +/// endModule - Emit all exception information that should come after the +/// content. +void WinException::endModule() { + auto &OS = *Asm->OutStreamer; + const Module *M = MMI->getModule(); + for (const Function &F : *M) + if (F.hasFnAttribute("safeseh")) + OS.EmitCOFFSafeSEH(Asm->getSymbol(&F)); +} + +void WinException::beginFunction(const MachineFunction *MF) { + shouldEmitMoves = shouldEmitPersonality = shouldEmitLSDA = false; + + // If any landing pads survive, we need an EH table. + bool hasLandingPads = !MF->getLandingPads().empty(); + bool hasEHFunclets = MF->hasEHFunclets(); + + const Function &F = MF->getFunction(); + + shouldEmitMoves = Asm->needsSEHMoves() && MF->hasWinCFI(); + + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + unsigned PerEncoding = TLOF.getPersonalityEncoding(); + + EHPersonality Per = EHPersonality::Unknown; + const Function *PerFn = nullptr; + if (F.hasPersonalityFn()) { + PerFn = dyn_cast<Function>(F.getPersonalityFn()->stripPointerCasts()); + Per = classifyEHPersonality(PerFn); + } + + bool forceEmitPersonality = F.hasPersonalityFn() && + !isNoOpWithoutInvoke(Per) && + F.needsUnwindTableEntry(); + + shouldEmitPersonality = + forceEmitPersonality || ((hasLandingPads || hasEHFunclets) && + PerEncoding != dwarf::DW_EH_PE_omit && PerFn); + + unsigned LSDAEncoding = TLOF.getLSDAEncoding(); + shouldEmitLSDA = shouldEmitPersonality && + LSDAEncoding != dwarf::DW_EH_PE_omit; + + // If we're not using CFI, we don't want the CFI or the personality, but we + // might want EH tables if we had EH pads. + if (!Asm->MAI->usesWindowsCFI()) { + if (Per == EHPersonality::MSVC_X86SEH && !hasEHFunclets) { + // If this is 32-bit SEH and we don't have any funclets (really invokes), + // make sure we emit the parent offset label. Some unreferenced filter + // functions may still refer to it. + const WinEHFuncInfo &FuncInfo = *MF->getWinEHFuncInfo(); + StringRef FLinkageName = + GlobalValue::dropLLVMManglingEscape(MF->getFunction().getName()); + emitEHRegistrationOffsetLabel(FuncInfo, FLinkageName); + } + shouldEmitLSDA = hasEHFunclets; + shouldEmitPersonality = false; + return; + } + + beginFunclet(MF->front(), Asm->CurrentFnSym); +} + +void WinException::markFunctionEnd() { + if (isAArch64 && CurrentFuncletEntry && + (shouldEmitMoves || shouldEmitPersonality)) + Asm->OutStreamer->EmitWinCFIFuncletOrFuncEnd(); +} + +/// endFunction - Gather and emit post-function exception information. +/// +void WinException::endFunction(const MachineFunction *MF) { + if (!shouldEmitPersonality && !shouldEmitMoves && !shouldEmitLSDA) + return; + + const Function &F = MF->getFunction(); + EHPersonality Per = EHPersonality::Unknown; + if (F.hasPersonalityFn()) + Per = classifyEHPersonality(F.getPersonalityFn()->stripPointerCasts()); + + // Get rid of any dead landing pads if we're not using funclets. In funclet + // schemes, the landing pad is not actually reachable. It only exists so + // that we can emit the right table data. + if (!isFuncletEHPersonality(Per)) { + MachineFunction *NonConstMF = const_cast<MachineFunction*>(MF); + NonConstMF->tidyLandingPads(); + } + + endFuncletImpl(); + + // endFunclet will emit the necessary .xdata tables for x64 SEH. + if (Per == EHPersonality::MSVC_Win64SEH && MF->hasEHFunclets()) + return; + + if (shouldEmitPersonality || shouldEmitLSDA) { + Asm->OutStreamer->PushSection(); + + // Just switch sections to the right xdata section. + MCSection *XData = Asm->OutStreamer->getAssociatedXDataSection( + Asm->OutStreamer->getCurrentSectionOnly()); + Asm->OutStreamer->SwitchSection(XData); + + // Emit the tables appropriate to the personality function in use. If we + // don't recognize the personality, assume it uses an Itanium-style LSDA. + if (Per == EHPersonality::MSVC_Win64SEH) + emitCSpecificHandlerTable(MF); + else if (Per == EHPersonality::MSVC_X86SEH) + emitExceptHandlerTable(MF); + else if (Per == EHPersonality::MSVC_CXX) + emitCXXFrameHandler3Table(MF); + else if (Per == EHPersonality::CoreCLR) + emitCLRExceptionTable(MF); + else + emitExceptionTable(); + + Asm->OutStreamer->PopSection(); + } +} + +/// Retrieve the MCSymbol for a GlobalValue or MachineBasicBlock. +static MCSymbol *getMCSymbolForMBB(AsmPrinter *Asm, + const MachineBasicBlock *MBB) { + if (!MBB) + return nullptr; + + assert(MBB->isEHFuncletEntry()); + + // Give catches and cleanups a name based off of their parent function and + // their funclet entry block's number. + const MachineFunction *MF = MBB->getParent(); + const Function &F = MF->getFunction(); + StringRef FuncLinkageName = GlobalValue::dropLLVMManglingEscape(F.getName()); + MCContext &Ctx = MF->getContext(); + StringRef HandlerPrefix = MBB->isCleanupFuncletEntry() ? "dtor" : "catch"; + return Ctx.getOrCreateSymbol("?" + HandlerPrefix + "$" + + Twine(MBB->getNumber()) + "@?0?" + + FuncLinkageName + "@4HA"); +} + +void WinException::beginFunclet(const MachineBasicBlock &MBB, + MCSymbol *Sym) { + CurrentFuncletEntry = &MBB; + + const Function &F = Asm->MF->getFunction(); + // If a symbol was not provided for the funclet, invent one. + if (!Sym) { + Sym = getMCSymbolForMBB(Asm, &MBB); + + // Describe our funclet symbol as a function with internal linkage. + Asm->OutStreamer->BeginCOFFSymbolDef(Sym); + Asm->OutStreamer->EmitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_STATIC); + Asm->OutStreamer->EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION + << COFF::SCT_COMPLEX_TYPE_SHIFT); + Asm->OutStreamer->EndCOFFSymbolDef(); + + // We want our funclet's entry point to be aligned such that no nops will be + // present after the label. + Asm->EmitAlignment(std::max(Asm->MF->getAlignment(), MBB.getAlignment()), + &F); + + // Now that we've emitted the alignment directive, point at our funclet. + Asm->OutStreamer->EmitLabel(Sym); + } + + // Mark 'Sym' as starting our funclet. + if (shouldEmitMoves || shouldEmitPersonality) { + CurrentFuncletTextSection = Asm->OutStreamer->getCurrentSectionOnly(); + Asm->OutStreamer->EmitWinCFIStartProc(Sym); + } + + if (shouldEmitPersonality) { + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + const Function *PerFn = nullptr; + + // Determine which personality routine we are using for this funclet. + if (F.hasPersonalityFn()) + PerFn = dyn_cast<Function>(F.getPersonalityFn()->stripPointerCasts()); + const MCSymbol *PersHandlerSym = + TLOF.getCFIPersonalitySymbol(PerFn, Asm->TM, MMI); + + // Do not emit a .seh_handler directives for cleanup funclets. + // FIXME: This means cleanup funclets cannot handle exceptions. Given that + // Clang doesn't produce EH constructs inside cleanup funclets and LLVM's + // inliner doesn't allow inlining them, this isn't a major problem in + // practice. + if (!CurrentFuncletEntry->isCleanupFuncletEntry()) + Asm->OutStreamer->EmitWinEHHandler(PersHandlerSym, true, true); + } +} + +void WinException::endFunclet() { + if (isAArch64 && CurrentFuncletEntry && + (shouldEmitMoves || shouldEmitPersonality)) { + Asm->OutStreamer->SwitchSection(CurrentFuncletTextSection); + Asm->OutStreamer->EmitWinCFIFuncletOrFuncEnd(); + } + endFuncletImpl(); +} + +void WinException::endFuncletImpl() { + // No funclet to process? Great, we have nothing to do. + if (!CurrentFuncletEntry) + return; + + const MachineFunction *MF = Asm->MF; + if (shouldEmitMoves || shouldEmitPersonality) { + const Function &F = MF->getFunction(); + EHPersonality Per = EHPersonality::Unknown; + if (F.hasPersonalityFn()) + Per = classifyEHPersonality(F.getPersonalityFn()->stripPointerCasts()); + + // On funclet exit, we emit a fake "function" end marker, so that the call + // to EmitWinEHHandlerData below can calculate the size of the funclet or + // function. + if (isAArch64) { + MCSection *XData = Asm->OutStreamer->getAssociatedXDataSection( + Asm->OutStreamer->getCurrentSectionOnly()); + Asm->OutStreamer->SwitchSection(XData); + } + + // Emit an UNWIND_INFO struct describing the prologue. + Asm->OutStreamer->EmitWinEHHandlerData(); + + if (Per == EHPersonality::MSVC_CXX && shouldEmitPersonality && + !CurrentFuncletEntry->isCleanupFuncletEntry()) { + // If this is a C++ catch funclet (or the parent function), + // emit a reference to the LSDA for the parent function. + StringRef FuncLinkageName = GlobalValue::dropLLVMManglingEscape(F.getName()); + MCSymbol *FuncInfoXData = Asm->OutContext.getOrCreateSymbol( + Twine("$cppxdata$", FuncLinkageName)); + Asm->OutStreamer->EmitValue(create32bitRef(FuncInfoXData), 4); + } else if (Per == EHPersonality::MSVC_Win64SEH && MF->hasEHFunclets() && + !CurrentFuncletEntry->isEHFuncletEntry()) { + // If this is the parent function in Win64 SEH, emit the LSDA immediately + // following .seh_handlerdata. + emitCSpecificHandlerTable(MF); + } + + // Switch back to the funclet start .text section now that we are done + // writing to .xdata, and emit an .seh_endproc directive to mark the end of + // the function. + Asm->OutStreamer->SwitchSection(CurrentFuncletTextSection); + Asm->OutStreamer->EmitWinCFIEndProc(); + } + + // Let's make sure we don't try to end the same funclet twice. + CurrentFuncletEntry = nullptr; +} + +const MCExpr *WinException::create32bitRef(const MCSymbol *Value) { + if (!Value) + return MCConstantExpr::create(0, Asm->OutContext); + return MCSymbolRefExpr::create(Value, useImageRel32 + ? MCSymbolRefExpr::VK_COFF_IMGREL32 + : MCSymbolRefExpr::VK_None, + Asm->OutContext); +} + +const MCExpr *WinException::create32bitRef(const GlobalValue *GV) { + if (!GV) + return MCConstantExpr::create(0, Asm->OutContext); + return create32bitRef(Asm->getSymbol(GV)); +} + +const MCExpr *WinException::getLabel(const MCSymbol *Label) { + if (isAArch64) + return MCSymbolRefExpr::create(Label, MCSymbolRefExpr::VK_COFF_IMGREL32, + Asm->OutContext); + return MCBinaryExpr::createAdd(create32bitRef(Label), + MCConstantExpr::create(1, Asm->OutContext), + Asm->OutContext); +} + +const MCExpr *WinException::getOffset(const MCSymbol *OffsetOf, + const MCSymbol *OffsetFrom) { + return MCBinaryExpr::createSub( + MCSymbolRefExpr::create(OffsetOf, Asm->OutContext), + MCSymbolRefExpr::create(OffsetFrom, Asm->OutContext), Asm->OutContext); +} + +const MCExpr *WinException::getOffsetPlusOne(const MCSymbol *OffsetOf, + const MCSymbol *OffsetFrom) { + return MCBinaryExpr::createAdd(getOffset(OffsetOf, OffsetFrom), + MCConstantExpr::create(1, Asm->OutContext), + Asm->OutContext); +} + +int WinException::getFrameIndexOffset(int FrameIndex, + const WinEHFuncInfo &FuncInfo) { + const TargetFrameLowering &TFI = *Asm->MF->getSubtarget().getFrameLowering(); + unsigned UnusedReg; + if (Asm->MAI->usesWindowsCFI()) { + int Offset = + TFI.getFrameIndexReferencePreferSP(*Asm->MF, FrameIndex, UnusedReg, + /*IgnoreSPUpdates*/ true); + assert(UnusedReg == + Asm->MF->getSubtarget() + .getTargetLowering() + ->getStackPointerRegisterToSaveRestore()); + return Offset; + } + + // For 32-bit, offsets should be relative to the end of the EH registration + // node. For 64-bit, it's relative to SP at the end of the prologue. + assert(FuncInfo.EHRegNodeEndOffset != INT_MAX); + int Offset = TFI.getFrameIndexReference(*Asm->MF, FrameIndex, UnusedReg); + Offset += FuncInfo.EHRegNodeEndOffset; + return Offset; +} + +namespace { + +/// Top-level state used to represent unwind to caller +const int NullState = -1; + +struct InvokeStateChange { + /// EH Label immediately after the last invoke in the previous state, or + /// nullptr if the previous state was the null state. + const MCSymbol *PreviousEndLabel; + + /// EH label immediately before the first invoke in the new state, or nullptr + /// if the new state is the null state. + const MCSymbol *NewStartLabel; + + /// State of the invoke following NewStartLabel, or NullState to indicate + /// the presence of calls which may unwind to caller. + int NewState; +}; + +/// Iterator that reports all the invoke state changes in a range of machine +/// basic blocks. Changes to the null state are reported whenever a call that +/// may unwind to caller is encountered. The MBB range is expected to be an +/// entire function or funclet, and the start and end of the range are treated +/// as being in the NullState even if there's not an unwind-to-caller call +/// before the first invoke or after the last one (i.e., the first state change +/// reported is the first change to something other than NullState, and a +/// change back to NullState is always reported at the end of iteration). +class InvokeStateChangeIterator { + InvokeStateChangeIterator(const WinEHFuncInfo &EHInfo, + MachineFunction::const_iterator MFI, + MachineFunction::const_iterator MFE, + MachineBasicBlock::const_iterator MBBI, + int BaseState) + : EHInfo(EHInfo), MFI(MFI), MFE(MFE), MBBI(MBBI), BaseState(BaseState) { + LastStateChange.PreviousEndLabel = nullptr; + LastStateChange.NewStartLabel = nullptr; + LastStateChange.NewState = BaseState; + scan(); + } + +public: + static iterator_range<InvokeStateChangeIterator> + range(const WinEHFuncInfo &EHInfo, MachineFunction::const_iterator Begin, + MachineFunction::const_iterator End, int BaseState = NullState) { + // Reject empty ranges to simplify bookkeeping by ensuring that we can get + // the end of the last block. + assert(Begin != End); + auto BlockBegin = Begin->begin(); + auto BlockEnd = std::prev(End)->end(); + return make_range( + InvokeStateChangeIterator(EHInfo, Begin, End, BlockBegin, BaseState), + InvokeStateChangeIterator(EHInfo, End, End, BlockEnd, BaseState)); + } + + // Iterator methods. + bool operator==(const InvokeStateChangeIterator &O) const { + assert(BaseState == O.BaseState); + // Must be visiting same block. + if (MFI != O.MFI) + return false; + // Must be visiting same isntr. + if (MBBI != O.MBBI) + return false; + // At end of block/instr iteration, we can still have two distinct states: + // one to report the final EndLabel, and another indicating the end of the + // state change iteration. Check for CurrentEndLabel equality to + // distinguish these. + return CurrentEndLabel == O.CurrentEndLabel; + } + + bool operator!=(const InvokeStateChangeIterator &O) const { + return !operator==(O); + } + InvokeStateChange &operator*() { return LastStateChange; } + InvokeStateChange *operator->() { return &LastStateChange; } + InvokeStateChangeIterator &operator++() { return scan(); } + +private: + InvokeStateChangeIterator &scan(); + + const WinEHFuncInfo &EHInfo; + const MCSymbol *CurrentEndLabel = nullptr; + MachineFunction::const_iterator MFI; + MachineFunction::const_iterator MFE; + MachineBasicBlock::const_iterator MBBI; + InvokeStateChange LastStateChange; + bool VisitingInvoke = false; + int BaseState; +}; + +} // end anonymous namespace + +InvokeStateChangeIterator &InvokeStateChangeIterator::scan() { + bool IsNewBlock = false; + for (; MFI != MFE; ++MFI, IsNewBlock = true) { + if (IsNewBlock) + MBBI = MFI->begin(); + for (auto MBBE = MFI->end(); MBBI != MBBE; ++MBBI) { + const MachineInstr &MI = *MBBI; + if (!VisitingInvoke && LastStateChange.NewState != BaseState && + MI.isCall() && !EHStreamer::callToNoUnwindFunction(&MI)) { + // Indicate a change of state to the null state. We don't have + // start/end EH labels handy but the caller won't expect them for + // null state regions. + LastStateChange.PreviousEndLabel = CurrentEndLabel; + LastStateChange.NewStartLabel = nullptr; + LastStateChange.NewState = BaseState; + CurrentEndLabel = nullptr; + // Don't re-visit this instr on the next scan + ++MBBI; + return *this; + } + + // All other state changes are at EH labels before/after invokes. + if (!MI.isEHLabel()) + continue; + MCSymbol *Label = MI.getOperand(0).getMCSymbol(); + if (Label == CurrentEndLabel) { + VisitingInvoke = false; + continue; + } + auto InvokeMapIter = EHInfo.LabelToStateMap.find(Label); + // Ignore EH labels that aren't the ones inserted before an invoke + if (InvokeMapIter == EHInfo.LabelToStateMap.end()) + continue; + auto &StateAndEnd = InvokeMapIter->second; + int NewState = StateAndEnd.first; + // Keep track of the fact that we're between EH start/end labels so + // we know not to treat the inoke we'll see as unwinding to caller. + VisitingInvoke = true; + if (NewState == LastStateChange.NewState) { + // The state isn't actually changing here. Record the new end and + // keep going. + CurrentEndLabel = StateAndEnd.second; + continue; + } + // Found a state change to report + LastStateChange.PreviousEndLabel = CurrentEndLabel; + LastStateChange.NewStartLabel = Label; + LastStateChange.NewState = NewState; + // Start keeping track of the new current end + CurrentEndLabel = StateAndEnd.second; + // Don't re-visit this instr on the next scan + ++MBBI; + return *this; + } + } + // Iteration hit the end of the block range. + if (LastStateChange.NewState != BaseState) { + // Report the end of the last new state + LastStateChange.PreviousEndLabel = CurrentEndLabel; + LastStateChange.NewStartLabel = nullptr; + LastStateChange.NewState = BaseState; + // Leave CurrentEndLabel non-null to distinguish this state from end. + assert(CurrentEndLabel != nullptr); + return *this; + } + // We've reported all state changes and hit the end state. + CurrentEndLabel = nullptr; + return *this; +} + +/// Emit the language-specific data that __C_specific_handler expects. This +/// handler lives in the x64 Microsoft C runtime and allows catching or cleaning +/// up after faults with __try, __except, and __finally. The typeinfo values +/// are not really RTTI data, but pointers to filter functions that return an +/// integer (1, 0, or -1) indicating how to handle the exception. For __finally +/// blocks and other cleanups, the landing pad label is zero, and the filter +/// function is actually a cleanup handler with the same prototype. A catch-all +/// entry is modeled with a null filter function field and a non-zero landing +/// pad label. +/// +/// Possible filter function return values: +/// EXCEPTION_EXECUTE_HANDLER (1): +/// Jump to the landing pad label after cleanups. +/// EXCEPTION_CONTINUE_SEARCH (0): +/// Continue searching this table or continue unwinding. +/// EXCEPTION_CONTINUE_EXECUTION (-1): +/// Resume execution at the trapping PC. +/// +/// Inferred table structure: +/// struct Table { +/// int NumEntries; +/// struct Entry { +/// imagerel32 LabelStart; +/// imagerel32 LabelEnd; +/// imagerel32 FilterOrFinally; // One means catch-all. +/// imagerel32 LabelLPad; // Zero means __finally. +/// } Entries[NumEntries]; +/// }; +void WinException::emitCSpecificHandlerTable(const MachineFunction *MF) { + auto &OS = *Asm->OutStreamer; + MCContext &Ctx = Asm->OutContext; + const WinEHFuncInfo &FuncInfo = *MF->getWinEHFuncInfo(); + + bool VerboseAsm = OS.isVerboseAsm(); + auto AddComment = [&](const Twine &Comment) { + if (VerboseAsm) + OS.AddComment(Comment); + }; + + if (!isAArch64) { + // Emit a label assignment with the SEH frame offset so we can use it for + // llvm.eh.recoverfp. + StringRef FLinkageName = + GlobalValue::dropLLVMManglingEscape(MF->getFunction().getName()); + MCSymbol *ParentFrameOffset = + Ctx.getOrCreateParentFrameOffsetSymbol(FLinkageName); + const MCExpr *MCOffset = + MCConstantExpr::create(FuncInfo.SEHSetFrameOffset, Ctx); + Asm->OutStreamer->EmitAssignment(ParentFrameOffset, MCOffset); + } + + // Use the assembler to compute the number of table entries through label + // difference and division. + MCSymbol *TableBegin = + Ctx.createTempSymbol("lsda_begin", /*AlwaysAddSuffix=*/true); + MCSymbol *TableEnd = + Ctx.createTempSymbol("lsda_end", /*AlwaysAddSuffix=*/true); + const MCExpr *LabelDiff = getOffset(TableEnd, TableBegin); + const MCExpr *EntrySize = MCConstantExpr::create(16, Ctx); + const MCExpr *EntryCount = MCBinaryExpr::createDiv(LabelDiff, EntrySize, Ctx); + AddComment("Number of call sites"); + OS.EmitValue(EntryCount, 4); + + OS.EmitLabel(TableBegin); + + // Iterate over all the invoke try ranges. Unlike MSVC, LLVM currently only + // models exceptions from invokes. LLVM also allows arbitrary reordering of + // the code, so our tables end up looking a bit different. Rather than + // trying to match MSVC's tables exactly, we emit a denormalized table. For + // each range of invokes in the same state, we emit table entries for all + // the actions that would be taken in that state. This means our tables are + // slightly bigger, which is OK. + const MCSymbol *LastStartLabel = nullptr; + int LastEHState = -1; + // Break out before we enter into a finally funclet. + // FIXME: We need to emit separate EH tables for cleanups. + MachineFunction::const_iterator End = MF->end(); + MachineFunction::const_iterator Stop = std::next(MF->begin()); + while (Stop != End && !Stop->isEHFuncletEntry()) + ++Stop; + for (const auto &StateChange : + InvokeStateChangeIterator::range(FuncInfo, MF->begin(), Stop)) { + // Emit all the actions for the state we just transitioned out of + // if it was not the null state + if (LastEHState != -1) + emitSEHActionsForRange(FuncInfo, LastStartLabel, + StateChange.PreviousEndLabel, LastEHState); + LastStartLabel = StateChange.NewStartLabel; + LastEHState = StateChange.NewState; + } + + OS.EmitLabel(TableEnd); +} + +void WinException::emitSEHActionsForRange(const WinEHFuncInfo &FuncInfo, + const MCSymbol *BeginLabel, + const MCSymbol *EndLabel, int State) { + auto &OS = *Asm->OutStreamer; + MCContext &Ctx = Asm->OutContext; + bool VerboseAsm = OS.isVerboseAsm(); + auto AddComment = [&](const Twine &Comment) { + if (VerboseAsm) + OS.AddComment(Comment); + }; + + assert(BeginLabel && EndLabel); + while (State != -1) { + const SEHUnwindMapEntry &UME = FuncInfo.SEHUnwindMap[State]; + const MCExpr *FilterOrFinally; + const MCExpr *ExceptOrNull; + auto *Handler = UME.Handler.get<MachineBasicBlock *>(); + if (UME.IsFinally) { + FilterOrFinally = create32bitRef(getMCSymbolForMBB(Asm, Handler)); + ExceptOrNull = MCConstantExpr::create(0, Ctx); + } else { + // For an except, the filter can be 1 (catch-all) or a function + // label. + FilterOrFinally = UME.Filter ? create32bitRef(UME.Filter) + : MCConstantExpr::create(1, Ctx); + ExceptOrNull = create32bitRef(Handler->getSymbol()); + } + + AddComment("LabelStart"); + OS.EmitValue(getLabel(BeginLabel), 4); + AddComment("LabelEnd"); + OS.EmitValue(getLabel(EndLabel), 4); + AddComment(UME.IsFinally ? "FinallyFunclet" : UME.Filter ? "FilterFunction" + : "CatchAll"); + OS.EmitValue(FilterOrFinally, 4); + AddComment(UME.IsFinally ? "Null" : "ExceptionHandler"); + OS.EmitValue(ExceptOrNull, 4); + + assert(UME.ToState < State && "states should decrease"); + State = UME.ToState; + } +} + +void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) { + const Function &F = MF->getFunction(); + auto &OS = *Asm->OutStreamer; + const WinEHFuncInfo &FuncInfo = *MF->getWinEHFuncInfo(); + + StringRef FuncLinkageName = GlobalValue::dropLLVMManglingEscape(F.getName()); + + SmallVector<std::pair<const MCExpr *, int>, 4> IPToStateTable; + MCSymbol *FuncInfoXData = nullptr; + if (shouldEmitPersonality) { + // If we're 64-bit, emit a pointer to the C++ EH data, and build a map from + // IPs to state numbers. + FuncInfoXData = + Asm->OutContext.getOrCreateSymbol(Twine("$cppxdata$", FuncLinkageName)); + computeIP2StateTable(MF, FuncInfo, IPToStateTable); + } else { + FuncInfoXData = Asm->OutContext.getOrCreateLSDASymbol(FuncLinkageName); + } + + int UnwindHelpOffset = 0; + if (Asm->MAI->usesWindowsCFI()) + UnwindHelpOffset = + getFrameIndexOffset(FuncInfo.UnwindHelpFrameIdx, FuncInfo); + + MCSymbol *UnwindMapXData = nullptr; + MCSymbol *TryBlockMapXData = nullptr; + MCSymbol *IPToStateXData = nullptr; + if (!FuncInfo.CxxUnwindMap.empty()) + UnwindMapXData = Asm->OutContext.getOrCreateSymbol( + Twine("$stateUnwindMap$", FuncLinkageName)); + if (!FuncInfo.TryBlockMap.empty()) + TryBlockMapXData = + Asm->OutContext.getOrCreateSymbol(Twine("$tryMap$", FuncLinkageName)); + if (!IPToStateTable.empty()) + IPToStateXData = + Asm->OutContext.getOrCreateSymbol(Twine("$ip2state$", FuncLinkageName)); + + bool VerboseAsm = OS.isVerboseAsm(); + auto AddComment = [&](const Twine &Comment) { + if (VerboseAsm) + OS.AddComment(Comment); + }; + + // FuncInfo { + // uint32_t MagicNumber + // int32_t MaxState; + // UnwindMapEntry *UnwindMap; + // uint32_t NumTryBlocks; + // TryBlockMapEntry *TryBlockMap; + // uint32_t IPMapEntries; // always 0 for x86 + // IPToStateMapEntry *IPToStateMap; // always 0 for x86 + // uint32_t UnwindHelp; // non-x86 only + // ESTypeList *ESTypeList; + // int32_t EHFlags; + // } + // EHFlags & 1 -> Synchronous exceptions only, no async exceptions. + // EHFlags & 2 -> ??? + // EHFlags & 4 -> The function is noexcept(true), unwinding can't continue. + OS.EmitValueToAlignment(4); + OS.EmitLabel(FuncInfoXData); + + AddComment("MagicNumber"); + OS.EmitIntValue(0x19930522, 4); + + AddComment("MaxState"); + OS.EmitIntValue(FuncInfo.CxxUnwindMap.size(), 4); + + AddComment("UnwindMap"); + OS.EmitValue(create32bitRef(UnwindMapXData), 4); + + AddComment("NumTryBlocks"); + OS.EmitIntValue(FuncInfo.TryBlockMap.size(), 4); + + AddComment("TryBlockMap"); + OS.EmitValue(create32bitRef(TryBlockMapXData), 4); + + AddComment("IPMapEntries"); + OS.EmitIntValue(IPToStateTable.size(), 4); + + AddComment("IPToStateXData"); + OS.EmitValue(create32bitRef(IPToStateXData), 4); + + if (Asm->MAI->usesWindowsCFI()) { + AddComment("UnwindHelp"); + OS.EmitIntValue(UnwindHelpOffset, 4); + } + + AddComment("ESTypeList"); + OS.EmitIntValue(0, 4); + + AddComment("EHFlags"); + OS.EmitIntValue(1, 4); + + // UnwindMapEntry { + // int32_t ToState; + // void (*Action)(); + // }; + if (UnwindMapXData) { + OS.EmitLabel(UnwindMapXData); + for (const CxxUnwindMapEntry &UME : FuncInfo.CxxUnwindMap) { + MCSymbol *CleanupSym = + getMCSymbolForMBB(Asm, UME.Cleanup.dyn_cast<MachineBasicBlock *>()); + AddComment("ToState"); + OS.EmitIntValue(UME.ToState, 4); + + AddComment("Action"); + OS.EmitValue(create32bitRef(CleanupSym), 4); + } + } + + // TryBlockMap { + // int32_t TryLow; + // int32_t TryHigh; + // int32_t CatchHigh; + // int32_t NumCatches; + // HandlerType *HandlerArray; + // }; + if (TryBlockMapXData) { + OS.EmitLabel(TryBlockMapXData); + SmallVector<MCSymbol *, 1> HandlerMaps; + for (size_t I = 0, E = FuncInfo.TryBlockMap.size(); I != E; ++I) { + const WinEHTryBlockMapEntry &TBME = FuncInfo.TryBlockMap[I]; + + MCSymbol *HandlerMapXData = nullptr; + if (!TBME.HandlerArray.empty()) + HandlerMapXData = + Asm->OutContext.getOrCreateSymbol(Twine("$handlerMap$") + .concat(Twine(I)) + .concat("$") + .concat(FuncLinkageName)); + HandlerMaps.push_back(HandlerMapXData); + + // TBMEs should form intervals. + assert(0 <= TBME.TryLow && "bad trymap interval"); + assert(TBME.TryLow <= TBME.TryHigh && "bad trymap interval"); + assert(TBME.TryHigh < TBME.CatchHigh && "bad trymap interval"); + assert(TBME.CatchHigh < int(FuncInfo.CxxUnwindMap.size()) && + "bad trymap interval"); + + AddComment("TryLow"); + OS.EmitIntValue(TBME.TryLow, 4); + + AddComment("TryHigh"); + OS.EmitIntValue(TBME.TryHigh, 4); + + AddComment("CatchHigh"); + OS.EmitIntValue(TBME.CatchHigh, 4); + + AddComment("NumCatches"); + OS.EmitIntValue(TBME.HandlerArray.size(), 4); + + AddComment("HandlerArray"); + OS.EmitValue(create32bitRef(HandlerMapXData), 4); + } + + // All funclets use the same parent frame offset currently. + unsigned ParentFrameOffset = 0; + if (shouldEmitPersonality) { + const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering(); + ParentFrameOffset = TFI->getWinEHParentFrameOffset(*MF); + } + + for (size_t I = 0, E = FuncInfo.TryBlockMap.size(); I != E; ++I) { + const WinEHTryBlockMapEntry &TBME = FuncInfo.TryBlockMap[I]; + MCSymbol *HandlerMapXData = HandlerMaps[I]; + if (!HandlerMapXData) + continue; + // HandlerType { + // int32_t Adjectives; + // TypeDescriptor *Type; + // int32_t CatchObjOffset; + // void (*Handler)(); + // int32_t ParentFrameOffset; // x64 and AArch64 only + // }; + OS.EmitLabel(HandlerMapXData); + for (const WinEHHandlerType &HT : TBME.HandlerArray) { + // Get the frame escape label with the offset of the catch object. If + // the index is INT_MAX, then there is no catch object, and we should + // emit an offset of zero, indicating that no copy will occur. + const MCExpr *FrameAllocOffsetRef = nullptr; + if (HT.CatchObj.FrameIndex != INT_MAX) { + int Offset = getFrameIndexOffset(HT.CatchObj.FrameIndex, FuncInfo); + assert(Offset != 0 && "Illegal offset for catch object!"); + FrameAllocOffsetRef = MCConstantExpr::create(Offset, Asm->OutContext); + } else { + FrameAllocOffsetRef = MCConstantExpr::create(0, Asm->OutContext); + } + + MCSymbol *HandlerSym = + getMCSymbolForMBB(Asm, HT.Handler.dyn_cast<MachineBasicBlock *>()); + + AddComment("Adjectives"); + OS.EmitIntValue(HT.Adjectives, 4); + + AddComment("Type"); + OS.EmitValue(create32bitRef(HT.TypeDescriptor), 4); + + AddComment("CatchObjOffset"); + OS.EmitValue(FrameAllocOffsetRef, 4); + + AddComment("Handler"); + OS.EmitValue(create32bitRef(HandlerSym), 4); + + if (shouldEmitPersonality) { + AddComment("ParentFrameOffset"); + OS.EmitIntValue(ParentFrameOffset, 4); + } + } + } + } + + // IPToStateMapEntry { + // void *IP; + // int32_t State; + // }; + if (IPToStateXData) { + OS.EmitLabel(IPToStateXData); + for (auto &IPStatePair : IPToStateTable) { + AddComment("IP"); + OS.EmitValue(IPStatePair.first, 4); + AddComment("ToState"); + OS.EmitIntValue(IPStatePair.second, 4); + } + } +} + +void WinException::computeIP2StateTable( + const MachineFunction *MF, const WinEHFuncInfo &FuncInfo, + SmallVectorImpl<std::pair<const MCExpr *, int>> &IPToStateTable) { + + for (MachineFunction::const_iterator FuncletStart = MF->begin(), + FuncletEnd = MF->begin(), + End = MF->end(); + FuncletStart != End; FuncletStart = FuncletEnd) { + // Find the end of the funclet + while (++FuncletEnd != End) { + if (FuncletEnd->isEHFuncletEntry()) { + break; + } + } + + // Don't emit ip2state entries for cleanup funclets. Any interesting + // exceptional actions in cleanups must be handled in a separate IR + // function. + if (FuncletStart->isCleanupFuncletEntry()) + continue; + + MCSymbol *StartLabel; + int BaseState; + if (FuncletStart == MF->begin()) { + BaseState = NullState; + StartLabel = Asm->getFunctionBegin(); + } else { + auto *FuncletPad = + cast<FuncletPadInst>(FuncletStart->getBasicBlock()->getFirstNonPHI()); + assert(FuncInfo.FuncletBaseStateMap.count(FuncletPad) != 0); + BaseState = FuncInfo.FuncletBaseStateMap.find(FuncletPad)->second; + StartLabel = getMCSymbolForMBB(Asm, &*FuncletStart); + } + assert(StartLabel && "need local function start label"); + IPToStateTable.push_back( + std::make_pair(create32bitRef(StartLabel), BaseState)); + + for (const auto &StateChange : InvokeStateChangeIterator::range( + FuncInfo, FuncletStart, FuncletEnd, BaseState)) { + // Compute the label to report as the start of this entry; use the EH + // start label for the invoke if we have one, otherwise (this is a call + // which may unwind to our caller and does not have an EH start label, so) + // use the previous end label. + const MCSymbol *ChangeLabel = StateChange.NewStartLabel; + if (!ChangeLabel) + ChangeLabel = StateChange.PreviousEndLabel; + // Emit an entry indicating that PCs after 'Label' have this EH state. + IPToStateTable.push_back( + std::make_pair(getLabel(ChangeLabel), StateChange.NewState)); + // FIXME: assert that NewState is between CatchLow and CatchHigh. + } + } +} + +void WinException::emitEHRegistrationOffsetLabel(const WinEHFuncInfo &FuncInfo, + StringRef FLinkageName) { + // Outlined helpers called by the EH runtime need to know the offset of the EH + // registration in order to recover the parent frame pointer. Now that we know + // we've code generated the parent, we can emit the label assignment that + // those helpers use to get the offset of the registration node. + + // Compute the parent frame offset. The EHRegNodeFrameIndex will be invalid if + // after optimization all the invokes were eliminated. We still need to emit + // the parent frame offset label, but it should be garbage and should never be + // used. + int64_t Offset = 0; + int FI = FuncInfo.EHRegNodeFrameIndex; + if (FI != INT_MAX) { + const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering(); + Offset = TFI->getNonLocalFrameIndexReference(*Asm->MF, FI); + } + + MCContext &Ctx = Asm->OutContext; + MCSymbol *ParentFrameOffset = + Ctx.getOrCreateParentFrameOffsetSymbol(FLinkageName); + Asm->OutStreamer->EmitAssignment(ParentFrameOffset, + MCConstantExpr::create(Offset, Ctx)); +} + +/// Emit the language-specific data that _except_handler3 and 4 expect. This is +/// functionally equivalent to the __C_specific_handler table, except it is +/// indexed by state number instead of IP. +void WinException::emitExceptHandlerTable(const MachineFunction *MF) { + MCStreamer &OS = *Asm->OutStreamer; + const Function &F = MF->getFunction(); + StringRef FLinkageName = GlobalValue::dropLLVMManglingEscape(F.getName()); + + bool VerboseAsm = OS.isVerboseAsm(); + auto AddComment = [&](const Twine &Comment) { + if (VerboseAsm) + OS.AddComment(Comment); + }; + + const WinEHFuncInfo &FuncInfo = *MF->getWinEHFuncInfo(); + emitEHRegistrationOffsetLabel(FuncInfo, FLinkageName); + + // Emit the __ehtable label that we use for llvm.x86.seh.lsda. + MCSymbol *LSDALabel = Asm->OutContext.getOrCreateLSDASymbol(FLinkageName); + OS.EmitValueToAlignment(4); + OS.EmitLabel(LSDALabel); + + const auto *Per = cast<Function>(F.getPersonalityFn()->stripPointerCasts()); + StringRef PerName = Per->getName(); + int BaseState = -1; + if (PerName == "_except_handler4") { + // The LSDA for _except_handler4 starts with this struct, followed by the + // scope table: + // + // struct EH4ScopeTable { + // int32_t GSCookieOffset; + // int32_t GSCookieXOROffset; + // int32_t EHCookieOffset; + // int32_t EHCookieXOROffset; + // ScopeTableEntry ScopeRecord[]; + // }; + // + // Offsets are %ebp relative. + // + // The GS cookie is present only if the function needs stack protection. + // GSCookieOffset = -2 means that GS cookie is not used. + // + // The EH cookie is always present. + // + // Check is done the following way: + // (ebp+CookieXOROffset) ^ [ebp+CookieOffset] == _security_cookie + + // Retrieve the Guard Stack slot. + int GSCookieOffset = -2; + const MachineFrameInfo &MFI = MF->getFrameInfo(); + if (MFI.hasStackProtectorIndex()) { + unsigned UnusedReg; + const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering(); + int SSPIdx = MFI.getStackProtectorIndex(); + GSCookieOffset = TFI->getFrameIndexReference(*MF, SSPIdx, UnusedReg); + } + + // Retrieve the EH Guard slot. + // TODO(etienneb): Get rid of this value and change it for and assertion. + int EHCookieOffset = 9999; + if (FuncInfo.EHGuardFrameIndex != INT_MAX) { + unsigned UnusedReg; + const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering(); + int EHGuardIdx = FuncInfo.EHGuardFrameIndex; + EHCookieOffset = TFI->getFrameIndexReference(*MF, EHGuardIdx, UnusedReg); + } + + AddComment("GSCookieOffset"); + OS.EmitIntValue(GSCookieOffset, 4); + AddComment("GSCookieXOROffset"); + OS.EmitIntValue(0, 4); + AddComment("EHCookieOffset"); + OS.EmitIntValue(EHCookieOffset, 4); + AddComment("EHCookieXOROffset"); + OS.EmitIntValue(0, 4); + BaseState = -2; + } + + assert(!FuncInfo.SEHUnwindMap.empty()); + for (const SEHUnwindMapEntry &UME : FuncInfo.SEHUnwindMap) { + auto *Handler = UME.Handler.get<MachineBasicBlock *>(); + const MCSymbol *ExceptOrFinally = + UME.IsFinally ? getMCSymbolForMBB(Asm, Handler) : Handler->getSymbol(); + // -1 is usually the base state for "unwind to caller", but for + // _except_handler4 it's -2. Do that replacement here if necessary. + int ToState = UME.ToState == -1 ? BaseState : UME.ToState; + AddComment("ToState"); + OS.EmitIntValue(ToState, 4); + AddComment(UME.IsFinally ? "Null" : "FilterFunction"); + OS.EmitValue(create32bitRef(UME.Filter), 4); + AddComment(UME.IsFinally ? "FinallyFunclet" : "ExceptionHandler"); + OS.EmitValue(create32bitRef(ExceptOrFinally), 4); + } +} + +static int getTryRank(const WinEHFuncInfo &FuncInfo, int State) { + int Rank = 0; + while (State != -1) { + ++Rank; + State = FuncInfo.ClrEHUnwindMap[State].TryParentState; + } + return Rank; +} + +static int getTryAncestor(const WinEHFuncInfo &FuncInfo, int Left, int Right) { + int LeftRank = getTryRank(FuncInfo, Left); + int RightRank = getTryRank(FuncInfo, Right); + + while (LeftRank < RightRank) { + Right = FuncInfo.ClrEHUnwindMap[Right].TryParentState; + --RightRank; + } + + while (RightRank < LeftRank) { + Left = FuncInfo.ClrEHUnwindMap[Left].TryParentState; + --LeftRank; + } + + while (Left != Right) { + Left = FuncInfo.ClrEHUnwindMap[Left].TryParentState; + Right = FuncInfo.ClrEHUnwindMap[Right].TryParentState; + } + + return Left; +} + +void WinException::emitCLRExceptionTable(const MachineFunction *MF) { + // CLR EH "states" are really just IDs that identify handlers/funclets; + // states, handlers, and funclets all have 1:1 mappings between them, and a + // handler/funclet's "state" is its index in the ClrEHUnwindMap. + MCStreamer &OS = *Asm->OutStreamer; + const WinEHFuncInfo &FuncInfo = *MF->getWinEHFuncInfo(); + MCSymbol *FuncBeginSym = Asm->getFunctionBegin(); + MCSymbol *FuncEndSym = Asm->getFunctionEnd(); + + // A ClrClause describes a protected region. + struct ClrClause { + const MCSymbol *StartLabel; // Start of protected region + const MCSymbol *EndLabel; // End of protected region + int State; // Index of handler protecting the protected region + int EnclosingState; // Index of funclet enclosing the protected region + }; + SmallVector<ClrClause, 8> Clauses; + + // Build a map from handler MBBs to their corresponding states (i.e. their + // indices in the ClrEHUnwindMap). + int NumStates = FuncInfo.ClrEHUnwindMap.size(); + assert(NumStates > 0 && "Don't need exception table!"); + DenseMap<const MachineBasicBlock *, int> HandlerStates; + for (int State = 0; State < NumStates; ++State) { + MachineBasicBlock *HandlerBlock = + FuncInfo.ClrEHUnwindMap[State].Handler.get<MachineBasicBlock *>(); + HandlerStates[HandlerBlock] = State; + // Use this loop through all handlers to verify our assumption (used in + // the MinEnclosingState computation) that enclosing funclets have lower + // state numbers than their enclosed funclets. + assert(FuncInfo.ClrEHUnwindMap[State].HandlerParentState < State && + "ill-formed state numbering"); + } + // Map the main function to the NullState. + HandlerStates[&MF->front()] = NullState; + + // Write out a sentinel indicating the end of the standard (Windows) xdata + // and the start of the additional (CLR) info. + OS.EmitIntValue(0xffffffff, 4); + // Write out the number of funclets + OS.EmitIntValue(NumStates, 4); + + // Walk the machine blocks/instrs, computing and emitting a few things: + // 1. Emit a list of the offsets to each handler entry, in lexical order. + // 2. Compute a map (EndSymbolMap) from each funclet to the symbol at its end. + // 3. Compute the list of ClrClauses, in the required order (inner before + // outer, earlier before later; the order by which a forward scan with + // early termination will find the innermost enclosing clause covering + // a given address). + // 4. A map (MinClauseMap) from each handler index to the index of the + // outermost funclet/function which contains a try clause targeting the + // key handler. This will be used to determine IsDuplicate-ness when + // emitting ClrClauses. The NullState value is used to indicate that the + // top-level function contains a try clause targeting the key handler. + // HandlerStack is a stack of (PendingStartLabel, PendingState) pairs for + // try regions we entered before entering the PendingState try but which + // we haven't yet exited. + SmallVector<std::pair<const MCSymbol *, int>, 4> HandlerStack; + // EndSymbolMap and MinClauseMap are maps described above. + std::unique_ptr<MCSymbol *[]> EndSymbolMap(new MCSymbol *[NumStates]); + SmallVector<int, 4> MinClauseMap((size_t)NumStates, NumStates); + + // Visit the root function and each funclet. + for (MachineFunction::const_iterator FuncletStart = MF->begin(), + FuncletEnd = MF->begin(), + End = MF->end(); + FuncletStart != End; FuncletStart = FuncletEnd) { + int FuncletState = HandlerStates[&*FuncletStart]; + // Find the end of the funclet + MCSymbol *EndSymbol = FuncEndSym; + while (++FuncletEnd != End) { + if (FuncletEnd->isEHFuncletEntry()) { + EndSymbol = getMCSymbolForMBB(Asm, &*FuncletEnd); + break; + } + } + // Emit the function/funclet end and, if this is a funclet (and not the + // root function), record it in the EndSymbolMap. + OS.EmitValue(getOffset(EndSymbol, FuncBeginSym), 4); + if (FuncletState != NullState) { + // Record the end of the handler. + EndSymbolMap[FuncletState] = EndSymbol; + } + + // Walk the state changes in this function/funclet and compute its clauses. + // Funclets always start in the null state. + const MCSymbol *CurrentStartLabel = nullptr; + int CurrentState = NullState; + assert(HandlerStack.empty()); + for (const auto &StateChange : + InvokeStateChangeIterator::range(FuncInfo, FuncletStart, FuncletEnd)) { + // Close any try regions we're not still under + int StillPendingState = + getTryAncestor(FuncInfo, CurrentState, StateChange.NewState); + while (CurrentState != StillPendingState) { + assert(CurrentState != NullState && + "Failed to find still-pending state!"); + // Close the pending clause + Clauses.push_back({CurrentStartLabel, StateChange.PreviousEndLabel, + CurrentState, FuncletState}); + // Now the next-outer try region is current + CurrentState = FuncInfo.ClrEHUnwindMap[CurrentState].TryParentState; + // Pop the new start label from the handler stack if we've exited all + // inner try regions of the corresponding try region. + if (HandlerStack.back().second == CurrentState) + CurrentStartLabel = HandlerStack.pop_back_val().first; + } + + if (StateChange.NewState != CurrentState) { + // For each clause we're starting, update the MinClauseMap so we can + // know which is the topmost funclet containing a clause targeting + // it. + for (int EnteredState = StateChange.NewState; + EnteredState != CurrentState; + EnteredState = + FuncInfo.ClrEHUnwindMap[EnteredState].TryParentState) { + int &MinEnclosingState = MinClauseMap[EnteredState]; + if (FuncletState < MinEnclosingState) + MinEnclosingState = FuncletState; + } + // Save the previous current start/label on the stack and update to + // the newly-current start/state. + HandlerStack.emplace_back(CurrentStartLabel, CurrentState); + CurrentStartLabel = StateChange.NewStartLabel; + CurrentState = StateChange.NewState; + } + } + assert(HandlerStack.empty()); + } + + // Now emit the clause info, starting with the number of clauses. + OS.EmitIntValue(Clauses.size(), 4); + for (ClrClause &Clause : Clauses) { + // Emit a CORINFO_EH_CLAUSE : + /* + struct CORINFO_EH_CLAUSE + { + CORINFO_EH_CLAUSE_FLAGS Flags; // actually a CorExceptionFlag + DWORD TryOffset; + DWORD TryLength; // actually TryEndOffset + DWORD HandlerOffset; + DWORD HandlerLength; // actually HandlerEndOffset + union + { + DWORD ClassToken; // use for catch clauses + DWORD FilterOffset; // use for filter clauses + }; + }; + + enum CORINFO_EH_CLAUSE_FLAGS + { + CORINFO_EH_CLAUSE_NONE = 0, + CORINFO_EH_CLAUSE_FILTER = 0x0001, // This clause is for a filter + CORINFO_EH_CLAUSE_FINALLY = 0x0002, // This clause is a finally clause + CORINFO_EH_CLAUSE_FAULT = 0x0004, // This clause is a fault clause + }; + typedef enum CorExceptionFlag + { + COR_ILEXCEPTION_CLAUSE_NONE, + COR_ILEXCEPTION_CLAUSE_FILTER = 0x0001, // This is a filter clause + COR_ILEXCEPTION_CLAUSE_FINALLY = 0x0002, // This is a finally clause + COR_ILEXCEPTION_CLAUSE_FAULT = 0x0004, // This is a fault clause + COR_ILEXCEPTION_CLAUSE_DUPLICATED = 0x0008, // duplicated clause. This + // clause was duplicated + // to a funclet which was + // pulled out of line + } CorExceptionFlag; + */ + // Add 1 to the start/end of the EH clause; the IP associated with a + // call when the runtime does its scan is the IP of the next instruction + // (the one to which control will return after the call), so we need + // to add 1 to the end of the clause to cover that offset. We also add + // 1 to the start of the clause to make sure that the ranges reported + // for all clauses are disjoint. Note that we'll need some additional + // logic when machine traps are supported, since in that case the IP + // that the runtime uses is the offset of the faulting instruction + // itself; if such an instruction immediately follows a call but the + // two belong to different clauses, we'll need to insert a nop between + // them so the runtime can distinguish the point to which the call will + // return from the point at which the fault occurs. + + const MCExpr *ClauseBegin = + getOffsetPlusOne(Clause.StartLabel, FuncBeginSym); + const MCExpr *ClauseEnd = getOffsetPlusOne(Clause.EndLabel, FuncBeginSym); + + const ClrEHUnwindMapEntry &Entry = FuncInfo.ClrEHUnwindMap[Clause.State]; + MachineBasicBlock *HandlerBlock = Entry.Handler.get<MachineBasicBlock *>(); + MCSymbol *BeginSym = getMCSymbolForMBB(Asm, HandlerBlock); + const MCExpr *HandlerBegin = getOffset(BeginSym, FuncBeginSym); + MCSymbol *EndSym = EndSymbolMap[Clause.State]; + const MCExpr *HandlerEnd = getOffset(EndSym, FuncBeginSym); + + uint32_t Flags = 0; + switch (Entry.HandlerType) { + case ClrHandlerType::Catch: + // Leaving bits 0-2 clear indicates catch. + break; + case ClrHandlerType::Filter: + Flags |= 1; + break; + case ClrHandlerType::Finally: + Flags |= 2; + break; + case ClrHandlerType::Fault: + Flags |= 4; + break; + } + if (Clause.EnclosingState != MinClauseMap[Clause.State]) { + // This is a "duplicate" clause; the handler needs to be entered from a + // frame above the one holding the invoke. + assert(Clause.EnclosingState > MinClauseMap[Clause.State]); + Flags |= 8; + } + OS.EmitIntValue(Flags, 4); + + // Write the clause start/end + OS.EmitValue(ClauseBegin, 4); + OS.EmitValue(ClauseEnd, 4); + + // Write out the handler start/end + OS.EmitValue(HandlerBegin, 4); + OS.EmitValue(HandlerEnd, 4); + + // Write out the type token or filter offset + assert(Entry.HandlerType != ClrHandlerType::Filter && "NYI: filters"); + OS.EmitIntValue(Entry.TypeToken, 4); + } +} diff --git a/llvm/lib/CodeGen/AsmPrinter/WinException.h b/llvm/lib/CodeGen/AsmPrinter/WinException.h new file mode 100644 index 000000000000..dc5036302131 --- /dev/null +++ b/llvm/lib/CodeGen/AsmPrinter/WinException.h @@ -0,0 +1,115 @@ +//===-- WinException.h - Windows Exception Handling ----------*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing windows exception info into asm files. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_WIN64EXCEPTION_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_WIN64EXCEPTION_H + +#include "EHStreamer.h" + +namespace llvm { +class Function; +class GlobalValue; +class MachineFunction; +class MCExpr; +class MCSection; +class Value; +struct WinEHFuncInfo; + +class LLVM_LIBRARY_VISIBILITY WinException : public EHStreamer { + /// Per-function flag to indicate if personality info should be emitted. + bool shouldEmitPersonality = false; + + /// Per-function flag to indicate if the LSDA should be emitted. + bool shouldEmitLSDA = false; + + /// Per-function flag to indicate if frame moves info should be emitted. + bool shouldEmitMoves = false; + + /// True if this is a 64-bit target and we should use image relative offsets. + bool useImageRel32 = false; + + /// True if we are generating exception handling on Windows for ARM64. + bool isAArch64 = false; + + /// Pointer to the current funclet entry BB. + const MachineBasicBlock *CurrentFuncletEntry = nullptr; + + /// The section of the last funclet start. + MCSection *CurrentFuncletTextSection = nullptr; + + void emitCSpecificHandlerTable(const MachineFunction *MF); + + void emitSEHActionsForRange(const WinEHFuncInfo &FuncInfo, + const MCSymbol *BeginLabel, + const MCSymbol *EndLabel, int State); + + /// Emit the EH table data for 32-bit and 64-bit functions using + /// the __CxxFrameHandler3 personality. + void emitCXXFrameHandler3Table(const MachineFunction *MF); + + /// Emit the EH table data for _except_handler3 and _except_handler4 + /// personality functions. These are only used on 32-bit and do not use CFI + /// tables. + void emitExceptHandlerTable(const MachineFunction *MF); + + void emitCLRExceptionTable(const MachineFunction *MF); + + void computeIP2StateTable( + const MachineFunction *MF, const WinEHFuncInfo &FuncInfo, + SmallVectorImpl<std::pair<const MCExpr *, int>> &IPToStateTable); + + /// Emits the label used with llvm.eh.recoverfp, which is used by + /// outlined funclets. + void emitEHRegistrationOffsetLabel(const WinEHFuncInfo &FuncInfo, + StringRef FLinkageName); + + const MCExpr *create32bitRef(const MCSymbol *Value); + const MCExpr *create32bitRef(const GlobalValue *GV); + const MCExpr *getLabel(const MCSymbol *Label); + const MCExpr *getOffset(const MCSymbol *OffsetOf, const MCSymbol *OffsetFrom); + const MCExpr *getOffsetPlusOne(const MCSymbol *OffsetOf, + const MCSymbol *OffsetFrom); + + /// Gets the offset that we should use in a table for a stack object with the + /// given index. For targets using CFI (Win64, etc), this is relative to the + /// established SP at the end of the prologue. For targets without CFI (Win32 + /// only), it is relative to the frame pointer. + int getFrameIndexOffset(int FrameIndex, const WinEHFuncInfo &FuncInfo); + + void endFuncletImpl(); +public: + //===--------------------------------------------------------------------===// + // Main entry points. + // + WinException(AsmPrinter *A); + ~WinException() override; + + /// Emit all exception information that should come after the content. + void endModule() override; + + /// Gather pre-function exception information. Assumes being emitted + /// immediately after the function entry point. + void beginFunction(const MachineFunction *MF) override; + + void markFunctionEnd() override; + + /// Gather and emit post-function exception information. + void endFunction(const MachineFunction *) override; + + /// Emit target-specific EH funclet machinery. + void beginFunclet(const MachineBasicBlock &MBB, MCSymbol *Sym) override; + void endFunclet() override; +}; +} + +#endif + |
