diff options
Diffstat (limited to 'lld')
79 files changed, 2479 insertions, 941 deletions
diff --git a/lld/COFF/Chunks.cpp b/lld/COFF/Chunks.cpp index e17b64df869f..39f4575031be 100644 --- a/lld/COFF/Chunks.cpp +++ b/lld/COFF/Chunks.cpp @@ -437,7 +437,7 @@ void SectionChunk::applyRelocation(uint8_t *off, // Compute the RVA of the relocation for relative relocations. uint64_t p = rva + rel.VirtualAddress; uint64_t imageBase = file->ctx.config.imageBase; - switch (file->ctx.config.machine) { + switch (getMachine()) { case AMD64: applyRelX64(off, rel.Type, os, s, p, imageBase); break; @@ -551,7 +551,7 @@ static uint8_t getBaserelType(const coff_relocation &rel, // Only called when base relocation is enabled. void SectionChunk::getBaserels(std::vector<Baserel> *res) { for (const coff_relocation &rel : getRelocs()) { - uint8_t ty = getBaserelType(rel, file->ctx.config.machine); + uint8_t ty = getBaserelType(rel, getMachine()); if (ty == IMAGE_REL_BASED_ABSOLUTE) continue; Symbol *target = file->getSymbol(rel.SymbolTableIndex); @@ -896,6 +896,20 @@ void RVAFlagTableChunk::writeTo(uint8_t *buf) const { "RVA tables should be de-duplicated"); } +size_t ECCodeMapChunk::getSize() const { + return map.size() * sizeof(chpe_range_entry); +} + +void ECCodeMapChunk::writeTo(uint8_t *buf) const { + auto table = reinterpret_cast<chpe_range_entry *>(buf); + for (uint32_t i = 0; i < map.size(); i++) { + const ECCodeMapEntry &entry = map[i]; + uint32_t start = entry.first->getRVA(); + table[i].StartOffset = start | entry.type; + table[i].Length = entry.last->getRVA() + entry.last->getSize() - start; + } +} + // MinGW specific, for the "automatic import of variables from DLLs" feature. 
size_t PseudoRelocTableChunk::getSize() const { if (relocs.empty()) diff --git a/lld/COFF/Chunks.h b/lld/COFF/Chunks.h index 3d605e6ab10c..7b6bdeae4234 100644 --- a/lld/COFF/Chunks.h +++ b/lld/COFF/Chunks.h @@ -24,10 +24,11 @@ namespace lld::coff { using llvm::COFF::ImportDirectoryTableEntry; -using llvm::object::COFFSymbolRef; -using llvm::object::SectionRef; +using llvm::object::chpe_range_type; using llvm::object::coff_relocation; using llvm::object::coff_section; +using llvm::object::COFFSymbolRef; +using llvm::object::SectionRef; class Baserel; class Defined; @@ -114,6 +115,9 @@ public: // synthesized by the linker. bool isHotPatchable() const; + MachineTypes getMachine() const; + std::optional<chpe_range_type> getArm64ECRangeType() const; + protected: Chunk(Kind k = OtherKind) : chunkKind(k), hasData(true), p2Align(0) {} @@ -164,6 +168,8 @@ public: // Collect all locations that contain absolute addresses for base relocations. virtual void getBaserels(std::vector<Baserel> *res) {} + virtual MachineTypes getMachine() const { return IMAGE_FILE_MACHINE_UNKNOWN; } + // Returns a human-readable name of this chunk. Chunks are unnamed chunks of // bytes, so this is used only for logging or debugging. virtual StringRef getDebugName() const { return ""; } @@ -174,6 +180,16 @@ protected: NonSectionChunk(Kind k = OtherKind) : Chunk(k) {} }; +class NonSectionCodeChunk : public NonSectionChunk { +public: + virtual uint32_t getOutputCharacteristics() const override { + return llvm::COFF::IMAGE_SCN_MEM_READ | llvm::COFF::IMAGE_SCN_MEM_EXECUTE; + } + +protected: + NonSectionCodeChunk(Kind k = OtherKind) : NonSectionChunk(k) {} +}; + // MinGW specific; information about one individual location in the image // that needs to be fixed up at runtime after loading. This represents // one individual element in the PseudoRelocTableChunk table. 
@@ -219,6 +235,8 @@ public: ArrayRef<uint8_t> getContents() const; void writeTo(uint8_t *buf) const; + MachineTypes getMachine() const { return file->getMachineType(); } + // Defend against unsorted relocations. This may be overly conservative. void sortRelocations(); @@ -378,16 +396,13 @@ private: inline size_t Chunk::getSize() const { if (isa<SectionChunk>(this)) return static_cast<const SectionChunk *>(this)->getSize(); - else - return static_cast<const NonSectionChunk *>(this)->getSize(); + return static_cast<const NonSectionChunk *>(this)->getSize(); } inline uint32_t Chunk::getOutputCharacteristics() const { if (isa<SectionChunk>(this)) return static_cast<const SectionChunk *>(this)->getOutputCharacteristics(); - else - return static_cast<const NonSectionChunk *>(this) - ->getOutputCharacteristics(); + return static_cast<const NonSectionChunk *>(this)->getOutputCharacteristics(); } inline void Chunk::writeTo(uint8_t *buf) const { @@ -400,8 +415,7 @@ inline void Chunk::writeTo(uint8_t *buf) const { inline StringRef Chunk::getSectionName() const { if (isa<SectionChunk>(this)) return static_cast<const SectionChunk *>(this)->getSectionName(); - else - return static_cast<const NonSectionChunk *>(this)->getSectionName(); + return static_cast<const NonSectionChunk *>(this)->getSectionName(); } inline void Chunk::getBaserels(std::vector<Baserel> *res) { @@ -414,8 +428,28 @@ inline void Chunk::getBaserels(std::vector<Baserel> *res) { inline StringRef Chunk::getDebugName() const { if (isa<SectionChunk>(this)) return static_cast<const SectionChunk *>(this)->getDebugName(); - else - return static_cast<const NonSectionChunk *>(this)->getDebugName(); + return static_cast<const NonSectionChunk *>(this)->getDebugName(); +} + +inline MachineTypes Chunk::getMachine() const { + if (isa<SectionChunk>(this)) + return static_cast<const SectionChunk *>(this)->getMachine(); + return static_cast<const NonSectionChunk *>(this)->getMachine(); +} + +inline std::optional<chpe_range_type> 
Chunk::getArm64ECRangeType() const { + // Data sections don't need codemap entries. + if (!(getOutputCharacteristics() & llvm::COFF::IMAGE_SCN_MEM_EXECUTE)) + return std::nullopt; + + switch (getMachine()) { + case AMD64: + return chpe_range_type::Amd64; + case ARM64EC: + return chpe_range_type::Arm64EC; + default: + return chpe_range_type::Arm64; + } } // This class is used to implement an lld-specific feature (not implemented in @@ -488,10 +522,10 @@ static const uint8_t importThunkARM64[] = { // Windows-specific. // A chunk for DLL import jump table entry. In a final output, its // contents will be a JMP instruction to some __imp_ symbol. -class ImportThunkChunk : public NonSectionChunk { +class ImportThunkChunk : public NonSectionCodeChunk { public: ImportThunkChunk(COFFLinkerContext &ctx, Defined *s) - : NonSectionChunk(ImportThunkKind), impSymbol(s), ctx(ctx) {} + : NonSectionCodeChunk(ImportThunkKind), impSymbol(s), ctx(ctx) {} static bool classof(const Chunk *c) { return c->kind() == ImportThunkKind; } protected: @@ -504,6 +538,7 @@ public: explicit ImportThunkChunkX64(COFFLinkerContext &ctx, Defined *s); size_t getSize() const override { return sizeof(importThunkX86); } void writeTo(uint8_t *buf) const override; + MachineTypes getMachine() const override { return AMD64; } }; class ImportThunkChunkX86 : public ImportThunkChunk { @@ -513,6 +548,7 @@ public: size_t getSize() const override { return sizeof(importThunkX86); } void getBaserels(std::vector<Baserel> *res) override; void writeTo(uint8_t *buf) const override; + MachineTypes getMachine() const override { return I386; } }; class ImportThunkChunkARM : public ImportThunkChunk { @@ -524,6 +560,7 @@ public: size_t getSize() const override { return sizeof(importThunkARM); } void getBaserels(std::vector<Baserel> *res) override; void writeTo(uint8_t *buf) const override; + MachineTypes getMachine() const override { return ARMNT; } }; class ImportThunkChunkARM64 : public ImportThunkChunk { @@ -534,9 +571,10 
@@ public: } size_t getSize() const override { return sizeof(importThunkARM64); } void writeTo(uint8_t *buf) const override; + MachineTypes getMachine() const override { return ARM64; } }; -class RangeExtensionThunkARM : public NonSectionChunk { +class RangeExtensionThunkARM : public NonSectionCodeChunk { public: explicit RangeExtensionThunkARM(COFFLinkerContext &ctx, Defined *t) : target(t), ctx(ctx) { @@ -544,6 +582,7 @@ public: } size_t getSize() const override; void writeTo(uint8_t *buf) const override; + MachineTypes getMachine() const override { return ARMNT; } Defined *target; @@ -551,7 +590,7 @@ private: COFFLinkerContext &ctx; }; -class RangeExtensionThunkARM64 : public NonSectionChunk { +class RangeExtensionThunkARM64 : public NonSectionCodeChunk { public: explicit RangeExtensionThunkARM64(COFFLinkerContext &ctx, Defined *t) : target(t), ctx(ctx) { @@ -559,6 +598,7 @@ public: } size_t getSize() const override; void writeTo(uint8_t *buf) const override; + MachineTypes getMachine() const override { return ARM64; } Defined *target; @@ -663,6 +703,27 @@ public: void writeTo(uint8_t *buf) const override {} }; +class ECCodeMapEntry { +public: + ECCodeMapEntry(Chunk *first, Chunk *last, chpe_range_type type) + : first(first), last(last), type(type) {} + Chunk *first; + Chunk *last; + chpe_range_type type; +}; + +// This is a chunk containing CHPE code map on EC targets. It's a table +// of address ranges and their types. +class ECCodeMapChunk : public NonSectionChunk { +public: + ECCodeMapChunk(std::vector<ECCodeMapEntry> &map) : map(map) {} + size_t getSize() const override; + void writeTo(uint8_t *buf) const override; + +private: + std::vector<ECCodeMapEntry> &map; +}; + // MinGW specific, for the "automatic import of variables from DLLs" feature. 
// This provides the table of runtime pseudo relocations, for variable // references that turned out to need to be imported from a DLL even though diff --git a/lld/COFF/Config.h b/lld/COFF/Config.h index 96b49ed3850d..24126f635a06 100644 --- a/lld/COFF/Config.h +++ b/lld/COFF/Config.h @@ -48,6 +48,8 @@ enum class ExportSource { ModuleDefinition, }; +enum class EmitKind { Obj, LLVM, ASM }; + // Represents an /export option. struct Export { StringRef name; // N in /export:N or /export:E=N @@ -70,7 +72,7 @@ struct Export { StringRef symbolName; StringRef exportName; // Name in DLL - bool operator==(const Export &e) { + bool operator==(const Export &e) const { return (name == e.name && extName == e.extName && aliasTarget == e.aliasTarget && ordinal == e.ordinal && noname == e.noname && @@ -100,12 +102,16 @@ enum class ICFLevel { // behavior. }; +enum class BuildIDHash { + None, + PDB, + Binary, +}; + // Global configuration. struct Configuration { enum ManifestKind { Default, SideBySide, Embed, No }; - bool is64() const { - return machine == AMD64 || llvm::COFF::isAnyArm64(machine); - } + bool is64() const { return llvm::COFF::is64Bit(machine); } llvm::COFF::MachineTypes machine = IMAGE_FILE_MACHINE_UNKNOWN; size_t wordsize; @@ -286,6 +292,8 @@ struct Configuration { uint32_t minorSubsystemVersion = 0; uint32_t timestamp = 0; uint32_t functionPadMin = 0; + uint32_t timeTraceGranularity = 0; + uint16_t dependentLoadFlags = 0; bool dynamicBase = true; bool allowBind = true; bool cetCompat = false; @@ -309,10 +317,14 @@ struct Configuration { bool swaprunNet = false; bool thinLTOEmitImportsFiles; bool thinLTOIndexOnly; + bool timeTraceEnabled = false; bool autoImport = false; bool pseudoRelocs = false; bool stdcallFixup = false; bool writeCheckSum = false; + EmitKind emit = EmitKind::Obj; + bool allowDuplicateWeak = false; + BuildIDHash buildIDHash = BuildIDHash::None; }; } // namespace lld::coff diff --git a/lld/COFF/DLL.cpp b/lld/COFF/DLL.cpp index 
597797010467..6b516d8c6d5e 100644 --- a/lld/COFF/DLL.cpp +++ b/lld/COFF/DLL.cpp @@ -313,11 +313,12 @@ static const uint8_t tailMergeARM64[] = { }; // A chunk for the delay import thunk. -class ThunkChunkX64 : public NonSectionChunk { +class ThunkChunkX64 : public NonSectionCodeChunk { public: ThunkChunkX64(Defined *i, Chunk *tm) : imp(i), tailMerge(tm) {} size_t getSize() const override { return sizeof(thunkX64); } + MachineTypes getMachine() const override { return AMD64; } void writeTo(uint8_t *buf) const override { memcpy(buf, thunkX64, sizeof(thunkX64)); @@ -329,11 +330,12 @@ public: Chunk *tailMerge = nullptr; }; -class TailMergeChunkX64 : public NonSectionChunk { +class TailMergeChunkX64 : public NonSectionCodeChunk { public: TailMergeChunkX64(Chunk *d, Defined *h) : desc(d), helper(h) {} size_t getSize() const override { return sizeof(tailMergeX64); } + MachineTypes getMachine() const override { return AMD64; } void writeTo(uint8_t *buf) const override { memcpy(buf, tailMergeX64, sizeof(tailMergeX64)); @@ -380,12 +382,13 @@ public: } }; -class ThunkChunkX86 : public NonSectionChunk { +class ThunkChunkX86 : public NonSectionCodeChunk { public: ThunkChunkX86(COFFLinkerContext &ctx, Defined *i, Chunk *tm) : imp(i), tailMerge(tm), ctx(ctx) {} size_t getSize() const override { return sizeof(thunkX86); } + MachineTypes getMachine() const override { return I386; } void writeTo(uint8_t *buf) const override { memcpy(buf, thunkX86, sizeof(thunkX86)); @@ -404,12 +407,13 @@ private: const COFFLinkerContext &ctx; }; -class TailMergeChunkX86 : public NonSectionChunk { +class TailMergeChunkX86 : public NonSectionCodeChunk { public: TailMergeChunkX86(COFFLinkerContext &ctx, Chunk *d, Defined *h) : desc(d), helper(h), ctx(ctx) {} size_t getSize() const override { return sizeof(tailMergeX86); } + MachineTypes getMachine() const override { return I386; } void writeTo(uint8_t *buf) const override { memcpy(buf, tailMergeX86, sizeof(tailMergeX86)); @@ -428,7 +432,7 @@ private: 
const COFFLinkerContext &ctx; }; -class ThunkChunkARM : public NonSectionChunk { +class ThunkChunkARM : public NonSectionCodeChunk { public: ThunkChunkARM(COFFLinkerContext &ctx, Defined *i, Chunk *tm) : imp(i), tailMerge(tm), ctx(ctx) { @@ -436,6 +440,7 @@ public: } size_t getSize() const override { return sizeof(thunkARM); } + MachineTypes getMachine() const override { return ARMNT; } void writeTo(uint8_t *buf) const override { memcpy(buf, thunkARM, sizeof(thunkARM)); @@ -454,7 +459,7 @@ private: const COFFLinkerContext &ctx; }; -class TailMergeChunkARM : public NonSectionChunk { +class TailMergeChunkARM : public NonSectionCodeChunk { public: TailMergeChunkARM(COFFLinkerContext &ctx, Chunk *d, Defined *h) : desc(d), helper(h), ctx(ctx) { @@ -462,6 +467,7 @@ public: } size_t getSize() const override { return sizeof(tailMergeARM); } + MachineTypes getMachine() const override { return ARMNT; } void writeTo(uint8_t *buf) const override { memcpy(buf, tailMergeARM, sizeof(tailMergeARM)); @@ -480,13 +486,14 @@ private: const COFFLinkerContext &ctx; }; -class ThunkChunkARM64 : public NonSectionChunk { +class ThunkChunkARM64 : public NonSectionCodeChunk { public: ThunkChunkARM64(Defined *i, Chunk *tm) : imp(i), tailMerge(tm) { setAlignment(4); } size_t getSize() const override { return sizeof(thunkARM64); } + MachineTypes getMachine() const override { return ARM64; } void writeTo(uint8_t *buf) const override { memcpy(buf, thunkARM64, sizeof(thunkARM64)); @@ -499,13 +506,14 @@ public: Chunk *tailMerge = nullptr; }; -class TailMergeChunkARM64 : public NonSectionChunk { +class TailMergeChunkARM64 : public NonSectionCodeChunk { public: TailMergeChunkARM64(Chunk *d, Defined *h) : desc(d), helper(h) { setAlignment(4); } size_t getSize() const override { return sizeof(tailMergeARM64); } + MachineTypes getMachine() const override { return ARM64; } void writeTo(uint8_t *buf) const override { memcpy(buf, tailMergeARM64, sizeof(tailMergeARM64)); diff --git a/lld/COFF/DebugTypes.cpp 
b/lld/COFF/DebugTypes.cpp index 23ddccea695f..a4c808e4c9a0 100644 --- a/lld/COFF/DebugTypes.cpp +++ b/lld/COFF/DebugTypes.cpp @@ -29,6 +29,7 @@ #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/Parallel.h" #include "llvm/Support/Path.h" +#include "llvm/Support/TimeProfiler.h" using namespace llvm; using namespace llvm::codeview; @@ -310,7 +311,7 @@ Error TpiSource::mergeDebugT(TypeMerger *m) { "use remapTpiWithGHashes when ghash is enabled"); CVTypeArray types; - BinaryStreamReader reader(file->debugTypes, support::little); + BinaryStreamReader reader(file->debugTypes, llvm::endianness::little); cantFail(reader.readArray(types, reader.getLength())); // When dealing with PCH.OBJ, some indices were already merged. @@ -587,7 +588,7 @@ void TpiSource::loadGHashes() { ownedGHashes = false; } else { CVTypeArray types; - BinaryStreamReader reader(file->debugTypes, support::little); + BinaryStreamReader reader(file->debugTypes, llvm::endianness::little); cantFail(reader.readArray(types, reader.getLength())); assignGHashesFromVector(GloballyHashedType::hashTypes(types)); } @@ -1068,6 +1069,7 @@ TypeMerger::~TypeMerger() = default; void TypeMerger::mergeTypesWithGHash() { // Load ghashes. Do type servers and PCH objects first. 
{ + llvm::TimeTraceScope timeScope("Load GHASHes"); ScopedTimer t1(ctx.loadGHashTimer); parallelForEach(dependencySources, [&](TpiSource *source) { source->loadGHashes(); }); @@ -1075,6 +1077,7 @@ void TypeMerger::mergeTypesWithGHash() { [&](TpiSource *source) { source->loadGHashes(); }); } + llvm::TimeTraceScope timeScope("Merge types (GHASH)"); ScopedTimer t2(ctx.mergeGHashTimer); GHashState ghashState; diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp index d7476e91e03e..99c1a60735ad 100644 --- a/lld/COFF/Driver.cpp +++ b/lld/COFF/Driver.cpp @@ -247,6 +247,7 @@ void LinkerDriver::enqueuePath(StringRef path, bool wholeArchive, bool lazy) { createFutureForFile(std::string(path))); std::string pathStr = std::string(path); enqueueTask([=]() { + llvm::TimeTraceScope timeScope("File: ", path); auto [mb, ec] = future->get(); if (ec) { // Retry reading the file (synchronously) now that we may have added @@ -332,6 +333,7 @@ void LinkerDriver::enqueueArchiveMember(const Archive::Child &c, reportBufferError(mbOrErr.takeError(), check(c.getFullName())); MemoryBufferRef mb = mbOrErr.get(); enqueueTask([=]() { + llvm::TimeTraceScope timeScope("Archive: ", mb.getBufferIdentifier()); ctx.driver.addArchiveBuffer(mb, toCOFFString(ctx, sym), parentName, offsetInArchive); }); @@ -342,12 +344,14 @@ void LinkerDriver::enqueueArchiveMember(const Archive::Child &c, CHECK(c.getFullName(), "could not get the filename for the member defining symbol " + toCOFFString(ctx, sym)); - auto future = std::make_shared<std::future<MBErrPair>>( - createFutureForFile(childName)); + auto future = + std::make_shared<std::future<MBErrPair>>(createFutureForFile(childName)); enqueueTask([=]() { auto mbOrErr = future->get(); if (mbOrErr.second) reportBufferError(errorCodeToError(mbOrErr.second), childName); + llvm::TimeTraceScope timeScope("Archive: ", + mbOrErr.first->getBufferIdentifier()); // Pass empty string as archive name so that the original filename is // used as the buffer identifier. 
ctx.driver.addArchiveBuffer(takeBuffer(std::move(mbOrErr.first)), @@ -640,26 +644,24 @@ void LinkerDriver::detectWinSysRoot(const opt::InputArgList &Args) { void LinkerDriver::addClangLibSearchPaths(const std::string &argv0) { std::string lldBinary = sys::fs::getMainExecutable(argv0.c_str(), nullptr); SmallString<128> binDir(lldBinary); - sys::path::remove_filename(binDir); // remove lld-link.exe + sys::path::remove_filename(binDir); // remove lld-link.exe StringRef rootDir = sys::path::parent_path(binDir); // remove 'bin' SmallString<128> libDir(rootDir); sys::path::append(libDir, "lib"); - // We need to prepend the paths here in order to make sure that we always - // try to link the clang versions of the builtins over the ones supplied by MSVC. - searchPaths.insert(searchPaths.begin(), saver().save(libDir.str())); // Add the resource dir library path SmallString<128> runtimeLibDir(rootDir); - sys::path::append(runtimeLibDir, "lib", "clang", std::to_string(LLVM_VERSION_MAJOR), "lib"); - searchPaths.insert(searchPaths.begin(), saver().save(runtimeLibDir.str())); - + sys::path::append(runtimeLibDir, "lib", "clang", + std::to_string(LLVM_VERSION_MAJOR), "lib"); // Resource dir + osname, which is hardcoded to windows since we are in the // COFF driver. 
SmallString<128> runtimeLibDirWithOS(runtimeLibDir); sys::path::append(runtimeLibDirWithOS, "windows"); - searchPaths.insert(searchPaths.begin(), saver().save(runtimeLibDirWithOS.str())); + searchPaths.push_back(saver().save(runtimeLibDirWithOS.str())); + searchPaths.push_back(saver().save(runtimeLibDir.str())); + searchPaths.push_back(saver().save(libDir.str())); } void LinkerDriver::addWinSysRootLibSearchPaths() { @@ -976,6 +978,7 @@ std::string LinkerDriver::getImportName(bool asLib) { } void LinkerDriver::createImportLibrary(bool asLib) { + llvm::TimeTraceScope timeScope("Create import library"); std::vector<COFFShortExport> exports; for (Export &e1 : ctx.config.exports) { COFFShortExport e2; @@ -1033,6 +1036,7 @@ void LinkerDriver::createImportLibrary(bool asLib) { } void LinkerDriver::parseModuleDefs(StringRef path) { + llvm::TimeTraceScope timeScope("Parse def file"); std::unique_ptr<MemoryBuffer> mb = CHECK(MemoryBuffer::getFile(path, /*IsText=*/false, /*RequiresNullTerminator=*/false, @@ -1097,6 +1101,7 @@ void LinkerDriver::enqueueTask(std::function<void()> task) { } bool LinkerDriver::run() { + llvm::TimeTraceScope timeScope("Read input files"); ScopedTimer t(ctx.inputFileTimer); bool didWork = !taskQueue.empty(); @@ -1145,8 +1150,7 @@ void LinkerDriver::parseOrderFile(StringRef arg) { if (set.count(s) == 0) { if (ctx.config.warnMissingOrderSymbol) warn("/order:" + arg + ": missing symbol: " + s + " [LNK4037]"); - } - else + } else ctx.config.order[s] = INT_MIN + ctx.config.order.size(); } @@ -1206,7 +1210,7 @@ static void readCallGraphsFromObjectFiles(COFFLinkerContext &ctx) { ArrayRef<uint8_t> contents; cantFail( obj->getCOFFObj()->getSectionContents(obj->callgraphSec, contents)); - BinaryStreamReader reader(contents, support::little); + BinaryStreamReader reader(contents, llvm::endianness::little); while (!reader.empty()) { uint32_t fromIndex, toIndex; uint64_t count; @@ -1236,6 +1240,8 @@ static void markAddrsig(Symbol *s) { } static void 
findKeepUniqueSections(COFFLinkerContext &ctx) { + llvm::TimeTraceScope timeScope("Find keep unique sections"); + // Exported symbols could be address-significant in other executables or DSOs, // so we conservatively mark them as address-significant. for (Export &r : ctx.config.exports) @@ -1252,7 +1258,7 @@ static void findKeepUniqueSections(COFFLinkerContext &ctx) { const uint8_t *cur = contents.begin(); while (cur != contents.end()) { unsigned size; - const char *err; + const char *err = nullptr; uint64_t symIndex = decodeULEB128(cur, &size, contents.end(), &err); if (err) fatal(toString(obj) + ": could not decode addrsig section: " + err); @@ -1313,8 +1319,8 @@ void LinkerDriver::parsePDBAltPath() { else if (var.equals_insensitive("%_ext%")) buf.append(binaryExtension); else { - warn("only %_PDB% and %_EXT% supported in /pdbaltpath:, keeping " + - var + " as literal"); + warn("only %_PDB% and %_EXT% supported in /pdbaltpath:, keeping " + var + + " as literal"); buf.append(var); } @@ -1328,6 +1334,7 @@ void LinkerDriver::parsePDBAltPath() { /// trees into one resource tree. /// Call after ObjFile::Instances is complete. void LinkerDriver::convertResources() { + llvm::TimeTraceScope timeScope("Convert resources"); std::vector<ObjFile *> resourceObjFiles; for (ObjFile *f : ctx.objFileInstances) { @@ -1448,8 +1455,8 @@ getVFS(const opt::InputArgList &args) { return nullptr; } - if (auto ret = vfs::getVFSFromYAML(std::move(*bufOrErr), /*DiagHandler*/ nullptr, - arg->getValue())) + if (auto ret = vfs::getVFSFromYAML(std::move(*bufOrErr), + /*DiagHandler*/ nullptr, arg->getValue())) return ret; error("Invalid vfs overlay"); @@ -1481,6 +1488,16 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) { ArgParser parser(ctx); opt::InputArgList args = parser.parse(argsArr); + // Initialize time trace profiler. 
+ config->timeTraceEnabled = args.hasArg(OPT_time_trace_eq); + config->timeTraceGranularity = + args::getInteger(args, OPT_time_trace_granularity_eq, 500); + + if (config->timeTraceEnabled) + timeTraceProfilerInitialize(config->timeTraceGranularity, argsArr[0]); + + llvm::TimeTraceScope timeScope("COFF link"); + // Parse and evaluate -mllvm options. std::vector<const char *> v; v.push_back("lld-link (LLVM option parsing)"); @@ -1488,8 +1505,11 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) { v.push_back(arg->getValue()); config->mllvmOpts.emplace_back(arg->getValue()); } - cl::ResetAllOptionOccurrences(); - cl::ParseCommandLineOptions(v.size(), v.data()); + { + llvm::TimeTraceScope timeScope2("Parse cl::opt"); + cl::ResetAllOptionOccurrences(); + cl::ParseCommandLineOptions(v.size(), v.data()); + } // Handle /errorlimit early, because error() depends on it. if (auto *arg = args.getLastArg(OPT_errorlimit)) { @@ -1542,15 +1562,18 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) { " (use --error-limit=0 to see all errors)"; // Handle /linkrepro and /reproduce. - if (std::optional<std::string> path = getReproduceFile(args)) { - Expected<std::unique_ptr<TarWriter>> errOrWriter = - TarWriter::create(*path, sys::path::stem(*path)); + { + llvm::TimeTraceScope timeScope2("Reproducer"); + if (std::optional<std::string> path = getReproduceFile(args)) { + Expected<std::unique_ptr<TarWriter>> errOrWriter = + TarWriter::create(*path, sys::path::stem(*path)); - if (errOrWriter) { - tar = std::move(*errOrWriter); - } else { - error("/linkrepro: failed to open " + *path + ": " + - toString(errOrWriter.takeError())); + if (errOrWriter) { + tar = std::move(*errOrWriter); + } else { + error("/linkrepro: failed to open " + *path + ": " + + toString(errOrWriter.takeError())); + } } } @@ -1562,13 +1585,29 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) { } // Construct search path list. 
- searchPaths.emplace_back(""); - for (auto *arg : args.filtered(OPT_libpath)) - searchPaths.push_back(arg->getValue()); - detectWinSysRoot(args); - if (!args.hasArg(OPT_lldignoreenv) && !args.hasArg(OPT_winsysroot)) - addLibSearchPaths(); - addClangLibSearchPaths(argsArr[0]); + { + llvm::TimeTraceScope timeScope2("Search paths"); + searchPaths.emplace_back(""); + if (!config->mingw) { + // Prefer the Clang provided builtins over the ones bundled with MSVC. + // In MinGW mode, the compiler driver passes the necessary libpath + // options explicitly. + addClangLibSearchPaths(argsArr[0]); + } + for (auto *arg : args.filtered(OPT_libpath)) + searchPaths.push_back(arg->getValue()); + if (!config->mingw) { + // Don't automatically deduce the lib path from the environment or MSVC + // installations when operating in mingw mode. (This also makes LLD ignore + // winsysroot and vctoolsdir arguments.) + detectWinSysRoot(args); + if (!args.hasArg(OPT_lldignoreenv) && !args.hasArg(OPT_winsysroot)) + addLibSearchPaths(); + } else { + if (args.hasArg(OPT_vctoolsdir, OPT_winsysroot)) + warn("ignoring /vctoolsdir or /winsysroot flags in MinGW mode"); + } + } // Handle /ignore for (auto *arg : args.filtered(OPT_ignore)) { @@ -1703,16 +1742,22 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) { args.hasFlag(OPT_appcontainer, OPT_appcontainer_no, false); // Handle /machine - if (auto *arg = args.getLastArg(OPT_machine)) { - config->machine = getMachineType(arg->getValue()); - if (config->machine == IMAGE_FILE_MACHINE_UNKNOWN) - fatal(Twine("unknown /machine argument: ") + arg->getValue()); - addWinSysRootLibSearchPaths(); + { + llvm::TimeTraceScope timeScope2("Machine arg"); + if (auto *arg = args.getLastArg(OPT_machine)) { + config->machine = getMachineType(arg->getValue()); + if (config->machine == IMAGE_FILE_MACHINE_UNKNOWN) + fatal(Twine("unknown /machine argument: ") + arg->getValue()); + addWinSysRootLibSearchPaths(); + } } // Handle /nodefaultlib:<filename> - 
for (auto *arg : args.filtered(OPT_nodefaultlib)) - config->noDefaultLibs.insert(findLib(arg->getValue()).lower()); + { + llvm::TimeTraceScope timeScope2("Nodefaultlib"); + for (auto *arg : args.filtered(OPT_nodefaultlib)) + config->noDefaultLibs.insert(findLib(arg->getValue()).lower()); + } // Handle /nodefaultlib if (args.hasArg(OPT_nodefaultlib_all)) @@ -1854,6 +1899,19 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) { if (args.hasArg(OPT_lldsavetemps)) config->saveTemps = true; + // Handle /lldemit + if (auto *arg = args.getLastArg(OPT_lldemit)) { + StringRef s = arg->getValue(); + if (s == "obj") + config->emit = EmitKind::Obj; + else if (s == "llvm") + config->emit = EmitKind::LLVM; + else if (s == "asm") + config->emit = EmitKind::ASM; + else + error("/lldemit: unknown option: " + s); + } + // Handle /kill-at if (args.hasArg(OPT_kill_at)) config->killAt = true; @@ -1982,6 +2040,9 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) { config->stdcallFixup = args.hasFlag(OPT_stdcall_fixup, OPT_stdcall_fixup_no, config->mingw); config->warnStdcallFixup = !args.hasArg(OPT_stdcall_fixup); + config->allowDuplicateWeak = + args.hasFlag(OPT_lld_allow_duplicate_weak, + OPT_lld_allow_duplicate_weak_no, config->mingw); if (args.hasFlag(OPT_inferasanlibs, OPT_inferasanlibs_no, false)) warn("ignoring '/inferasanlibs', this flag is not supported"); @@ -2037,30 +2098,33 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) { // Create a list of input files. These can be given as OPT_INPUT options // and OPT_wholearchive_file options, and we also need to track OPT_start_lib // and OPT_end_lib. 
- bool inLib = false; - for (auto *arg : args) { - switch (arg->getOption().getID()) { - case OPT_end_lib: - if (!inLib) - error("stray " + arg->getSpelling()); - inLib = false; - break; - case OPT_start_lib: - if (inLib) - error("nested " + arg->getSpelling()); - inLib = true; - break; - case OPT_wholearchive_file: - if (std::optional<StringRef> path = findFileIfNew(arg->getValue())) - enqueuePath(*path, true, inLib); - break; - case OPT_INPUT: - if (std::optional<StringRef> path = findFileIfNew(arg->getValue())) - enqueuePath(*path, isWholeArchive(*path), inLib); - break; - default: - // Ignore other options. - break; + { + llvm::TimeTraceScope timeScope2("Parse & queue inputs"); + bool inLib = false; + for (auto *arg : args) { + switch (arg->getOption().getID()) { + case OPT_end_lib: + if (!inLib) + error("stray " + arg->getSpelling()); + inLib = false; + break; + case OPT_start_lib: + if (inLib) + error("nested " + arg->getSpelling()); + inLib = true; + break; + case OPT_wholearchive_file: + if (std::optional<StringRef> path = findFileIfNew(arg->getValue())) + enqueuePath(*path, true, inLib); + break; + case OPT_INPUT: + if (std::optional<StringRef> path = findFileIfNew(arg->getValue())) + enqueuePath(*path, isWholeArchive(*path), inLib); + break; + default: + // Ignore other options. + break; + } } } @@ -2083,8 +2147,11 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) { raw_svector_ostream stream(buffer); stream << "Library search paths:\n"; - for (StringRef path : searchPaths) + for (StringRef path : searchPaths) { + if (path == "") + path = "(cwd)"; stream << " " << path << "\n"; + } message(buffer); } @@ -2101,7 +2168,7 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) { // Handle /RELEASE if (args.hasArg(OPT_release)) config->writeCheckSum = true; - + // Handle /safeseh, x86 only, on by default, except for mingw. 
if (config->machine == I386) { config->safeSEH = args.hasFlag(OPT_safeseh, OPT_safeseh_no, !config->mingw); @@ -2112,7 +2179,13 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) { for (auto *arg : args.filtered(OPT_functionpadmin, OPT_functionpadmin_opt)) parseFunctionPadMin(arg); + // Handle /dependentloadflag + for (auto *arg : + args.filtered(OPT_dependentloadflag, OPT_dependentloadflag_opt)) + parseDependentLoadFlags(arg); + if (tar) { + llvm::TimeTraceScope timeScope("Reproducer: response file"); tar->append("response.txt", createResponseFile(args, filePaths, ArrayRef<StringRef>(searchPaths).slice(1))); @@ -2128,20 +2201,23 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) { args.hasFlag(OPT_highentropyva, OPT_highentropyva_no, true); if (!config->dynamicBase && - (config->machine == ARMNT || config->machine == ARM64)) + (config->machine == ARMNT || isAnyArm64(config->machine))) error("/dynamicbase:no is not compatible with " + machineToStr(config->machine)); // Handle /export - for (auto *arg : args.filtered(OPT_export)) { - Export e = parseExport(arg->getValue()); - if (config->machine == I386) { - if (!isDecorated(e.name)) - e.name = saver().save("_" + e.name); - if (!e.extName.empty() && !isDecorated(e.extName)) - e.extName = saver().save("_" + e.extName); + { + llvm::TimeTraceScope timeScope("Parse /export"); + for (auto *arg : args.filtered(OPT_export)) { + Export e = parseExport(arg->getValue()); + if (config->machine == I386) { + if (!isDecorated(e.name)) + e.name = saver().save("_" + e.name); + if (!e.extName.empty() && !isDecorated(e.extName)) + e.extName = saver().save("_" + e.extName); + } + config->exports.push_back(e); } - config->exports.push_back(e); } // Handle /def @@ -2162,40 +2238,47 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) { // that from entry point name. Must happen before /entry handling, // and after the early return when just writing an import library. 
if (config->subsystem == IMAGE_SUBSYSTEM_UNKNOWN) { + llvm::TimeTraceScope timeScope("Infer subsystem"); config->subsystem = inferSubsystem(); if (config->subsystem == IMAGE_SUBSYSTEM_UNKNOWN) fatal("subsystem must be defined"); } // Handle /entry and /dll - if (auto *arg = args.getLastArg(OPT_entry)) { - config->entry = addUndefined(mangle(arg->getValue())); - } else if (!config->entry && !config->noEntry) { - if (args.hasArg(OPT_dll)) { - StringRef s = (config->machine == I386) ? "__DllMainCRTStartup@12" - : "_DllMainCRTStartup"; - config->entry = addUndefined(s); - } else if (config->driverWdm) { - // /driver:wdm implies /entry:_NtProcessStartup - config->entry = addUndefined(mangle("_NtProcessStartup")); - } else { - // Windows specific -- If entry point name is not given, we need to - // infer that from user-defined entry name. - StringRef s = findDefaultEntry(); - if (s.empty()) - fatal("entry point must be defined"); - config->entry = addUndefined(s); - log("Entry name inferred: " + s); + { + llvm::TimeTraceScope timeScope("Entry point"); + if (auto *arg = args.getLastArg(OPT_entry)) { + config->entry = addUndefined(mangle(arg->getValue())); + } else if (!config->entry && !config->noEntry) { + if (args.hasArg(OPT_dll)) { + StringRef s = (config->machine == I386) ? "__DllMainCRTStartup@12" + : "_DllMainCRTStartup"; + config->entry = addUndefined(s); + } else if (config->driverWdm) { + // /driver:wdm implies /entry:_NtProcessStartup + config->entry = addUndefined(mangle("_NtProcessStartup")); + } else { + // Windows specific -- If entry point name is not given, we need to + // infer that from user-defined entry name. 
+ StringRef s = findDefaultEntry(); + if (s.empty()) + fatal("entry point must be defined"); + config->entry = addUndefined(s); + log("Entry name inferred: " + s); + } } } // Handle /delayload - for (auto *arg : args.filtered(OPT_delayload)) { - config->delayLoads.insert(StringRef(arg->getValue()).lower()); - if (config->machine == I386) { - config->delayLoadHelper = addUndefined("___delayLoadHelper2@8"); - } else { - config->delayLoadHelper = addUndefined("__delayLoadHelper2"); + { + llvm::TimeTraceScope timeScope("Delay load"); + for (auto *arg : args.filtered(OPT_delayload)) { + config->delayLoads.insert(StringRef(arg->getValue()).lower()); + if (config->machine == I386) { + config->delayLoadHelper = addUndefined("___delayLoadHelper2@8"); + } else { + config->delayLoadHelper = addUndefined("__delayLoadHelper2"); + } } } @@ -2231,6 +2314,11 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) { config->lldmapFile.clear(); } + // If should create PDB, use the hash of PDB content for build id. Otherwise, + // generate using the hash of executable content. + if (args.hasFlag(OPT_build_id, OPT_build_id_no, false)) + config->buildIDHash = BuildIDHash::Binary; + if (shouldCreatePDB) { // Put the PDB next to the image if no /pdb flag was passed. if (config->pdbPath.empty()) { @@ -2252,6 +2340,7 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) { // Don't do this earlier, so that ctx.OutputFile is ready. parsePDBAltPath(); } + config->buildIDHash = BuildIDHash::PDB; } // Set default image base if /base is not given. 
@@ -2277,6 +2366,13 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) { ctx.symtab.addAbsolute(mangle("__guard_eh_cont_count"), 0); ctx.symtab.addAbsolute(mangle("__guard_eh_cont_table"), 0); + if (isArm64EC(config->machine)) { + ctx.symtab.addAbsolute("__arm64x_extra_rfe_table", 0); + ctx.symtab.addAbsolute("__arm64x_extra_rfe_table_size", 0); + ctx.symtab.addAbsolute("__hybrid_code_map", 0); + ctx.symtab.addAbsolute("__hybrid_code_map_count", 0); + } + if (config->pseudoRelocs) { ctx.symtab.addAbsolute(mangle("__RUNTIME_PSEUDO_RELOC_LIST__"), 0); ctx.symtab.addAbsolute(mangle("__RUNTIME_PSEUDO_RELOC_LIST_END__"), 0); @@ -2289,60 +2385,64 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) { // This code may add new undefined symbols to the link, which may enqueue more // symbol resolution tasks, so we need to continue executing tasks until we // converge. - do { - // Windows specific -- if entry point is not found, - // search for its mangled names. - if (config->entry) - mangleMaybe(config->entry); + { + llvm::TimeTraceScope timeScope("Add unresolved symbols"); + do { + // Windows specific -- if entry point is not found, + // search for its mangled names. + if (config->entry) + mangleMaybe(config->entry); - // Windows specific -- Make sure we resolve all dllexported symbols. - for (Export &e : config->exports) { - if (!e.forwardTo.empty()) - continue; - e.sym = addUndefined(e.name); - if (e.source != ExportSource::Directives) - e.symbolName = mangleMaybe(e.sym); - } + // Windows specific -- Make sure we resolve all dllexported symbols. + for (Export &e : config->exports) { + if (!e.forwardTo.empty()) + continue; + e.sym = addUndefined(e.name); + if (e.source != ExportSource::Directives) + e.symbolName = mangleMaybe(e.sym); + } - // Add weak aliases. Weak aliases is a mechanism to give remaining - // undefined symbols final chance to be resolved successfully. 
- for (auto pair : config->alternateNames) { - StringRef from = pair.first; - StringRef to = pair.second; - Symbol *sym = ctx.symtab.find(from); - if (!sym) - continue; - if (auto *u = dyn_cast<Undefined>(sym)) - if (!u->weakAlias) - u->weakAlias = ctx.symtab.addUndefined(to); - } + // Add weak aliases. Weak aliases is a mechanism to give remaining + // undefined symbols final chance to be resolved successfully. + for (auto pair : config->alternateNames) { + StringRef from = pair.first; + StringRef to = pair.second; + Symbol *sym = ctx.symtab.find(from); + if (!sym) + continue; + if (auto *u = dyn_cast<Undefined>(sym)) + if (!u->weakAlias) + u->weakAlias = ctx.symtab.addUndefined(to); + } - // If any inputs are bitcode files, the LTO code generator may create - // references to library functions that are not explicit in the bitcode - // file's symbol table. If any of those library functions are defined in a - // bitcode file in an archive member, we need to arrange to use LTO to - // compile those archive members by adding them to the link beforehand. - if (!ctx.bitcodeFileInstances.empty()) - for (auto *s : lto::LTO::getRuntimeLibcallSymbols()) - ctx.symtab.addLibcall(s); + // If any inputs are bitcode files, the LTO code generator may create + // references to library functions that are not explicit in the bitcode + // file's symbol table. If any of those library functions are defined in a + // bitcode file in an archive member, we need to arrange to use LTO to + // compile those archive members by adding them to the link beforehand. + if (!ctx.bitcodeFileInstances.empty()) + for (auto *s : lto::LTO::getRuntimeLibcallSymbols()) + ctx.symtab.addLibcall(s); - // Windows specific -- if __load_config_used can be resolved, resolve it. - if (ctx.symtab.findUnderscore("_load_config_used")) - addUndefined(mangle("_load_config_used")); + // Windows specific -- if __load_config_used can be resolved, resolve it. 
+ if (ctx.symtab.findUnderscore("_load_config_used")) + addUndefined(mangle("_load_config_used")); - if (args.hasArg(OPT_include_optional)) { - // Handle /includeoptional - for (auto *arg : args.filtered(OPT_include_optional)) - if (isa_and_nonnull<LazyArchive>(ctx.symtab.find(arg->getValue()))) - addUndefined(arg->getValue()); - } - } while (run()); + if (args.hasArg(OPT_include_optional)) { + // Handle /includeoptional + for (auto *arg : args.filtered(OPT_include_optional)) + if (isa_and_nonnull<LazyArchive>(ctx.symtab.find(arg->getValue()))) + addUndefined(arg->getValue()); + } + } while (run()); + } // Create wrapped symbols for -wrap option. std::vector<WrappedSymbol> wrapped = addWrappedSymbols(ctx, args); // Load more object files that might be needed for wrapped symbols. if (!wrapped.empty()) - while (run()); + while (run()) + ; if (config->autoImport || config->stdcallFixup) { // MinGW specific. @@ -2390,10 +2490,15 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) { // resolve symbols and write indices, but don't generate native code or link). ctx.symtab.compileBitcodeFiles(); + if (Defined *d = + dyn_cast_or_null<Defined>(ctx.symtab.findUnderscore("_tls_used"))) + config->gcroot.push_back(d); + // If -thinlto-index-only is given, we should create only "index // files" and not object files. Index file creation is already done // in addCombinedLTOObject, so we are done if that's the case. - if (config->thinLTOIndexOnly) + // Likewise, don't emit object files for other /lldemit options. + if (config->emit != EmitKind::Obj || config->thinLTOIndexOnly) return; // If we generated native object files from bitcode files, this resolves @@ -2431,6 +2536,7 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) { // need to create a .lib file. In MinGW mode, we only do that when the // -implib option is given explicitly, for compatibility with GNU ld. 
if (!config->exports.empty() || config->dll) { + llvm::TimeTraceScope timeScope("Create .lib exports"); fixupExports(); if (!config->noimplib && (!config->mingw || !config->implib.empty())) createImportLibrary(/*asLib=*/false); @@ -2484,6 +2590,7 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) { // Handle /call-graph-ordering-file and /call-graph-profile-sort (default on). if (config->callGraphProfileSort) { + llvm::TimeTraceScope timeScope("Call graph"); if (auto *arg = args.getLastArg(OPT_call_graph_ordering_file)) { parseCallGraphFile(arg->getValue()); } @@ -2532,6 +2639,15 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) { rootTimer.stop(); if (config->showTiming) ctx.rootTimer.print(); + + if (config->timeTraceEnabled) { + // Manually stop the topmost "COFF link" scope, since we're shutting down. + timeTraceProfilerEnd(); + + checkError(timeTraceProfilerWrite( + args.getLastArgValue(OPT_time_trace_eq).str(), config->outputFile)); + timeTraceProfilerCleanup(); + } } } // namespace lld::coff diff --git a/lld/COFF/Driver.h b/lld/COFF/Driver.h index 17d2e6a483dc..fa54de05befb 100644 --- a/lld/COFF/Driver.h +++ b/lld/COFF/Driver.h @@ -233,6 +233,9 @@ private: // Parses a string in the form of "[:<integer>]" void parseFunctionPadMin(llvm::opt::Arg *a); + // Parses a string in the form of "[:<integer>]" + void parseDependentLoadFlags(llvm::opt::Arg *a); + // Parses a string in the form of "EMBED[,=<integer>]|NO". void parseManifest(StringRef arg); @@ -272,7 +275,7 @@ private: // Create enum with OPT_xxx values for each option in Options.td enum { OPT_INVALID = 0, -#define OPTION(_1, _2, ID, _4, _5, _6, _7, _8, _9, _10, _11, _12) OPT_##ID, +#define OPTION(...) 
LLVM_MAKE_OPT_ID(__VA_ARGS__), #include "Options.inc" #undef OPTION }; diff --git a/lld/COFF/DriverUtils.cpp b/lld/COFF/DriverUtils.cpp index d859911c2321..ab10e2d1ae74 100644 --- a/lld/COFF/DriverUtils.cpp +++ b/lld/COFF/DriverUtils.cpp @@ -31,6 +31,7 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/Process.h" #include "llvm/Support/Program.h" +#include "llvm/Support/TimeProfiler.h" #include "llvm/Support/raw_ostream.h" #include "llvm/WindowsManifest/WindowsManifestMerger.h" #include <limits> @@ -38,6 +39,7 @@ #include <optional> using namespace llvm::COFF; +using namespace llvm::opt; using namespace llvm; using llvm::sys::Process; @@ -263,6 +265,19 @@ void LinkerDriver::parseFunctionPadMin(llvm::opt::Arg *a) { } } +// Parses /dependentloadflag option argument. +void LinkerDriver::parseDependentLoadFlags(llvm::opt::Arg *a) { + StringRef arg = a->getNumValues() ? a->getValue() : ""; + if (!arg.empty()) { + if (arg.getAsInteger(0, ctx.config.dependentLoadFlags)) + error("/dependentloadflag: invalid argument: " + arg); + return; + } + // MSVC linker reports error "no argument specified", although MSDN describes + // argument as optional. + error("/dependentloadflag: no argument specified"); +} + // Parses a string in the form of "EMBED[,=<integer>]|NO". // Results are directly written to // Config. @@ -660,6 +675,7 @@ static StringRef exportSourceName(ExportSource s) { // Performs error checking on all /export arguments. // It also sets ordinals. void LinkerDriver::fixupExports() { + llvm::TimeTraceScope timeScope("Fixup exports"); // Symbol ordinals must be unique. 
std::set<uint16_t> ords; for (Export &e : ctx.config.exports) { @@ -818,9 +834,7 @@ MemoryBufferRef LinkerDriver::convertResToCOFF(ArrayRef<MemoryBufferRef> mbs, // Create table mapping all options defined in Options.td static constexpr llvm::opt::OptTable::Info infoTable[] = { -#define OPTION(X1, X2, ID, KIND, GROUP, ALIAS, X7, X8, X9, X10, X11, X12) \ - {X1, X2, X10, X11, OPT_##ID, llvm::opt::Option::KIND##Class, \ - X9, X8, OPT_##GROUP, OPT_##ALIAS, X7, X12}, +#define OPTION(...) LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__), #include "Options.inc" #undef OPTION }; diff --git a/lld/COFF/ICF.cpp b/lld/COFF/ICF.cpp index 37f5e7549b7f..013ffcfb3d5d 100644 --- a/lld/COFF/ICF.cpp +++ b/lld/COFF/ICF.cpp @@ -26,6 +26,7 @@ #include "llvm/ADT/Hashing.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Parallel.h" +#include "llvm/Support/TimeProfiler.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/xxhash.h" #include <algorithm> @@ -93,7 +94,10 @@ bool ICF::isEligible(SectionChunk *c) { return true; // So are vtables. - if (c->sym && c->sym->getName().starts_with("??_7")) + const char *itaniumVtablePrefix = + ctx.config.machine == I386 ? "__ZTV" : "_ZTV"; + if (c->sym && (c->sym->getName().starts_with("??_7") || + c->sym->getName().starts_with(itaniumVtablePrefix))) return true; // Anything else not in an address-significance table is eligible. @@ -246,6 +250,7 @@ void ICF::forEachClass(std::function<void(size_t, size_t)> fn) { // Two sections are considered the same if their section headers, // contents and relocations are all the same. void ICF::run() { + llvm::TimeTraceScope timeScope("ICF"); ScopedTimer t(ctx.icfTimer); // Collect only mergeable sections and group by hash value. 
diff --git a/lld/COFF/InputFiles.cpp b/lld/COFF/InputFiles.cpp index 541837a7fcec..dd2e1419bb10 100644 --- a/lld/COFF/InputFiles.cpp +++ b/lld/COFF/InputFiles.cpp @@ -81,7 +81,7 @@ static void checkAndSetWeakAlias(COFFLinkerContext &ctx, InputFile *f, // of another symbol emitted near the weak symbol. // Just use the definition from the first object file that defined // this weak symbol. - if (ctx.config.mingw) + if (ctx.config.allowDuplicateWeak) return; ctx.symtab.reportDuplicate(source, f); } @@ -661,6 +661,8 @@ std::optional<Symbol *> ObjFile::createDefined( if (prevailing) { SectionChunk *c = readSection(sectionNumber, def, getName()); sparseChunks[sectionNumber] = c; + if (!c) + return nullptr; c->sym = cast<DefinedRegular>(leader); c->selection = selection; cast<DefinedRegular>(leader)->data = &c->repl; @@ -707,7 +709,7 @@ void ObjFile::initializeFlags() { DebugSubsectionArray subsections; - BinaryStreamReader reader(data, support::little); + BinaryStreamReader reader(data, llvm::endianness::little); ExitOnError exitOnErr; exitOnErr(reader.readArray(subsections, data.size())); @@ -773,7 +775,7 @@ void ObjFile::initializeDependencies() { // Get the first type record. It will indicate if this object uses a type // server (/Zi) or a PCH file (/Yu). CVTypeArray types; - BinaryStreamReader reader(data, support::little); + BinaryStreamReader reader(data, llvm::endianness::little); cantFail(reader.readArray(types, reader.getLength())); CVTypeArray::Iterator firstType = types.begin(); if (firstType == types.end()) @@ -1038,6 +1040,21 @@ void BitcodeFile::parse() { fakeSC = &ctx.ltoDataSectionChunk.chunk; if (objSym.isUndefined()) { sym = ctx.symtab.addUndefined(symName, this, false); + if (objSym.isWeak()) + sym->deferUndefined = true; + // If one LTO object file references (i.e. 
has an undefined reference to) + // a symbol with an __imp_ prefix, the LTO compilation itself sees it + // as unprefixed but with a dllimport attribute instead, and doesn't + // understand the relation to a concrete IR symbol with the __imp_ prefix. + // + // For such cases, mark the symbol as used in a regular object (i.e. the + // symbol must be retained) so that the linker can associate the + // references in the end. If the symbol is defined in an import library + // or in a regular object file, this has no effect, but if it is defined + // in another LTO object file, this makes sure it is kept, to fulfill + // the reference when linking the output of the LTO compilation. + if (symName.starts_with("__imp_")) + sym->isUsedInRegularObj = true; } else if (objSym.isCommon()) { sym = ctx.symtab.addCommon(this, symName, objSym.getCommonSize()); } else if (objSym.isWeak() && objSym.isIndirect()) { @@ -1080,6 +1097,7 @@ MachineTypes BitcodeFile::getMachineType() { case Triple::x86: return I386; case Triple::arm: + case Triple::thumb: return ARMNT; case Triple::aarch64: return ARM64; diff --git a/lld/COFF/LLDMapFile.cpp b/lld/COFF/LLDMapFile.cpp index c14480aaf821..58098cf5d652 100644 --- a/lld/COFF/LLDMapFile.cpp +++ b/lld/COFF/LLDMapFile.cpp @@ -25,6 +25,7 @@ #include "Writer.h" #include "lld/Common/ErrorHandler.h" #include "llvm/Support/Parallel.h" +#include "llvm/Support/TimeProfiler.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -92,6 +93,7 @@ void lld::coff::writeLLDMapFile(const COFFLinkerContext &ctx) { if (ctx.config.lldmapFile.empty()) return; + llvm::TimeTraceScope timeScope(".lldmap file"); std::error_code ec; raw_fd_ostream os(ctx.config.lldmapFile, ec, sys::fs::OF_None); if (ec) diff --git a/lld/COFF/LTO.cpp b/lld/COFF/LTO.cpp index 67f5a62920e9..7df931911213 100644 --- a/lld/COFF/LTO.cpp +++ b/lld/COFF/LTO.cpp @@ -13,6 +13,7 @@ #include "Symbols.h" #include "lld/Common/Args.h" #include "lld/Common/CommonLinkerContext.h" +#include 
"lld/Common/Filesystem.h" #include "lld/Common/Strings.h" #include "lld/Common/TargetOptionsCommandFlags.h" #include "llvm/ADT/STLExtras.h" @@ -42,18 +43,6 @@ using namespace llvm::object; using namespace lld; using namespace lld::coff; -// Creates an empty file to and returns a raw_fd_ostream to write to it. -static std::unique_ptr<raw_fd_ostream> openFile(StringRef file) { - std::error_code ec; - auto ret = - std::make_unique<raw_fd_ostream>(file, ec, sys::fs::OpenFlags::OF_None); - if (ec) { - error("cannot open " + file + ": " + ec.message()); - return nullptr; - } - return ret; -} - std::string BitcodeCompiler::getThinLTOOutputFile(StringRef path) { return lto::getThinLTOOutputFile(path, ctx.config.thinLTOPrefixReplaceOld, ctx.config.thinLTOPrefixReplaceNew); @@ -88,7 +77,7 @@ lto::Config BitcodeCompiler::createConfig() { c.OptLevel = ctx.config.ltoo; c.CPU = getCPUStr(); c.MAttrs = getMAttrs(); - std::optional<CodeGenOpt::Level> optLevelOrNone = CodeGenOpt::getLevel( + std::optional<CodeGenOptLevel> optLevelOrNone = CodeGenOpt::getLevel( ctx.config.ltoCgo.value_or(args::getCGOptLevel(ctx.config.ltoo))); assert(optLevelOrNone && "Invalid optimization level!"); c.CGOptLevel = *optLevelOrNone; @@ -97,6 +86,20 @@ lto::Config BitcodeCompiler::createConfig() { c.CSIRProfile = std::string(ctx.config.ltoCSProfileFile); c.RunCSIRInstr = ctx.config.ltoCSProfileGenerate; c.PGOWarnMismatch = ctx.config.ltoPGOWarnMismatch; + c.TimeTraceEnabled = ctx.config.timeTraceEnabled; + c.TimeTraceGranularity = ctx.config.timeTraceGranularity; + + if (ctx.config.emit == EmitKind::LLVM) { + c.PostInternalizeModuleHook = [this](size_t task, const Module &m) { + if (std::unique_ptr<raw_fd_ostream> os = + openLTOOutputFile(ctx.config.outputFile)) + WriteBitcodeToFile(m, *os, false); + return false; + }; + } else if (ctx.config.emit == EmitKind::ASM) { + c.CGFileType = CodeGenFileType::AssemblyFile; + c.Options.MCOptions.AsmVerbose = true; + } if (ctx.config.saveTemps) 
checkError(c.addSaveTemps(std::string(ctx.config.outputFile) + ".", @@ -215,6 +218,8 @@ std::vector<InputFile *> BitcodeCompiler::compile() { pruneCache(ctx.config.ltoCache, ctx.config.ltoCachePolicy, files); std::vector<InputFile *> ret; + bool emitASM = ctx.config.emit == EmitKind::ASM; + const char *Ext = emitASM ? ".s" : ".obj"; for (unsigned i = 0; i != maxTasks; ++i) { StringRef bitcodeFilePath; // Get the native object contents either from the cache or from memory. Do @@ -237,20 +242,21 @@ std::vector<InputFile *> BitcodeCompiler::compile() { if (bitcodeFilePath == "ld-temp.o") { ltoObjName = saver().save(Twine(ctx.config.outputFile) + ".lto" + - (i == 0 ? Twine("") : Twine('.') + Twine(i)) + ".obj"); + (i == 0 ? Twine("") : Twine('.') + Twine(i)) + Ext); } else { StringRef directory = sys::path::parent_path(bitcodeFilePath); - StringRef baseName = sys::path::filename(bitcodeFilePath); + StringRef baseName = sys::path::stem(bitcodeFilePath); StringRef outputFileBaseName = sys::path::filename(ctx.config.outputFile); SmallString<64> path; sys::path::append(path, directory, - outputFileBaseName + ".lto." + baseName); + outputFileBaseName + ".lto." 
+ baseName + Ext); sys::path::remove_dots(path, true); ltoObjName = saver().save(path.str()); } - if (ctx.config.saveTemps) + if (ctx.config.saveTemps || emitASM) saveBuffer(buf[i].second, ltoObjName); - ret.push_back(make<ObjFile>(ctx, MemoryBufferRef(objBuf, ltoObjName))); + if (!emitASM) + ret.push_back(make<ObjFile>(ctx, MemoryBufferRef(objBuf, ltoObjName))); } return ret; diff --git a/lld/COFF/MapFile.cpp b/lld/COFF/MapFile.cpp index f7a4ef961290..ed521dd375ed 100644 --- a/lld/COFF/MapFile.cpp +++ b/lld/COFF/MapFile.cpp @@ -36,6 +36,7 @@ #include "lld/Common/Timer.h" #include "llvm/Support/Parallel.h" #include "llvm/Support/Path.h" +#include "llvm/Support/TimeProfiler.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -203,6 +204,7 @@ void lld::coff::writeMapFile(COFFLinkerContext &ctx) { if (ctx.config.mapFile.empty()) return; + llvm::TimeTraceScope timeScope("Map file"); std::error_code ec; raw_fd_ostream os(ctx.config.mapFile, ec, sys::fs::OF_None); if (ec) diff --git a/lld/COFF/MarkLive.cpp b/lld/COFF/MarkLive.cpp index ad8c340f1845..2cf216a6aaad 100644 --- a/lld/COFF/MarkLive.cpp +++ b/lld/COFF/MarkLive.cpp @@ -11,6 +11,7 @@ #include "Symbols.h" #include "lld/Common/Timer.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/Support/TimeProfiler.h" #include <vector> namespace lld::coff { @@ -19,6 +20,7 @@ namespace lld::coff { // COMDAT chunks will be ignored by Writer, so they will be excluded // from the final output. void markLive(COFFLinkerContext &ctx) { + llvm::TimeTraceScope timeScope("Mark live"); ScopedTimer t(ctx.gcTimer); // We build up a worklist of sections which have been marked as live. 
We only diff --git a/lld/COFF/MinGW.cpp b/lld/COFF/MinGW.cpp index 53e146bb8600..e46f5277a8c3 100644 --- a/lld/COFF/MinGW.cpp +++ b/lld/COFF/MinGW.cpp @@ -16,6 +16,7 @@ #include "llvm/Object/COFF.h" #include "llvm/Support/Parallel.h" #include "llvm/Support/Path.h" +#include "llvm/Support/TimeProfiler.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -172,6 +173,7 @@ bool AutoExporter::shouldExport(Defined *sym) const { void lld::coff::writeDefFile(StringRef name, const std::vector<Export> &exports) { + llvm::TimeTraceScope timeScope("Write .def file"); std::error_code ec; raw_fd_ostream os(name, ec, sys::fs::OF_None); if (ec) diff --git a/lld/COFF/Options.td b/lld/COFF/Options.td index ea4ddb2d8495..4dab4a207173 100644 --- a/lld/COFF/Options.td +++ b/lld/COFF/Options.td @@ -56,6 +56,9 @@ def filealign : P<"filealign", "Section alignment in the output file">; def functionpadmin : F<"functionpadmin">; def functionpadmin_opt : P<"functionpadmin", "Prepares an image for hotpatching">; +def dependentloadflag : F<"dependentloadflag">; +def dependentloadflag_opt : P<"dependentloadflag", + "Sets the default load flags used to resolve the statically linked imports of a module">; def guard : P<"guard", "Control flow guard">; def heap : P<"heap", "Size of the heap">; def ignore : P<"ignore", "Specify warning codes to ignore">; @@ -232,6 +235,8 @@ defm demangle : B<"demangle", def include_optional : Joined<["/", "-", "/?", "-?"], "includeoptional:">, HelpText<"Add symbol as undefined, but allow it to remain undefined">; def kill_at : F<"kill-at">; +defm lld_allow_duplicate_weak : B_priv<"lld-allow-duplicate-weak">; +def lldemit : P<"lldemit", "Specify output type">; def lldmingw : F<"lldmingw">; def noseh : F<"noseh">; def osversion : P_priv<"osversion">; @@ -289,6 +294,19 @@ def wrap : P_priv<"wrap">; def vfsoverlay : P<"vfsoverlay", "Path to a vfsoverlay yaml file to optionally look for /defaultlib's in">; +def time_trace_eq: Joined<["--"], "time-trace=">, 
MetaVarName<"<file>">, + HelpText<"Record time trace to <file>">; +def : Flag<["--"], "time-trace">, Alias<time_trace_eq>, + HelpText<"Record time trace to file next to output">; + +def time_trace_granularity_eq: Joined<["--"], "time-trace-granularity=">, + HelpText<"Minimum time granularity (in microseconds) traced by time profiler">; + +defm build_id: B< + "build-id", + "Generate build ID (always on when generating PDB)", + "Do not Generate build ID">; + // Flags for debugging def lldmap : F<"lldmap">; def lldmap_file : P_priv<"lldmap">; diff --git a/lld/COFF/PDB.cpp b/lld/COFF/PDB.cpp index 5aa81c1fd03b..f77ff0d4eab8 100644 --- a/lld/COFF/PDB.cpp +++ b/lld/COFF/PDB.cpp @@ -57,6 +57,7 @@ #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/Path.h" #include "llvm/Support/ScopedPrinter.h" +#include "llvm/Support/TimeProfiler.h" #include <memory> #include <optional> @@ -187,9 +188,6 @@ class DebugSHandler { /// The object file whose .debug$S sections we're processing. ObjFile &file; - /// The result of merging type indices. - TpiSource *source; - /// The DEBUG_S_STRINGTABLE subsection. These strings are referred to by /// index from other records in the .debug$S section. 
All of these strings /// need to be added to the global PDB string table, and all references to @@ -230,11 +228,9 @@ class DebugSHandler { void addFrameDataSubsection(SectionChunk *debugChunk, const DebugSubsectionRecord &ss); - void recordStringTableReferences(CVSymbol sym, uint32_t symOffset); - public: - DebugSHandler(PDBLinker &linker, ObjFile &file, TpiSource *source) - : linker(linker), file(file), source(source) {} + DebugSHandler(PDBLinker &linker, ObjFile &file) + : linker(linker), file(file) {} void handleDebugS(SectionChunk *debugChunk); @@ -660,7 +656,7 @@ Error PDBLinker::writeAllModuleSymbolRecords(ObjFile *file, auto contents = SectionChunk::consumeDebugMagic(sectionContents, ".debug$S"); DebugSubsectionArray subsections; - BinaryStreamReader reader(contents, support::little); + BinaryStreamReader reader(contents, llvm::endianness::little); exitOnErr(reader.readArray(subsections, contents.size())); uint32_t nextRelocIndex = 0; @@ -762,7 +758,7 @@ void DebugSHandler::handleDebugS(SectionChunk *debugChunk) { ArrayRef<uint8_t> contents = debugChunk->getContents(); contents = SectionChunk::consumeDebugMagic(contents, ".debug$S"); DebugSubsectionArray subsections; - BinaryStreamReader reader(contents, support::little); + BinaryStreamReader reader(contents, llvm::endianness::little); ExitOnError exitOnErr; exitOnErr(reader.readArray(subsections, contents.size())); debugChunk->sortRelocations(); @@ -872,7 +868,7 @@ Error UnrelocatedDebugSubsection::commit(BinaryStreamWriter &writer) const { debugChunk->file->debugTypesObj) { TpiSource *source = debugChunk->file->debugTypesObj; DebugInlineeLinesSubsectionRef inlineeLines; - BinaryStreamReader storageReader(relocatedBytes, support::little); + BinaryStreamReader storageReader(relocatedBytes, llvm::endianness::little); ExitOnError exitOnErr; exitOnErr(inlineeLines.initialize(storageReader)); for (const InlineeSourceLine &line : inlineeLines) { @@ -966,7 +962,7 @@ void DebugSHandler::finish() { // Copy each 
frame data record, add in rvaStart, translate string table // indices, and add the record to the PDB. DebugFrameDataSubsectionRef fds; - BinaryStreamReader reader(subsecData, support::little); + BinaryStreamReader reader(subsecData, llvm::endianness::little); exitOnErr(fds.initialize(reader)); for (codeview::FrameData fd : fds) { fd.RvaStart += rvaStart; @@ -1032,10 +1028,11 @@ void PDBLinker::addDebugSymbols(TpiSource *source) { if (!source->file) return; + llvm::TimeTraceScope timeScope("Merge symbols"); ScopedTimer t(ctx.symbolMergingTimer); ExitOnError exitOnErr; pdb::DbiStreamBuilder &dbiBuilder = builder.getDbiBuilder(); - DebugSHandler dsh(*this, *source->file, source); + DebugSHandler dsh(*this, *source->file); // Now do all live .debug$S and .debug$F sections. for (SectionChunk *debugChunk : source->file->getDebugChunks()) { if (!debugChunk->live || debugChunk->getSize() == 0) @@ -1053,7 +1050,8 @@ void PDBLinker::addDebugSymbols(TpiSource *source) { ArrayRef<uint8_t> relocatedDebugContents = relocateDebugChunk(*debugChunk); FixedStreamArray<object::FpoData> fpoRecords; - BinaryStreamReader reader(relocatedDebugContents, support::little); + BinaryStreamReader reader(relocatedDebugContents, + llvm::endianness::little); uint32_t count = relocatedDebugContents.size() / sizeof(object::FpoData); exitOnErr(reader.readArray(fpoRecords, count)); @@ -1106,6 +1104,7 @@ void PDBLinker::addDebug(TpiSource *source) { // indices to PDB type and item indices. If we are using ghashes, types have // already been merged. if (!ctx.config.debugGHashes) { + llvm::TimeTraceScope timeScope("Merge types (Non-GHASH)"); ScopedTimer t(ctx.typeMergingTimer); if (Error e = source->mergeDebugT(&tMerger)) { // If type merging failed, ignore the symbols. @@ -1150,39 +1149,49 @@ static pdb::BulkPublic createPublic(COFFLinkerContext &ctx, Defined *def) { // Add all object files to the PDB. Merge .debug$T sections into IpiData and // TpiData. 
void PDBLinker::addObjectsToPDB() { - ScopedTimer t1(ctx.addObjectsTimer); + { + llvm::TimeTraceScope timeScope("Add objects to PDB"); + ScopedTimer t1(ctx.addObjectsTimer); - // Create module descriptors - for (ObjFile *obj : ctx.objFileInstances) - createModuleDBI(obj); + // Create module descriptors + for (ObjFile *obj : ctx.objFileInstances) + createModuleDBI(obj); - // Reorder dependency type sources to come first. - tMerger.sortDependencies(); + // Reorder dependency type sources to come first. + tMerger.sortDependencies(); - // Merge type information from input files using global type hashing. - if (ctx.config.debugGHashes) - tMerger.mergeTypesWithGHash(); + // Merge type information from input files using global type hashing. + if (ctx.config.debugGHashes) + tMerger.mergeTypesWithGHash(); - // Merge dependencies and then regular objects. - for (TpiSource *source : tMerger.dependencySources) - addDebug(source); - for (TpiSource *source : tMerger.objectSources) - addDebug(source); + // Merge dependencies and then regular objects. + { + llvm::TimeTraceScope timeScope("Merge debug info (dependencies)"); + for (TpiSource *source : tMerger.dependencySources) + addDebug(source); + } + { + llvm::TimeTraceScope timeScope("Merge debug info (objects)"); + for (TpiSource *source : tMerger.objectSources) + addDebug(source); + } - builder.getStringTableBuilder().setStrings(pdbStrTab); - t1.stop(); + builder.getStringTableBuilder().setStrings(pdbStrTab); + } // Construct TPI and IPI stream contents. - ScopedTimer t2(ctx.tpiStreamLayoutTimer); + { + llvm::TimeTraceScope timeScope("TPI/IPI stream layout"); + ScopedTimer t2(ctx.tpiStreamLayoutTimer); - // Collect all the merged types. - if (ctx.config.debugGHashes) { - addGHashTypeInfo(ctx, builder); - } else { - addTypeInfo(builder.getTpiBuilder(), tMerger.getTypeTable()); - addTypeInfo(builder.getIpiBuilder(), tMerger.getIDTable()); + // Collect all the merged types. 
+ if (ctx.config.debugGHashes) { + addGHashTypeInfo(ctx, builder); + } else { + addTypeInfo(builder.getTpiBuilder(), tMerger.getTypeTable()); + addTypeInfo(builder.getIpiBuilder(), tMerger.getIDTable()); + } } - t2.stop(); if (ctx.config.showSummary) { for (TpiSource *source : ctx.tpiSourceList) { @@ -1193,6 +1202,7 @@ void PDBLinker::addObjectsToPDB() { } void PDBLinker::addPublicsToPDB() { + llvm::TimeTraceScope timeScope("Publics layout"); ScopedTimer t3(ctx.publicsLayoutTimer); // Compute the public symbols. auto &gsiBuilder = builder.getGsiBuilder(); @@ -1311,6 +1321,7 @@ void PDBLinker::printStats() { } void PDBLinker::addNatvisFiles() { + llvm::TimeTraceScope timeScope("Natvis files"); for (StringRef file : ctx.config.natvisFiles) { ErrorOr<std::unique_ptr<MemoryBuffer>> dataOrErr = MemoryBuffer::getFile(file); @@ -1330,6 +1341,7 @@ void PDBLinker::addNatvisFiles() { } void PDBLinker::addNamedStreams() { + llvm::TimeTraceScope timeScope("Named streams"); ExitOnError exitOnErr; for (const auto &streamFile : ctx.config.namedStreams) { const StringRef stream = streamFile.getKey(), file = streamFile.getValue(); @@ -1504,6 +1516,7 @@ void PDBLinker::addImportFilesToPDB() { if (ctx.importFileInstances.empty()) return; + llvm::TimeTraceScope timeScope("Import files"); ExitOnError exitOnErr; std::map<std::string, llvm::pdb::DbiModuleDescriptorBuilder *> dllToModuleDbi; @@ -1593,25 +1606,37 @@ void PDBLinker::addImportFilesToPDB() { void lld::coff::createPDB(COFFLinkerContext &ctx, ArrayRef<uint8_t> sectionTable, llvm::codeview::DebugInfo *buildId) { + llvm::TimeTraceScope timeScope("PDB file"); ScopedTimer t1(ctx.totalPdbLinkTimer); - PDBLinker pdb(ctx); + { + PDBLinker pdb(ctx); - pdb.initialize(buildId); - pdb.addObjectsToPDB(); - pdb.addImportFilesToPDB(); - pdb.addSections(sectionTable); - pdb.addNatvisFiles(); - pdb.addNamedStreams(); - pdb.addPublicsToPDB(); + pdb.initialize(buildId); + pdb.addObjectsToPDB(); + pdb.addImportFilesToPDB(); + 
pdb.addSections(sectionTable); + pdb.addNatvisFiles(); + pdb.addNamedStreams(); + pdb.addPublicsToPDB(); - ScopedTimer t2(ctx.diskCommitTimer); - codeview::GUID guid; - pdb.commit(&guid); - memcpy(&buildId->PDB70.Signature, &guid, 16); + { + llvm::TimeTraceScope timeScope("Commit PDB file to disk"); + ScopedTimer t2(ctx.diskCommitTimer); + codeview::GUID guid; + pdb.commit(&guid); + memcpy(&buildId->PDB70.Signature, &guid, 16); + } - t2.stop(); - t1.stop(); - pdb.printStats(); + t1.stop(); + pdb.printStats(); + + // Manually start this profile point to measure ~PDBLinker(). + if (getTimeTraceProfilerInstance() != nullptr) + timeTraceProfilerBegin("PDBLinker destructor", StringRef("")); + } + // Manually end this profile point to measure ~PDBLinker(). + if (getTimeTraceProfilerInstance() != nullptr) + timeTraceProfilerEnd(); } void PDBLinker::initialize(llvm::codeview::DebugInfo *buildId) { @@ -1646,6 +1671,7 @@ void PDBLinker::initialize(llvm::codeview::DebugInfo *buildId) { } void PDBLinker::addSections(ArrayRef<uint8_t> sectionTable) { + llvm::TimeTraceScope timeScope("PDB output sections"); ExitOnError exitOnErr; // It's not entirely clear what this is, but the * Linker * module uses it. 
pdb::DbiStreamBuilder &dbiBuilder = builder.getDbiBuilder(); @@ -1747,7 +1773,7 @@ static bool findLineTable(const SectionChunk *c, uint32_t addr, ArrayRef<uint8_t> contents = SectionChunk::consumeDebugMagic(dbgC->getContents(), ".debug$S"); DebugSubsectionArray subsections; - BinaryStreamReader reader(contents, support::little); + BinaryStreamReader reader(contents, llvm::endianness::little); exitOnErr(reader.readArray(subsections, contents.size())); for (const DebugSubsectionRecord &ss : subsections) { diff --git a/lld/COFF/SymbolTable.cpp b/lld/COFF/SymbolTable.cpp index b7217d14391a..44aa506d2c35 100644 --- a/lld/COFF/SymbolTable.cpp +++ b/lld/COFF/SymbolTable.cpp @@ -61,6 +61,10 @@ void SymbolTable::addFile(InputFile *file) { if (auto *f = dyn_cast<ObjFile>(file)) { ctx.objFileInstances.push_back(f); } else if (auto *f = dyn_cast<BitcodeFile>(file)) { + if (ltoCompilationDone) { + error("LTO object file " + toString(file) + " linked in after " + "doing LTO compilation."); + } ctx.bitcodeFileInstances.push_back(f); } else if (auto *f = dyn_cast<ImportFile>(file)) { ctx.importFileInstances.push_back(f); @@ -458,8 +462,10 @@ void SymbolTable::reportUnresolvable() { StringRef name = undef->getName(); if (name.starts_with("__imp_")) { Symbol *imp = find(name.substr(strlen("__imp_"))); - if (imp && isa<Defined>(imp)) + if (Defined *def = dyn_cast_or_null<Defined>(imp)) { + def->isUsedInRegularObj = true; continue; + } } if (name.contains("_PchSym_")) continue; @@ -473,6 +479,7 @@ void SymbolTable::reportUnresolvable() { } void SymbolTable::resolveRemainingUndefines() { + llvm::TimeTraceScope timeScope("Resolve remaining undefined symbols"); SmallPtrSet<Symbol *, 8> undefs; DenseMap<Symbol *, Symbol *> localImports; @@ -875,9 +882,11 @@ Symbol *SymbolTable::addUndefined(StringRef name) { } void SymbolTable::compileBitcodeFiles() { + ltoCompilationDone = true; if (ctx.bitcodeFileInstances.empty()) return; + llvm::TimeTraceScope timeScope("Compile bitcode"); 
ScopedTimer t(ctx.ltoTimer); lto.reset(new BitcodeCompiler(ctx)); for (BitcodeFile *f : ctx.bitcodeFileInstances) diff --git a/lld/COFF/SymbolTable.h b/lld/COFF/SymbolTable.h index 511e60d1e3a0..fc623c2840d4 100644 --- a/lld/COFF/SymbolTable.h +++ b/lld/COFF/SymbolTable.h @@ -133,6 +133,7 @@ private: llvm::DenseMap<llvm::CachedHashStringRef, Symbol *> symMap; std::unique_ptr<BitcodeCompiler> lto; + bool ltoCompilationDone = false; COFFLinkerContext &ctx; }; diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp index 85568a12531f..7b1ff8071e2e 100644 --- a/lld/COFF/Writer.cpp +++ b/lld/COFF/Writer.cpp @@ -31,6 +31,7 @@ #include "llvm/Support/Parallel.h" #include "llvm/Support/Path.h" #include "llvm/Support/RandomNumberGenerator.h" +#include "llvm/Support/TimeProfiler.h" #include "llvm/Support/xxhash.h" #include <algorithm> #include <cstdio> @@ -196,6 +197,10 @@ public: } }; +struct ChunkRange { + Chunk *first = nullptr, *last; +}; + // The writer writes a SymbolTable result to a file. class Writer { public: @@ -211,6 +216,7 @@ private: void locateImportTables(); void createExportTable(); void mergeSections(); + void sortECChunks(); void removeUnusedSections(); void assignAddresses(); bool isInRange(uint16_t relType, uint64_t s, uint64_t p, int margin); @@ -219,6 +225,7 @@ private: uint16_t type, int margin); bool createThunks(OutputSection *os, int margin); bool verifyRanges(const std::vector<Chunk *> chunks); + void createECCodeMap(); void finalizeAddresses(); void removeEmptySections(); void assignOutputSectionIndices(); @@ -227,6 +234,7 @@ private: template <typename PEHeaderTy> void writeHeader(); void createSEHTable(); void createRuntimePseudoRelocs(); + void createECChunks(); void insertCtorDtorSymbols(); void markSymbolsWithRelocations(ObjFile *file, SymbolRVASet &usedSymbols); void createGuardCFTables(); @@ -239,11 +247,13 @@ private: void maybeAddRVATable(SymbolRVASet tableSymbols, StringRef tableSym, StringRef countSym, bool hasFlag=false); void 
setSectionPermissions(); + void setECSymbols(); void writeSections(); void writeBuildId(); void writePEChecksum(); void sortSections(); - void sortExceptionTable(); + template <typename T> void sortExceptionTable(ChunkRange &exceptionTable); + void sortExceptionTables(); void sortCRTSectionChunks(std::vector<Chunk *> &chunks); void addSyntheticIdata(); void sortBySectionOrder(std::vector<Chunk *> &chunks); @@ -262,13 +272,15 @@ private: uint32_t getSizeOfInitializedData(); - void checkLoadConfig(); + void prepareLoadConfig(); + template <typename T> void prepareLoadConfig(T *loadConfig); template <typename T> void checkLoadConfigGuardData(const T *loadConfig); std::unique_ptr<FileOutputBuffer> &buffer; std::map<PartialSectionKey, PartialSection *> partialSections; std::vector<char> strtab; std::vector<llvm::object::coff_symbol16> outputSymtab; + std::vector<ECCodeMapEntry> codeMap; IdataContents idata; Chunk *importTableStart = nullptr; uint64_t importTableSize = 0; @@ -303,8 +315,10 @@ private: OutputSection *relocSec; OutputSection *ctorsSec; OutputSection *dtorsSec; + // Either .rdata section or .buildid section. + OutputSection *debugInfoSec; - // The first and last .pdata sections in the output file. + // The range of .pdata sections in the output file. // // We need to keep track of the location of .pdata in whichever section it // gets merged into so that we can sort its contents and emit a correct data @@ -313,14 +327,19 @@ private: // are entirely linker-generated we can keep track of their locations using // the chunks that the linker creates. All .pdata chunks come from input // files, so we need to keep track of them separately. - Chunk *firstPdata = nullptr; - Chunk *lastPdata; + ChunkRange pdata; + + // x86_64 .pdata sections on ARM64EC/ARM64X targets. 
+ ChunkRange hybridPdata; COFFLinkerContext &ctx; }; } // anonymous namespace -void lld::coff::writeResult(COFFLinkerContext &ctx) { Writer(ctx).run(); } +void lld::coff::writeResult(COFFLinkerContext &ctx) { + llvm::TimeTraceScope timeScope("Write output(s)"); + Writer(ctx).run(); +} void OutputSection::addChunk(Chunk *c) { chunks.push_back(c); @@ -341,6 +360,14 @@ void OutputSection::merge(OutputSection *other) { contribSections.insert(contribSections.end(), other->contribSections.begin(), other->contribSections.end()); other->contribSections.clear(); + + // MS link.exe compatibility: when merging a code section into a data section, + // mark the target section as a code section. + if (other->header.Characteristics & IMAGE_SCN_CNT_CODE) { + header.Characteristics |= IMAGE_SCN_CNT_CODE; + header.Characteristics &= + ~(IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_CNT_UNINITIALIZED_DATA); + } } // Write the section header to a given buffer. @@ -523,6 +550,48 @@ bool Writer::createThunks(OutputSection *os, int margin) { return addressesChanged; } +// Create a code map for CHPE metadata. +void Writer::createECCodeMap() { + if (!isArm64EC(ctx.config.machine)) + return; + + // Clear the map in case we were're recomputing the map after adding + // a range extension thunk. + codeMap.clear(); + + std::optional<chpe_range_type> lastType; + Chunk *first, *last; + + auto closeRange = [&]() { + if (lastType) { + codeMap.push_back({first, last, *lastType}); + lastType.reset(); + } + }; + + for (OutputSection *sec : ctx.outputSections) { + for (Chunk *c : sec->chunks) { + // Skip empty section chunks. MS link.exe does not seem to do that and + // generates empty code ranges in some cases. 
+ if (isa<SectionChunk>(c) && !c->getSize()) + continue; + + std::optional<chpe_range_type> chunkType = c->getArm64ECRangeType(); + if (chunkType != lastType) { + closeRange(); + first = c; + lastType = chunkType; + } + last = c; + } + } + + closeRange(); + + Symbol *tableCountSym = ctx.symtab.findUnderscore("__hybrid_code_map_count"); + cast<DefinedAbsolute>(tableCountSym)->setVA(codeMap.size()); +} + // Verify that all relocations are in range, with no extra margin requirements. bool Writer::verifyRanges(const std::vector<Chunk *> chunks) { for (Chunk *c : chunks) { @@ -563,16 +632,21 @@ void Writer::finalizeAddresses() { int pass = 0; int margin = 1024 * 100; while (true) { + llvm::TimeTraceScope timeScope2("Add thunks pass"); + // First check whether we need thunks at all, or if the previous pass of // adding them turned out ok. bool rangesOk = true; size_t numChunks = 0; - for (OutputSection *sec : ctx.outputSections) { - if (!verifyRanges(sec->chunks)) { - rangesOk = false; - break; + { + llvm::TimeTraceScope timeScope3("Verify ranges"); + for (OutputSection *sec : ctx.outputSections) { + if (!verifyRanges(sec->chunks)) { + rangesOk = false; + break; + } + numChunks += sec->chunks.size(); } - numChunks += sec->chunks.size(); } if (rangesOk) { if (pass > 0) @@ -596,8 +670,11 @@ void Writer::finalizeAddresses() { // Try adding thunks everywhere where it is needed, with a margin // to avoid things going out of range due to the added thunks. bool addressesChanged = false; - for (OutputSection *sec : ctx.outputSections) - addressesChanged |= createThunks(sec, margin); + { + llvm::TimeTraceScope timeScope3("Create thunks"); + for (OutputSection *sec : ctx.outputSections) + addressesChanged |= createThunks(sec, margin); + } // If the verification above thought we needed thunks, we should have // added some. 
assert(addressesChanged); @@ -616,6 +693,8 @@ void Writer::writePEChecksum() { return; } + llvm::TimeTraceScope timeScope("PE checksum"); + // https://docs.microsoft.com/en-us/windows/win32/debug/pe-format#checksum uint32_t *buf = (uint32_t *)buffer->getBufferStart(); uint32_t size = (uint32_t)(buffer->getBufferSize()); @@ -650,42 +729,46 @@ void Writer::writePEChecksum() { // The main function of the writer. void Writer::run() { - ScopedTimer t1(ctx.codeLayoutTimer); - - createImportTables(); - createSections(); - appendImportThunks(); - // Import thunks must be added before the Control Flow Guard tables are added. - createMiscChunks(); - createExportTable(); - mergeSections(); - removeUnusedSections(); - finalizeAddresses(); - removeEmptySections(); - assignOutputSectionIndices(); - setSectionPermissions(); - createSymbolAndStringTable(); + { + llvm::TimeTraceScope timeScope("Write PE"); + ScopedTimer t1(ctx.codeLayoutTimer); - if (fileSize > UINT32_MAX) - fatal("image size (" + Twine(fileSize) + ") " + - "exceeds maximum allowable size (" + Twine(UINT32_MAX) + ")"); + createImportTables(); + createSections(); + appendImportThunks(); + // Import thunks must be added before the Control Flow Guard tables are + // added. 
+ createMiscChunks(); + createExportTable(); + mergeSections(); + sortECChunks(); + removeUnusedSections(); + finalizeAddresses(); + removeEmptySections(); + assignOutputSectionIndices(); + setSectionPermissions(); + setECSymbols(); + createSymbolAndStringTable(); - openFile(ctx.config.outputFile); - if (ctx.config.is64()) { - writeHeader<pe32plus_header>(); - } else { - writeHeader<pe32_header>(); - } - writeSections(); - checkLoadConfig(); - sortExceptionTable(); + if (fileSize > UINT32_MAX) + fatal("image size (" + Twine(fileSize) + ") " + + "exceeds maximum allowable size (" + Twine(UINT32_MAX) + ")"); - // Fix up the alignment in the TLS Directory's characteristic field, - // if a specific alignment value is needed - if (tlsAlignment) - fixTlsAlignment(); + openFile(ctx.config.outputFile); + if (ctx.config.is64()) { + writeHeader<pe32plus_header>(); + } else { + writeHeader<pe32_header>(); + } + writeSections(); + prepareLoadConfig(); + sortExceptionTables(); - t1.stop(); + // Fix up the alignment in the TLS Directory's characteristic field, + // if a specific alignment value is needed + if (tlsAlignment) + fixTlsAlignment(); + } if (!ctx.config.pdbPath.empty() && ctx.config.debug) { assert(buildId); @@ -701,6 +784,7 @@ void Writer::run() { if (errorCount()) return; + llvm::TimeTraceScope timeScope("Commit PE to disk"); ScopedTimer t2(ctx.outputCommitTimer); if (auto e = buffer->commit()) fatal("failed to write output '" + buffer->getPath() + @@ -878,6 +962,7 @@ void Writer::sortSections() { // Create output section objects and add them to OutputSections. void Writer::createSections() { + llvm::TimeTraceScope timeScope("Output sections"); // First, create the builtin sections. 
const uint32_t data = IMAGE_SCN_CNT_INITIALIZED_DATA; const uint32_t bss = IMAGE_SCN_CNT_UNINITIALIZED_DATA; @@ -1003,6 +1088,7 @@ void Writer::createSections() { } void Writer::createMiscChunks() { + llvm::TimeTraceScope timeScope("Misc chunks"); Configuration *config = &ctx.config; for (MergeChunk *p : ctx.mergeChunkInstances) { @@ -1019,15 +1105,16 @@ void Writer::createMiscChunks() { } // Create Debug Information Chunks - OutputSection *debugInfoSec = config->mingw ? buildidSec : rdataSec; - if (config->debug || config->repro || config->cetCompat) { + debugInfoSec = config->mingw ? buildidSec : rdataSec; + if (config->buildIDHash != BuildIDHash::None || config->debug || + config->repro || config->cetCompat) { debugDirectory = make<DebugDirectoryChunk>(ctx, debugRecords, config->repro); debugDirectory->setAlignment(4); debugInfoSec->addChunk(debugDirectory); } - if (config->debug) { + if (config->debug || config->buildIDHash != BuildIDHash::None) { // Make a CVDebugRecordChunk even when /DEBUG:CV is not specified. We // output a PDB no matter what, and this chunk provides the only means of // allowing a debugger to match a PDB and an executable. So we need it even @@ -1056,6 +1143,9 @@ void Writer::createMiscChunks() { if (config->guardCF != GuardCFLevel::Off) createGuardCFTables(); + if (isArm64EC(config->machine)) + createECChunks(); + if (config->autoImport) createRuntimePseudoRelocs(); @@ -1068,6 +1158,7 @@ void Writer::createMiscChunks() { // IdataContents class abstracted away the details for us, // so we just let it create chunks and add them to the section. void Writer::createImportTables() { + llvm::TimeTraceScope timeScope("Import tables"); // Initialize DLLOrder so that import entries are ordered in // the same order as in the command line. (That affects DLL // initialization order, and this ordering is MSVC-compatible.) 
@@ -1097,6 +1188,7 @@ void Writer::appendImportThunks() { if (ctx.importFileInstances.empty()) return; + llvm::TimeTraceScope timeScope("Import thunks"); for (ImportFile *file : ctx.importFileInstances) { if (!file->live) continue; @@ -1128,6 +1220,7 @@ void Writer::appendImportThunks() { } void Writer::createExportTable() { + llvm::TimeTraceScope timeScope("Export table"); if (!edataSec->chunks.empty()) { // Allow using a custom built export table from input object files, instead // of having the linker synthesize the tables. @@ -1148,6 +1241,7 @@ void Writer::createExportTable() { } void Writer::removeUnusedSections() { + llvm::TimeTraceScope timeScope("Remove unused sections"); // Remove sections that we can be sure won't get content, to avoid // allocating space for their section headers. auto isUnused = [this](OutputSection *s) { @@ -1163,11 +1257,13 @@ void Writer::removeUnusedSections() { // The Windows loader doesn't seem to like empty sections, // so we remove them if any. void Writer::removeEmptySections() { + llvm::TimeTraceScope timeScope("Remove empty sections"); auto isEmpty = [](OutputSection *s) { return s->getVirtualSize() == 0; }; llvm::erase_if(ctx.outputSections, isEmpty); } void Writer::assignOutputSectionIndices() { + llvm::TimeTraceScope timeScope("Output sections indices"); // Assign final output section indices, and assign each chunk to its output // section. uint32_t idx = 1; @@ -1258,6 +1354,7 @@ std::optional<coff_symbol16> Writer::createSymbol(Defined *def) { } void Writer::createSymbolAndStringTable() { + llvm::TimeTraceScope timeScope("Symbol and string table"); // PE/COFF images are limited to 8 byte section names. Longer names can be // supported by writing a non-standard string table, but this string table is // not mapped at runtime and the long names will therefore be inaccessible. 
@@ -1320,9 +1417,30 @@ void Writer::createSymbolAndStringTable() { } void Writer::mergeSections() { + llvm::TimeTraceScope timeScope("Merge sections"); if (!pdataSec->chunks.empty()) { - firstPdata = pdataSec->chunks.front(); - lastPdata = pdataSec->chunks.back(); + if (isArm64EC(ctx.config.machine)) { + // On ARM64EC .pdata may contain both ARM64 and X64 data. Split them by + // sorting and store their regions separately. + llvm::stable_sort(pdataSec->chunks, [=](const Chunk *a, const Chunk *b) { + return (a->getMachine() == AMD64) < (b->getMachine() == AMD64); + }); + + for (auto chunk : pdataSec->chunks) { + if (chunk->getMachine() == AMD64) { + hybridPdata.first = chunk; + hybridPdata.last = pdataSec->chunks.back(); + break; + } + + if (!pdata.first) + pdata.first = chunk; + pdata.last = chunk; + } + } else { + pdata.first = pdataSec->chunks.front(); + pdata.last = pdataSec->chunks.back(); + } } for (auto &p : ctx.config.merge) { @@ -1350,11 +1468,33 @@ void Writer::mergeSections() { } } +// EC targets may have chunks of various architectures mixed together at this +// point. Group code chunks of the same architecture together by sorting chunks +// by their EC range type. +void Writer::sortECChunks() { + if (!isArm64EC(ctx.config.machine)) + return; + + for (OutputSection *sec : ctx.outputSections) { + if (sec->isCodeSection()) + llvm::stable_sort(sec->chunks, [=](const Chunk *a, const Chunk *b) { + std::optional<chpe_range_type> aType = a->getArm64ECRangeType(), + bType = b->getArm64ECRangeType(); + return !aType || (bType && *aType < *bType); + }); + } +} + // Visits all sections to assign incremental, non-overlapping RVAs and // file offsets. void Writer::assignAddresses() { + llvm::TimeTraceScope timeScope("Assign addresses"); Configuration *config = &ctx.config; + // We need to create EC code map so that ECCodeMapChunk knows its size. + // We do it here to make sure that we account for range extension chunks. 
+ createECCodeMap(); + sizeOfHeaders = dosStubSize + sizeof(PEMagic) + sizeof(coff_file_header) + sizeof(data_directory) * numberOfDataDirectory + sizeof(coff_section) * ctx.outputSections.size(); @@ -1367,6 +1507,7 @@ void Writer::assignAddresses() { uint64_t rva = alignTo(sizeOfHeaders, config->align); for (OutputSection *sec : ctx.outputSections) { + llvm::TimeTraceScope timeScope("Section: ", sec->name); if (sec == relocSec) addBaserels(); uint64_t rawSize = 0, virtualSize = 0; @@ -1374,13 +1515,18 @@ void Writer::assignAddresses() { // If /FUNCTIONPADMIN is used, functions are padded in order to create a // hotpatchable image. - const bool isCodeSection = - (sec->header.Characteristics & IMAGE_SCN_CNT_CODE) && - (sec->header.Characteristics & IMAGE_SCN_MEM_READ) && - (sec->header.Characteristics & IMAGE_SCN_MEM_EXECUTE); - uint32_t padding = isCodeSection ? config->functionPadMin : 0; + uint32_t padding = sec->isCodeSection() ? config->functionPadMin : 0; + std::optional<chpe_range_type> prevECRange; for (Chunk *c : sec->chunks) { + // Alignment EC code range baudaries. + if (isArm64EC(ctx.config.machine) && sec->isCodeSection()) { + std::optional<chpe_range_type> rangeType = c->getArm64ECRangeType(); + if (rangeType != prevECRange) { + virtualSize = alignTo(virtualSize, 4096); + prevECRange = rangeType; + } + } if (padding && c->isHotPatchable()) virtualSize += padding; virtualSize = alignTo(virtualSize, c->getAlignment()); @@ -1550,10 +1696,15 @@ template <typename PEHeaderTy> void Writer::writeHeader() { dir[RESOURCE_TABLE].RelativeVirtualAddress = rsrcSec->getRVA(); dir[RESOURCE_TABLE].Size = rsrcSec->getVirtualSize(); } - if (firstPdata) { - dir[EXCEPTION_TABLE].RelativeVirtualAddress = firstPdata->getRVA(); - dir[EXCEPTION_TABLE].Size = - lastPdata->getRVA() + lastPdata->getSize() - firstPdata->getRVA(); + // ARM64EC (but not ARM64X) contains x86_64 exception table in data directory. + ChunkRange &exceptionTable = + ctx.config.machine == ARM64EC ? 
hybridPdata : pdata; + if (exceptionTable.first) { + dir[EXCEPTION_TABLE].RelativeVirtualAddress = + exceptionTable.first->getRVA(); + dir[EXCEPTION_TABLE].Size = exceptionTable.last->getRVA() + + exceptionTable.last->getSize() - + exceptionTable.first->getRVA(); } if (relocSec->getVirtualSize()) { dir[BASE_RELOCATION_TABLE].RelativeVirtualAddress = relocSec->getRVA(); @@ -1796,7 +1947,7 @@ void Writer::createGuardCFTables() { // Add the ehcont target table unless the user told us not to. if (config->guardCF & GuardCFLevel::EHCont) maybeAddRVATable(std::move(ehContTargets), "__guard_eh_cont_table", - "__guard_eh_cont_count", true); + "__guard_eh_cont_count"); // Set __guard_flags, which will be used in the load config to indicate that // /guard:cf was enabled. @@ -1884,6 +2035,15 @@ void Writer::maybeAddRVATable(SymbolRVASet tableSymbols, StringRef tableSym, cast<DefinedAbsolute>(c)->setVA(tableChunk->getSize() / (hasFlag ? 5 : 4)); } +// Create CHPE metadata chunks. +void Writer::createECChunks() { + auto codeMapChunk = make<ECCodeMapChunk>(codeMap); + rdataSec->addChunk(codeMapChunk); + Symbol *codeMapSym = ctx.symtab.findUnderscore("__hybrid_code_map"); + replaceSymbol<DefinedSynthetic>(codeMapSym, codeMapSym->getName(), + codeMapChunk); +} + // MinGW specific. Gather all relocations that are imported from a DLL even // though the code didn't expect it to, produce the table that the runtime // uses for fixing them up, and provide the synthetic symbols that the @@ -1947,6 +2107,7 @@ void Writer::insertCtorDtorSymbols() { // Handles /section options to allow users to overwrite // section attributes. void Writer::setSectionPermissions() { + llvm::TimeTraceScope timeScope("Sections permissions"); for (auto &p : ctx.config.section) { StringRef name = p.first; uint32_t perm = p.second; @@ -1956,17 +2117,46 @@ void Writer::setSectionPermissions() { } } +// Set symbols used by ARM64EC metadata. 
+void Writer::setECSymbols() { + if (!isArm64EC(ctx.config.machine)) + return; + + Symbol *rfeTableSym = ctx.symtab.findUnderscore("__arm64x_extra_rfe_table"); + replaceSymbol<DefinedSynthetic>(rfeTableSym, "__arm64x_extra_rfe_table", + pdata.first); + + if (pdata.first) { + Symbol *rfeSizeSym = + ctx.symtab.findUnderscore("__arm64x_extra_rfe_table_size"); + cast<DefinedAbsolute>(rfeSizeSym) + ->setVA(pdata.last->getRVA() + pdata.last->getSize() - + pdata.first->getRVA()); + } +} + // Write section contents to a mmap'ed file. void Writer::writeSections() { + llvm::TimeTraceScope timeScope("Write sections"); uint8_t *buf = buffer->getBufferStart(); for (OutputSection *sec : ctx.outputSections) { uint8_t *secBuf = buf + sec->getFileOff(); // Fill gaps between functions in .text with INT3 instructions // instead of leaving as NUL bytes (which can be interpreted as - // ADD instructions). + // ADD instructions). Only fill the gaps between chunks. Most + // chunks overwrite it anyway, but uninitialized data chunks + // merged into a code section don't. if ((sec->header.Characteristics & IMAGE_SCN_CNT_CODE) && - (ctx.config.machine == AMD64 || ctx.config.machine == I386)) - memset(secBuf, 0xCC, sec->getRawSize()); + (ctx.config.machine == AMD64 || ctx.config.machine == I386)) { + uint32_t prevEnd = 0; + for (Chunk *c : sec->chunks) { + uint32_t off = c->getRVA() - sec->getRVA(); + memset(secBuf + prevEnd, 0xCC, off - prevEnd); + prevEnd = off + c->getSize(); + } + memset(secBuf + prevEnd, 0xCC, sec->getRawSize() - prevEnd); + } + parallelForEach(sec->chunks, [&](Chunk *c) { c->writeTo(secBuf + c->getRVA() - sec->getRVA()); }); @@ -1974,6 +2164,8 @@ void Writer::writeSections() { } void Writer::writeBuildId() { + llvm::TimeTraceScope timeScope("Write build ID"); + // There are two important parts to the build ID. // 1) If building with debug info, the COFF debug directory contains a // timestamp as well as a Guid and Age of the PDB. 
@@ -1981,8 +2173,8 @@ void Writer::writeBuildId() { // For reproducibility, instead of a timestamp we want to use a hash of the // PE contents. Configuration *config = &ctx.config; - - if (config->debug) { + bool generateSyntheticBuildId = config->buildIDHash == BuildIDHash::Binary; + if (generateSyntheticBuildId) { assert(buildId && "BuildId is not set!"); // BuildId->BuildId was filled in when the PDB was written. } @@ -1997,8 +2189,6 @@ void Writer::writeBuildId() { uint32_t timestamp = config->timestamp; uint64_t hash = 0; - bool generateSyntheticBuildId = - config->mingw && config->debug && config->pdbPath.empty(); if (config->repro || generateSyntheticBuildId) hash = xxh3_64bits(outputFileData); @@ -2007,8 +2197,6 @@ void Writer::writeBuildId() { timestamp = static_cast<uint32_t>(hash); if (generateSyntheticBuildId) { - // For MinGW builds without a PDB file, we still generate a build id - // to allow associating a crash dump to the executable. buildId->buildId->PDB70.CVSignature = OMF::Signature::PDB70; buildId->buildId->PDB70.Age = 1; memcpy(buildId->buildId->PDB70.Signature, &hash, 8); @@ -2027,40 +2215,57 @@ void Writer::writeBuildId() { } // Sort .pdata section contents according to PE/COFF spec 5.5. -void Writer::sortExceptionTable() { - if (!firstPdata) +template <typename T> +void Writer::sortExceptionTable(ChunkRange &exceptionTable) { + if (!exceptionTable.first) return; + // We assume .pdata contains function table entries only. 
auto bufAddr = [&](Chunk *c) { OutputSection *os = ctx.getOutputSection(c); return buffer->getBufferStart() + os->getFileOff() + c->getRVA() - os->getRVA(); }; - uint8_t *begin = bufAddr(firstPdata); - uint8_t *end = bufAddr(lastPdata) + lastPdata->getSize(); - if (ctx.config.machine == AMD64) { - struct Entry { ulittle32_t begin, end, unwind; }; - if ((end - begin) % sizeof(Entry) != 0) { - fatal("unexpected .pdata size: " + Twine(end - begin) + - " is not a multiple of " + Twine(sizeof(Entry))); - } - parallelSort( - MutableArrayRef<Entry>((Entry *)begin, (Entry *)end), - [](const Entry &a, const Entry &b) { return a.begin < b.begin; }); - return; + uint8_t *begin = bufAddr(exceptionTable.first); + uint8_t *end = bufAddr(exceptionTable.last) + exceptionTable.last->getSize(); + if ((end - begin) % sizeof(T) != 0) { + fatal("unexpected .pdata size: " + Twine(end - begin) + + " is not a multiple of " + Twine(sizeof(T))); } - if (ctx.config.machine == ARMNT || ctx.config.machine == ARM64) { - struct Entry { ulittle32_t begin, unwind; }; - if ((end - begin) % sizeof(Entry) != 0) { - fatal("unexpected .pdata size: " + Twine(end - begin) + - " is not a multiple of " + Twine(sizeof(Entry))); - } - parallelSort( - MutableArrayRef<Entry>((Entry *)begin, (Entry *)end), - [](const Entry &a, const Entry &b) { return a.begin < b.begin; }); - return; + + parallelSort(MutableArrayRef<T>(reinterpret_cast<T *>(begin), + reinterpret_cast<T *>(end)), + [](const T &a, const T &b) { return a.begin < b.begin; }); +} + +// Sort .pdata section contents according to PE/COFF spec 5.5. 
+void Writer::sortExceptionTables() { + llvm::TimeTraceScope timeScope("Sort exception table"); + + struct EntryX64 { + ulittle32_t begin, end, unwind; + }; + struct EntryArm { + ulittle32_t begin, unwind; + }; + + switch (ctx.config.machine) { + case AMD64: + sortExceptionTable<EntryX64>(pdata); + break; + case ARM64EC: + case ARM64X: + sortExceptionTable<EntryX64>(hybridPdata); + [[fallthrough]]; + case ARMNT: + case ARM64: + sortExceptionTable<EntryArm>(pdata); + break; + default: + if (pdata.first) + lld::errs() << "warning: don't know how to handle .pdata.\n"; + break; } - lld::errs() << "warning: don't know how to handle .pdata.\n"; } // The CRT section contains, among other things, the array of function @@ -2123,6 +2328,7 @@ void Writer::addBaserels() { for (OutputSection *sec : ctx.outputSections) { if (sec->header.Characteristics & IMAGE_SCN_MEM_DISCARDABLE) continue; + llvm::TimeTraceScope timeScope("Base relocations: ", sec->name); // Collect all locations for base relocations. 
for (Chunk *c : sec->chunks) c->getBaserels(&v); @@ -2197,7 +2403,7 @@ void Writer::fixTlsAlignment() { } } -void Writer::checkLoadConfig() { +void Writer::prepareLoadConfig() { Symbol *sym = ctx.symtab.findUnderscore("_load_config_used"); auto *b = cast_if_present<DefinedRegular>(sym); if (!b) { @@ -2221,11 +2427,16 @@ void Writer::checkLoadConfig() { Twine(expectedAlign) + " bytes)"); if (ctx.config.is64()) - checkLoadConfigGuardData( - reinterpret_cast<const coff_load_configuration64 *>(symBuf)); + prepareLoadConfig(reinterpret_cast<coff_load_configuration64 *>(symBuf)); else - checkLoadConfigGuardData( - reinterpret_cast<const coff_load_configuration32 *>(symBuf)); + prepareLoadConfig(reinterpret_cast<coff_load_configuration32 *>(symBuf)); +} + +template <typename T> void Writer::prepareLoadConfig(T *loadConfig) { + if (ctx.config.dependentLoadFlags) + loadConfig->DependentLoadFlags = ctx.config.dependentLoadFlags; + + checkLoadConfigGuardData(loadConfig); } template <typename T> diff --git a/lld/COFF/Writer.h b/lld/COFF/Writer.h index 4a74aa7ada59..9004bb310d07 100644 --- a/lld/COFF/Writer.h +++ b/lld/COFF/Writer.h @@ -64,6 +64,12 @@ public: // Used only when the name is longer than 8 bytes. void setStringTableOff(uint32_t v) { stringTableOff = v; } + bool isCodeSection() const { + return (header.Characteristics & llvm::COFF::IMAGE_SCN_CNT_CODE) && + (header.Characteristics & llvm::COFF::IMAGE_SCN_MEM_READ) && + (header.Characteristics & llvm::COFF::IMAGE_SCN_MEM_EXECUTE); + } + // N.B. The section index is one based. 
uint32_t sectionIndex = 0; diff --git a/lld/Common/Filesystem.cpp b/lld/Common/Filesystem.cpp index 671b352a3f6b..c2d3644191c9 100644 --- a/lld/Common/Filesystem.cpp +++ b/lld/Common/Filesystem.cpp @@ -11,11 +11,13 @@ //===----------------------------------------------------------------------===// #include "lld/Common/Filesystem.h" +#include "lld/Common/ErrorHandler.h" #include "llvm/Config/llvm-config.h" #include "llvm/Support/FileOutputBuffer.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Parallel.h" #include "llvm/Support/Path.h" +#include "llvm/Support/TimeProfiler.h" #if LLVM_ON_UNIX #include <unistd.h> #endif @@ -57,7 +59,7 @@ void lld::unlinkAsync(StringRef path) { // // The code here allows LLD to work on all versions of Windows. // However, at Windows 10 1903 it seems that the behavior of - // Windows has changed, so that we could simply delete the output + // Windows has changed, so that we could simply delete the output // file. This code should be simplified once support for older // versions of Windows is dropped. // @@ -121,9 +123,35 @@ void lld::unlinkAsync(StringRef path) { // is called. We use that class without calling commit() to predict // if the given file is writable. std::error_code lld::tryCreateFile(StringRef path) { + llvm::TimeTraceScope timeScope("Try create output file"); if (path.empty()) return std::error_code(); if (path == "-") return std::error_code(); return errorToErrorCode(FileOutputBuffer::create(path, 1).takeError()); } + +// Creates an empty file to and returns a raw_fd_ostream to write to it. +std::unique_ptr<raw_fd_ostream> lld::openFile(StringRef file) { + std::error_code ec; + auto ret = + std::make_unique<raw_fd_ostream>(file, ec, sys::fs::OpenFlags::OF_None); + if (ec) { + error("cannot open " + file + ": " + ec.message()); + return nullptr; + } + return ret; +} + +// The merged bitcode after LTO is large. Try opening a file stream that +// supports reading, seeking and writing. 
Such a file allows BitcodeWriter to +// flush buffered data to reduce memory consumption. If this fails, open a file +// stream that supports only write. +std::unique_ptr<raw_fd_ostream> lld::openLTOOutputFile(StringRef file) { + std::error_code ec; + std::unique_ptr<raw_fd_ostream> fs = + std::make_unique<raw_fd_stream>(file, ec); + if (!ec) + return fs; + return openFile(file); +} diff --git a/lld/Common/Strings.cpp b/lld/Common/Strings.cpp index db22c06eb17e..41cbbf36f38c 100644 --- a/lld/Common/Strings.cpp +++ b/lld/Common/Strings.cpp @@ -27,7 +27,7 @@ SingleStringMatcher::SingleStringMatcher(StringRef Pattern) { } else { Expected<GlobPattern> Glob = GlobPattern::create(Pattern); if (!Glob) { - error(toString(Glob.takeError())); + error(toString(Glob.takeError()) + ": " + Pattern); return; } ExactMatch = false; diff --git a/lld/ELF/Arch/AArch64.cpp b/lld/ELF/Arch/AArch64.cpp index c83a159e3f05..048f0ec30ebd 100644 --- a/lld/ELF/Arch/AArch64.cpp +++ b/lld/ELF/Arch/AArch64.cpp @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// +#include "InputFiles.h" #include "OutputSections.h" #include "Symbols.h" #include "SyntheticSections.h" @@ -377,6 +378,20 @@ void AArch64::relocate(uint8_t *loc, const Relocation &rel, write32(loc, val); break; case R_AARCH64_ABS64: + // AArch64 relocations to tagged symbols have extended semantics, as + // described here: + // https://github.com/ARM-software/abi-aa/blob/main/memtagabielf64/memtagabielf64.rst#841extended-semantics-of-r_aarch64_relative. + // tl;dr: encode the symbol's special addend in the place, which is an + // offset to the point where the logical tag is derived from. Quick hack, if + // the addend is within the symbol's bounds, no need to encode the tag + // derivation offset. 
+ if (rel.sym && rel.sym->isTagged() && + (rel.addend < 0 || + rel.addend >= static_cast<int64_t>(rel.sym->getSize()))) + write64(loc, -rel.addend); + else + write64(loc, val); + break; case R_AARCH64_PREL64: write64(loc, val); break; @@ -745,10 +760,18 @@ bool AArch64Relaxer::tryRelaxAdrpLdr(const Relocation &adrpRel, return true; } +// Tagged symbols have upper address bits that are added by the dynamic loader, +// and thus need the full 64-bit GOT entry. Do not relax such symbols. +static bool needsGotForMemtag(const Relocation &rel) { + return rel.sym->isTagged() && needsGot(rel.expr); +} + void AArch64::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const { uint64_t secAddr = sec.getOutputSection()->addr; if (auto *s = dyn_cast<InputSection>(&sec)) secAddr += s->outSecOff; + else if (auto *ehIn = dyn_cast<EhInputSection>(&sec)) + secAddr += ehIn->getParent()->outSecOff; AArch64Relaxer relaxer(sec.relocs()); for (size_t i = 0, size = sec.relocs().size(); i != size; ++i) { const Relocation &rel = sec.relocs()[i]; @@ -756,6 +779,12 @@ void AArch64::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const { const uint64_t val = sec.getRelocTargetVA(sec.file, rel.type, rel.addend, secAddr + rel.offset, *rel.sym, rel.expr); + + if (needsGotForMemtag(rel)) { + relocate(loc, rel, val); + continue; + } + switch (rel.expr) { case R_AARCH64_GOT_PAGE_PC: if (i + 1 < size && @@ -950,3 +979,107 @@ static TargetInfo *getTargetInfo() { } TargetInfo *elf::getAArch64TargetInfo() { return getTargetInfo(); } + +template <class ELFT> +static void +addTaggedSymbolReferences(InputSectionBase &sec, + DenseMap<Symbol *, unsigned> &referenceCount) { + assert(sec.type == SHT_AARCH64_MEMTAG_GLOBALS_STATIC); + + const RelsOrRelas<ELFT> rels = sec.relsOrRelas<ELFT>(); + if (rels.areRelocsRel()) + error("non-RELA relocations are not allowed with memtag globals"); + + for (const typename ELFT::Rela &rel : rels.relas) { + Symbol &sym = sec.getFile<ELFT>()->getRelocTargetSym(rel); + // 
Linker-synthesized symbols such as __executable_start may be referenced + // as tagged in input objfiles, and we don't want them to be tagged. A + // cheap way to exclude them is the type check, but their type is + // STT_NOTYPE. In addition, this save us from checking untaggable symbols, + // like functions or TLS symbols. + if (sym.type != STT_OBJECT) + continue; + // STB_LOCAL symbols can't be referenced from outside the object file, and + // thus don't need to be checked for references from other object files. + if (sym.binding == STB_LOCAL) { + sym.setIsTagged(true); + continue; + } + ++referenceCount[&sym]; + } + sec.markDead(); +} + +// A tagged symbol must be denoted as being tagged by all references and the +// chosen definition. For simplicity, here, it must also be denoted as tagged +// for all definitions. Otherwise: +// +// 1. A tagged definition can be used by an untagged declaration, in which case +// the untagged access may be PC-relative, causing a tag mismatch at +// runtime. +// 2. An untagged definition can be used by a tagged declaration, where the +// compiler has taken advantage of the increased alignment of the tagged +// declaration, but the alignment at runtime is wrong, causing a fault. +// +// Ideally, this isn't a problem, as any TU that imports or exports tagged +// symbols should also be built with tagging. But, to handle these cases, we +// demote the symbol to be untagged. +void lld::elf::createTaggedSymbols(const SmallVector<ELFFileBase *, 0> &files) { + assert(config->emachine == EM_AARCH64 && + config->androidMemtagMode != ELF::NT_MEMTAG_LEVEL_NONE); + + // First, collect all symbols that are marked as tagged, and count how many + // times they're marked as tagged. 
+ DenseMap<Symbol *, unsigned> taggedSymbolReferenceCount; + for (InputFile* file : files) { + if (file->kind() != InputFile::ObjKind) + continue; + for (InputSectionBase *section : file->getSections()) { + if (!section || section->type != SHT_AARCH64_MEMTAG_GLOBALS_STATIC || + section == &InputSection::discarded) + continue; + invokeELFT(addTaggedSymbolReferences, *section, + taggedSymbolReferenceCount); + } + } + + // Now, go through all the symbols. If the number of declarations + + // definitions to a symbol exceeds the amount of times they're marked as + // tagged, it means we have an objfile that uses the untagged variant of the + // symbol. + for (InputFile *file : files) { + if (file->kind() != InputFile::BinaryKind && + file->kind() != InputFile::ObjKind) + continue; + + for (Symbol *symbol : file->getSymbols()) { + // See `addTaggedSymbolReferences` for more details. + if (symbol->type != STT_OBJECT || + symbol->binding == STB_LOCAL) + continue; + auto it = taggedSymbolReferenceCount.find(symbol); + if (it == taggedSymbolReferenceCount.end()) continue; + unsigned &remainingAllowedTaggedRefs = it->second; + if (remainingAllowedTaggedRefs == 0) { + taggedSymbolReferenceCount.erase(it); + continue; + } + --remainingAllowedTaggedRefs; + } + } + + // `addTaggedSymbolReferences` has already checked that we have RELA + // relocations, the only other way to get written addends is with + // --apply-dynamic-relocs. + if (!taggedSymbolReferenceCount.empty() && config->writeAddends) + error("--apply-dynamic-relocs cannot be used with MTE globals"); + + // Now, `taggedSymbolReferenceCount` should only contain symbols that are + // defined as tagged exactly the same amount as it's referenced, meaning all + // uses are tagged. 
+ for (auto &[symbol, remainingTaggedRefs] : taggedSymbolReferenceCount) { + assert(remainingTaggedRefs == 0 && + "Symbol is defined as tagged more times than it's used"); + symbol->setIsTagged(true); + } +} diff --git a/lld/ELF/Arch/AMDGPU.cpp b/lld/ELF/Arch/AMDGPU.cpp index e5605497e272..650744db7dee 100644 --- a/lld/ELF/Arch/AMDGPU.cpp +++ b/lld/ELF/Arch/AMDGPU.cpp @@ -34,6 +34,7 @@ public: RelExpr getRelExpr(RelType type, const Symbol &s, const uint8_t *loc) const override; RelType getDynRel(RelType type) const override; + int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override; }; } // namespace @@ -183,6 +184,20 @@ RelType AMDGPU::getDynRel(RelType type) const { return R_AMDGPU_NONE; } +int64_t AMDGPU::getImplicitAddend(const uint8_t *buf, RelType type) const { + switch (type) { + case R_AMDGPU_NONE: + return 0; + case R_AMDGPU_ABS64: + case R_AMDGPU_RELATIVE64: + return read64(buf); + default: + internalLinkerError(getErrorLocation(buf), + "cannot read addend for relocation " + toString(type)); + return 0; + } +} + TargetInfo *elf::getAMDGPUTargetInfo() { static AMDGPU target; return ⌖ diff --git a/lld/ELF/Arch/AVR.cpp b/lld/ELF/Arch/AVR.cpp index 4905d61796fa..9211eabc9669 100644 --- a/lld/ELF/Arch/AVR.cpp +++ b/lld/ELF/Arch/AVR.cpp @@ -231,13 +231,14 @@ void AVR::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { // Since every jump destination is word aligned we gain an extra bit case R_AVR_7_PCREL: { - checkInt(loc, val, 7, rel); + checkInt(loc, val - 2, 7, rel); checkAlignment(loc, val, 2, rel); const uint16_t target = (val - 2) >> 1; write16le(loc, (read16le(loc) & 0xfc07) | ((target & 0x7f) << 3)); break; } case R_AVR_13_PCREL: { + checkInt(loc, val - 2, 13, rel); checkAlignment(loc, val, 2, rel); const uint16_t target = (val - 2) >> 1; write16le(loc, (read16le(loc) & 0xf000) | (target & 0xfff)); @@ -255,6 +256,7 @@ void AVR::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { break; case R_AVR_CALL: 
{ + checkAlignment(loc, val, 2, rel); uint16_t hi = val >> 17; uint16_t lo = val >> 1; write16le(loc, read16le(loc) | ((hi >> 1) << 4) | (hi & 1)); diff --git a/lld/ELF/Arch/Hexagon.cpp b/lld/ELF/Arch/Hexagon.cpp index bc26653697c7..54821c299bde 100644 --- a/lld/ELF/Arch/Hexagon.cpp +++ b/lld/ELF/Arch/Hexagon.cpp @@ -29,6 +29,7 @@ public: RelExpr getRelExpr(RelType type, const Symbol &s, const uint8_t *loc) const override; RelType getDynRel(RelType type) const override; + int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override; void relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const override; void writePltHeader(uint8_t *buf) const override; @@ -386,6 +387,25 @@ RelType Hexagon::getDynRel(RelType type) const { return R_HEX_NONE; } +int64_t Hexagon::getImplicitAddend(const uint8_t *buf, RelType type) const { + switch (type) { + case R_HEX_NONE: + case R_HEX_GLOB_DAT: + case R_HEX_JMP_SLOT: + return 0; + case R_HEX_32: + case R_HEX_RELATIVE: + case R_HEX_DTPMOD_32: + case R_HEX_DTPREL_32: + case R_HEX_TPREL_32: + return SignExtend64<32>(read32(buf)); + default: + internalLinkerError(getErrorLocation(buf), + "cannot read addend for relocation " + toString(type)); + return 0; + } +} + TargetInfo *elf::getHexagonTargetInfo() { static Hexagon target; return ⌖ diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp index 9dc99e573d41..1c3e015efc16 100644 --- a/lld/ELF/Arch/LoongArch.cpp +++ b/lld/ELF/Arch/LoongArch.cpp @@ -165,7 +165,6 @@ uint64_t elf::getLoongArchPageDelta(uint64_t dest, uint64_t pc) { result -= 0x10000'0000; else if (!negativeA && negativeB) result += 0x10000'0000; - return result; } @@ -444,10 +443,12 @@ RelExpr LoongArch::getRelExpr(const RelType type, const Symbol &s, case R_LARCH_TLS_LE64_LO20: case R_LARCH_TLS_LE64_HI12: return R_TPREL; + case R_LARCH_ADD6: case R_LARCH_ADD8: case R_LARCH_ADD16: case R_LARCH_ADD32: case R_LARCH_ADD64: + case R_LARCH_SUB6: case R_LARCH_SUB8: case R_LARCH_SUB16: case 
R_LARCH_SUB32: @@ -457,6 +458,7 @@ RelExpr LoongArch::getRelExpr(const RelType type, const Symbol &s, return R_RISCV_ADD; case R_LARCH_32_PCREL: case R_LARCH_64_PCREL: + case R_LARCH_PCREL20_S2: return R_PC; case R_LARCH_B16: case R_LARCH_B21: @@ -564,6 +566,12 @@ void LoongArch::relocate(uint8_t *loc, const Relocation &rel, write64le(loc, val); return; + case R_LARCH_PCREL20_S2: + checkInt(loc, val, 22, rel); + checkAlignment(loc, val, 4, rel); + write32le(loc, setJ20(read32le(loc), val >> 2)); + return; + case R_LARCH_B16: checkInt(loc, val, 18, rel); checkAlignment(loc, val, 4, rel); @@ -643,6 +651,9 @@ void LoongArch::relocate(uint8_t *loc, const Relocation &rel, write32le(loc, setK12(read32le(loc), extractBits(val, 63, 52))); return; + case R_LARCH_ADD6: + *loc = (*loc & 0xc0) | ((*loc + val) & 0x3f); + return; case R_LARCH_ADD8: *loc += val; return; @@ -655,6 +666,9 @@ void LoongArch::relocate(uint8_t *loc, const Relocation &rel, case R_LARCH_ADD64: write64le(loc, read64le(loc) + val); return; + case R_LARCH_SUB6: + *loc = (*loc & 0xc0) | ((*loc - val) & 0x3f); + return; case R_LARCH_SUB8: *loc -= val; return; diff --git a/lld/ELF/Arch/Mips.cpp b/lld/ELF/Arch/Mips.cpp index d5a335c65932..d6c70aeba95d 100644 --- a/lld/ELF/Arch/Mips.cpp +++ b/lld/ELF/Arch/Mips.cpp @@ -219,7 +219,7 @@ template <endianness E> static uint32_t readShuffle(const uint8_t *loc) { // words in a big-endian order. That is why we have to swap these // words to get a correct value. uint32_t v = read32(loc); - if (E == support::little) + if (E == llvm::endianness::little) return (v << 16) | (v >> 16); return v; } @@ -237,12 +237,12 @@ static void writeShuffleValue(uint8_t *loc, uint64_t v, uint8_t bitsSize, uint8_t shift) { // See comments in readShuffle for purpose of this code. 
uint16_t *words = (uint16_t *)loc; - if (E == support::little) + if (E == llvm::endianness::little) std::swap(words[0], words[1]); writeValue(loc, v, bitsSize, shift); - if (E == support::little) + if (E == llvm::endianness::little) std::swap(words[0], words[1]); } diff --git a/lld/ELF/Arch/PPC.cpp b/lld/ELF/Arch/PPC.cpp index 87942c1e9245..1b0838456428 100644 --- a/lld/ELF/Arch/PPC.cpp +++ b/lld/ELF/Arch/PPC.cpp @@ -278,9 +278,16 @@ RelType PPC::getDynRel(RelType type) const { int64_t PPC::getImplicitAddend(const uint8_t *buf, RelType type) const { switch (type) { case R_PPC_NONE: + case R_PPC_GLOB_DAT: + case R_PPC_JMP_SLOT: return 0; case R_PPC_ADDR32: case R_PPC_REL32: + case R_PPC_RELATIVE: + case R_PPC_IRELATIVE: + case R_PPC_DTPMOD32: + case R_PPC_DTPREL32: + case R_PPC_TPREL32: return SignExtend64<32>(read32(buf)); default: internalLinkerError(getErrorLocation(buf), @@ -471,10 +478,14 @@ void PPC::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel, if (insn >> 26 != 31) error("unrecognized instruction for IE to LE R_PPC_TLS"); // addi rT, rT, x@tls --> addi rT, rT, x@tprel@l - uint32_t dFormOp = getPPCDFormOp((read32(loc) & 0x000007fe) >> 1); - if (dFormOp == 0) - error("unrecognized instruction for IE to LE R_PPC_TLS"); - write32(loc, (dFormOp << 26) | (insn & 0x03ff0000) | lo(val)); + unsigned secondaryOp = (read32(loc) & 0x000007fe) >> 1; + uint32_t dFormOp = getPPCDFormOp(secondaryOp); + if (dFormOp == 0) { // Expecting a DS-Form instruction. 
+ dFormOp = getPPCDSFormOp(secondaryOp); + if (dFormOp == 0) + error("unrecognized instruction for IE to LE R_PPC_TLS"); + } + write32(loc, (dFormOp | (insn & 0x03ff0000) | lo(val))); break; } default: diff --git a/lld/ELF/Arch/PPC64.cpp b/lld/ELF/Arch/PPC64.cpp index 36b1d0e3c9be..097a57514770 100644 --- a/lld/ELF/Arch/PPC64.cpp +++ b/lld/ELF/Arch/PPC64.cpp @@ -37,6 +37,12 @@ enum XFormOpcd { STHX = 407, STWX = 151, STDX = 149, + LHAX = 343, + LWAX = 341, + LFSX = 535, + LFDX = 599, + STFSX = 663, + STFDX = 727, ADD = 266, }; @@ -49,7 +55,6 @@ enum DFormOpcd { LWZ = 32, LWZU = 33, LFSU = 49, - LD = 58, LFDU = 51, STB = 38, STBU = 39, @@ -59,10 +64,20 @@ enum DFormOpcd { STWU = 37, STFSU = 53, STFDU = 55, - STD = 62, + LHA = 42, + LFS = 48, + LFD = 50, + STFS = 52, + STFD = 54, ADDI = 14 }; +enum DSFormOpcd { + LD = 58, + LWA = 58, + STD = 62 +}; + constexpr uint32_t NOP = 0x60000000; enum class PPCLegacyInsn : uint32_t { @@ -825,26 +840,48 @@ void PPC64::relaxTlsLdToLe(uint8_t *loc, const Relocation &rel, } } +// Map X-Form instructions to their DS-Form counterparts, if applicable. +// The full encoding is returned here to distinguish between the different +// DS-Form instructions. 
+unsigned elf::getPPCDSFormOp(unsigned secondaryOp) { + switch (secondaryOp) { + case LWAX: + return (LWA << 26) | 0x2; + case LDX: + return LD << 26; + case STDX: + return STD << 26; + default: + return 0; + } +} + unsigned elf::getPPCDFormOp(unsigned secondaryOp) { switch (secondaryOp) { case LBZX: - return LBZ; + return LBZ << 26; case LHZX: - return LHZ; + return LHZ << 26; case LWZX: - return LWZ; - case LDX: - return LD; + return LWZ << 26; case STBX: - return STB; + return STB << 26; case STHX: - return STH; + return STH << 26; case STWX: - return STW; - case STDX: - return STD; + return STW << 26; + case LHAX: + return LHA << 26; + case LFSX: + return LFS << 26; + case LFDX: + return LFD << 26; + case STFSX: + return STFS << 26; + case STFDX: + return STFD << 26; case ADD: - return ADDI; + return ADDI << 26; default: return 0; } @@ -898,10 +935,16 @@ void PPC64::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel, error("unrecognized instruction for IE to LE R_PPC64_TLS"); uint32_t secondaryOp = (read32(loc) & 0x000007FE) >> 1; // bits 21-30 uint32_t dFormOp = getPPCDFormOp(secondaryOp); - if (dFormOp == 0) - error("unrecognized instruction for IE to LE R_PPC64_TLS"); - write32(loc, ((dFormOp << 26) | (read32(loc) & 0x03FFFFFF))); - relocateNoSym(loc + offset, R_PPC64_TPREL16_LO, val); + uint32_t finalReloc; + if (dFormOp == 0) { // Expecting a DS-Form instruction. + dFormOp = getPPCDSFormOp(secondaryOp); + if (dFormOp == 0) + error("unrecognized instruction for IE to LE R_PPC64_TLS"); + finalReloc = R_PPC64_TPREL16_LO_DS; + } else + finalReloc = R_PPC64_TPREL16_LO; + write32(loc, dFormOp | (read32(loc) & 0x03ff0000)); + relocateNoSym(loc + offset, finalReloc, val); } else if (locAsInt % 4 == 1) { // If the offset is not 4 byte aligned then we have a PCRel type reloc. 
// This version of the relocation is offset by one byte from the @@ -926,9 +969,12 @@ void PPC64::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel, } } else { uint32_t dFormOp = getPPCDFormOp(secondaryOp); - if (dFormOp == 0) - errorOrWarn("unrecognized instruction for IE to LE R_PPC64_TLS"); - write32(loc - 1, ((dFormOp << 26) | (tlsInstr & 0x03FF0000))); + if (dFormOp == 0) { // Expecting a DS-Form instruction. + dFormOp = getPPCDSFormOp(secondaryOp); + if (dFormOp == 0) + errorOrWarn("unrecognized instruction for IE to LE R_PPC64_TLS"); + } + write32(loc - 1, (dFormOp | (tlsInstr & 0x03ff0000))); } } else { errorOrWarn("R_PPC64_TLS must be either 4 byte aligned or one byte " @@ -1517,6 +1563,8 @@ void PPC64::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const { uint64_t secAddr = sec.getOutputSection()->addr; if (auto *s = dyn_cast<InputSection>(&sec)) secAddr += s->outSecOff; + else if (auto *ehIn = dyn_cast<EhInputSection>(&sec)) + secAddr += ehIn->getParent()->outSecOff; uint64_t lastPPCRelaxedRelocOff = -1; for (const Relocation &rel : sec.relocs()) { uint8_t *loc = buf + rel.offset; @@ -1556,7 +1604,7 @@ void PPC64::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const { break; // Patch a nop (0x60000000) to a ld. - if (rel.sym->needsTocRestore) { + if (rel.sym->needsTocRestore()) { // gcc/gfortran 5.4, 6.3 and earlier versions do not add nop for // recursive calls even if the function is preemptible. This is not // wrong in the common case where the function is not preempted at diff --git a/lld/ELF/Arch/RISCV.cpp b/lld/ELF/Arch/RISCV.cpp index d0d75118e30d..898e3e45b9e7 100644 --- a/lld/ELF/Arch/RISCV.cpp +++ b/lld/ELF/Arch/RISCV.cpp @@ -306,6 +306,8 @@ RelExpr RISCV::getRelExpr(const RelType type, const Symbol &s, case R_RISCV_TPREL_ADD: case R_RISCV_RELAX: return config->relax ? 
R_RELAX_HINT : R_NONE; + case R_RISCV_SET_ULEB128: + return R_RISCV_LEB128; default: error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) + ") against symbol " + toString(s)); @@ -589,7 +591,7 @@ static void initSymbolAnchors() { // Relax R_RISCV_CALL/R_RISCV_CALL_PLT auipc+jalr to c.j, c.jal, or jal. static void relaxCall(const InputSection &sec, size_t i, uint64_t loc, Relocation &r, uint32_t &remove) { - const bool rvc = config->eflags & EF_RISCV_RVC; + const bool rvc = getEFlags(sec.file) & EF_RISCV_RVC; const Symbol &sym = *r.sym; const uint64_t insnPair = read64le(sec.content().data() + r.offset); const uint32_t rd = extractBits(insnPair, 32 + 11, 32 + 7); @@ -933,7 +935,7 @@ mergeAttributesSection(const SmallVector<InputSectionBase *, 0> §ions) { const auto &attributesTags = RISCVAttrs::getRISCVAttributeTags(); for (const InputSectionBase *sec : sections) { RISCVAttributeParser parser; - if (Error e = parser.parse(sec->content(), support::little)) + if (Error e = parser.parse(sec->content(), llvm::endianness::little)) warn(toString(sec) + ": " + llvm::toString(std::move(e))); for (const auto &tag : attributesTags) { switch (RISCVAttrs::AttrType(tag.attr)) { diff --git a/lld/ELF/Arch/X86_64.cpp b/lld/ELF/Arch/X86_64.cpp index 349ccd218a57..2135ac234864 100644 --- a/lld/ELF/Arch/X86_64.cpp +++ b/lld/ELF/Arch/X86_64.cpp @@ -7,12 +7,14 @@ //===----------------------------------------------------------------------===// #include "OutputSections.h" +#include "Relocations.h" #include "Symbols.h" #include "SyntheticSections.h" #include "Target.h" #include "lld/Common/ErrorHandler.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/Support/Endian.h" +#include "llvm/Support/MathExtras.h" using namespace llvm; using namespace llvm::object; @@ -47,6 +49,7 @@ public: uint8_t stOther) const override; bool deleteFallThruJmpInsn(InputSection &is, InputFile *file, InputSection *nextIS) const override; + bool relaxOnce(int pass) const override; }; } // namespace 
@@ -305,6 +308,43 @@ bool X86_64::deleteFallThruJmpInsn(InputSection &is, InputFile *file, return true; } +bool X86_64::relaxOnce(int pass) const { + uint64_t minVA = UINT64_MAX, maxVA = 0; + for (OutputSection *osec : outputSections) { + minVA = std::min(minVA, osec->addr); + maxVA = std::max(maxVA, osec->addr + osec->size); + } + // If the max VA difference is under 2^31, GOT-generating relocations with a 32-bit range cannot overflow. + if (isUInt<31>(maxVA - minVA)) + return false; + + SmallVector<InputSection *, 0> storage; + bool changed = false; + for (OutputSection *osec : outputSections) { + if (!(osec->flags & SHF_EXECINSTR)) + continue; + for (InputSection *sec : getInputSections(*osec, storage)) { + for (Relocation &rel : sec->relocs()) { + if (rel.expr != R_RELAX_GOT_PC) + continue; + + uint64_t v = sec->getRelocTargetVA( + sec->file, rel.type, rel.addend, + sec->getOutputSection()->addr + rel.offset, *rel.sym, rel.expr); + if (isInt<32>(v)) + continue; + if (rel.sym->auxIdx == 0) { + rel.sym->allocateAux(); + addGotEntry(*rel.sym); + changed = true; + } + rel.expr = R_GOT_PC; + } + } + } + return changed; +} + RelExpr X86_64::getRelExpr(RelType type, const Symbol &s, const uint8_t *loc) const { switch (type) { @@ -912,7 +952,8 @@ static void relaxGotNoPic(uint8_t *loc, uint64_t val, uint8_t op, } static void relaxGot(uint8_t *loc, const Relocation &rel, uint64_t val) { - checkInt(loc, val, 32, rel); + assert(isInt<32>(val) && + "GOTPCRELX should not have been relaxed if it overflows"); const uint8_t op = loc[-2]; const uint8_t modRm = loc[-1]; @@ -989,6 +1030,8 @@ void X86_64::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const { uint64_t secAddr = sec.getOutputSection()->addr; if (auto *s = dyn_cast<InputSection>(&sec)) secAddr += s->outSecOff; + else if (auto *ehIn = dyn_cast<EhInputSection>(&sec)) + secAddr += ehIn->getParent()->outSecOff; for (const Relocation &rel : sec.relocs()) { if (rel.expr == R_NONE) // See deleteFallThruJmpInsn 
continue; diff --git a/lld/ELF/CallGraphSort.cpp b/lld/ELF/CallGraphSort.cpp index ff72731b1f38..a0cf491bbae3 100644 --- a/lld/ELF/CallGraphSort.cpp +++ b/lld/ELF/CallGraphSort.cpp @@ -6,38 +6,21 @@ // //===----------------------------------------------------------------------===// /// -/// Implementation of Call-Chain Clustering from: Optimizing Function Placement -/// for Large-Scale Data-Center Applications -/// https://research.fb.com/wp-content/uploads/2017/01/cgo2017-hfsort-final1.pdf -/// -/// The goal of this algorithm is to improve runtime performance of the final -/// executable by arranging code sections such that page table and i-cache -/// misses are minimized. -/// -/// Definitions: -/// * Cluster -/// * An ordered list of input sections which are laid out as a unit. At the -/// beginning of the algorithm each input section has its own cluster and -/// the weight of the cluster is the sum of the weight of all incoming -/// edges. -/// * Call-Chain Clustering (C³) Heuristic -/// * Defines when and how clusters are combined. Pick the highest weighted -/// input section then add it to its most likely predecessor if it wouldn't -/// penalize it too much. -/// * Density -/// * The weight of the cluster divided by the size of the cluster. This is a -/// proxy for the amount of execution time spent per byte of the cluster. -/// -/// It does so given a call graph profile by the following: -/// * Build a weighted call graph from the call graph profile -/// * Sort input sections by weight -/// * For each input section starting with the highest weight -/// * Find its most likely predecessor cluster -/// * Check if the combined cluster would be too large, or would have too low -/// a density. -/// * If not, then combine the clusters. -/// * Sort non-empty clusters by density +/// The file is responsible for sorting sections using LLVM call graph profile +/// data by placing frequently executed code sections together. 
The goal of the +/// placement is to improve the runtime performance of the final executable by +/// arranging code sections so that i-TLB misses and i-cache misses are reduced. /// +/// The algorithm first builds a call graph based on the profile data and then +/// iteratively merges "chains" (ordered lists) of input sections which will be +/// laid out as a unit. There are two implementations for deciding how to +/// merge a pair of chains: +/// - a simpler one, referred to as Call-Chain Clustering (C^3), that follows +/// "Optimizing Function Placement for Large-Scale Data-Center Applications" +/// https://research.fb.com/wp-content/uploads/2017/01/cgo2017-hfsort-final1.pdf +/// - a more advanced one, referred to as Cache-Directed-Sort (CDSort), which +/// typically produces layouts with higher locality, and hence, yields fewer +/// instruction cache misses on large binaries. //===----------------------------------------------------------------------===// #include "CallGraphSort.h" @@ -45,6 +28,7 @@ #include "InputSection.h" #include "Symbols.h" #include "llvm/Support/FileSystem.h" +#include "llvm/Transforms/Utils/CodeLayout.h" #include <numeric> @@ -75,6 +59,33 @@ struct Cluster { Edge bestPred = {-1, 0}; }; +/// Implementation of the Call-Chain Clustering (C^3). The goal of this +/// algorithm is to improve runtime performance of the executable by arranging +/// code sections such that page table and i-cache misses are minimized. +/// +/// Definitions: +/// * Cluster +/// * An ordered list of input sections which are laid out as a unit. At the +/// beginning of the algorithm each input section has its own cluster and +/// the weight of the cluster is the sum of the weight of all incoming +/// edges. +/// * Call-Chain Clustering (C³) Heuristic +/// * Defines when and how clusters are combined. Pick the highest weighted +/// input section then add it to its most likely predecessor if it wouldn't +/// penalize it too much. 
+/// * Density +/// * The weight of the cluster divided by the size of the cluster. This is a +/// proxy for the amount of execution time spent per byte of the cluster. +/// +/// It does so given a call graph profile by the following: +/// * Build a weighted call graph from the call graph profile +/// * Sort input sections by weight +/// * For each input section starting with the highest weight +/// * Find its most likely predecessor cluster +/// * Check if the combined cluster would be too large, or would have too low +/// a density. +/// * If not, then combine the clusters. +/// * Sort non-empty clusters by density class CallGraphSort { public: CallGraphSort(); @@ -260,11 +271,73 @@ DenseMap<const InputSectionBase *, int> CallGraphSort::run() { return orderMap; } +// Sort sections by the profile data using the Cache-Directed Sort algorithm. +// The placement is done by optimizing the locality by co-locating frequently +// executed code sections together. +DenseMap<const InputSectionBase *, int> elf::computeCacheDirectedSortOrder() { + SmallVector<uint64_t, 0> funcSizes; + SmallVector<uint64_t, 0> funcCounts; + SmallVector<codelayout::EdgeCount, 0> callCounts; + SmallVector<uint64_t, 0> callOffsets; + SmallVector<const InputSectionBase *, 0> sections; + DenseMap<const InputSectionBase *, size_t> secToTargetId; + + auto getOrCreateNode = [&](const InputSectionBase *inSec) -> size_t { + auto res = secToTargetId.try_emplace(inSec, sections.size()); + if (res.second) { + // inSec does not appear before in the graph. + sections.push_back(inSec); + funcSizes.push_back(inSec->getSize()); + funcCounts.push_back(0); + } + return res.first->second; + }; + + // Create the graph. + for (std::pair<SectionPair, uint64_t> &c : config->callGraphProfile) { + const InputSectionBase *fromSB = cast<InputSectionBase>(c.first.first); + const InputSectionBase *toSB = cast<InputSectionBase>(c.first.second); + // Ignore edges between input sections belonging to different sections. 
+ if (fromSB->getOutputSection() != toSB->getOutputSection()) + continue; + + uint64_t weight = c.second; + // Ignore edges with zero weight. + if (weight == 0) + continue; + + size_t from = getOrCreateNode(fromSB); + size_t to = getOrCreateNode(toSB); + // Ignore self-edges (recursive calls). + if (from == to) + continue; + + callCounts.push_back({from, to, weight}); + // Assume that the jump is at the middle of the input section. The profile + // data does not contain jump offsets. + callOffsets.push_back((funcSizes[from] + 1) / 2); + funcCounts[to] += weight; + } + + // Run the layout algorithm. + std::vector<uint64_t> sortedSections = codelayout::computeCacheDirectedLayout( + funcSizes, funcCounts, callCounts, callOffsets); + + // Create the final order. + DenseMap<const InputSectionBase *, int> orderMap; + int curOrder = 1; + for (uint64_t secIdx : sortedSections) + orderMap[sections[secIdx]] = curOrder++; + + return orderMap; +} + // Sort sections by the profile data provided by --callgraph-profile-file. // // This first builds a call graph based on the profile data then merges sections -// according to the C³ heuristic. All clusters are then sorted by a density -// metric to further improve locality. +// according either to the C³ or Cache-Directed-Sort ordering algorithm. 
DenseMap<const InputSectionBase *, int> elf::computeCallGraphProfileOrder() { + if (config->callGraphProfileSort == CGProfileSortKind::Cdsort) + return computeCacheDirectedSortOrder(); return CallGraphSort().run(); } diff --git a/lld/ELF/CallGraphSort.h b/lld/ELF/CallGraphSort.h index 4997cb102c32..1b54f2b62482 100644 --- a/lld/ELF/CallGraphSort.h +++ b/lld/ELF/CallGraphSort.h @@ -14,6 +14,8 @@ namespace lld::elf { class InputSectionBase; +llvm::DenseMap<const InputSectionBase *, int> computeCacheDirectedSortOrder(); + llvm::DenseMap<const InputSectionBase *, int> computeCallGraphProfileOrder(); } // namespace lld::elf diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h index bbf2d2015645..56229334f9a4 100644 --- a/lld/ELF/Config.h +++ b/lld/ELF/Config.h @@ -53,12 +53,15 @@ enum ELFKind : uint8_t { }; // For -Bno-symbolic, -Bsymbolic-non-weak-functions, -Bsymbolic-functions, -// -Bsymbolic. -enum class BsymbolicKind { None, NonWeakFunctions, Functions, All }; +// -Bsymbolic-non-weak, -Bsymbolic. +enum class BsymbolicKind { None, NonWeakFunctions, Functions, NonWeak, All }; // For --build-id. enum class BuildIdKind { None, Fast, Md5, Sha1, Hexstring, Uuid }; +// For --call-graph-profile-sort={none,hfsort,cdsort}. +enum class CGProfileSortKind { None, Hfsort, Cdsort }; + // For --discard-{all,locals,none}. enum class DiscardPolicy { Default, All, Locals, None }; @@ -125,7 +128,8 @@ private: void inferMachineType(); void link(llvm::opt::InputArgList &args); template <class ELFT> void compileBitcodeFiles(bool skipLinkedOutput); - + bool tryAddFatLTOFile(MemoryBufferRef mb, StringRef archiveName, + uint64_t offsetInArchive, bool lazy); // True if we are in --whole-archive and --no-whole-archive. 
bool inWholeArchive = false; @@ -205,6 +209,7 @@ struct Config { callGraphProfile; bool cmseImplib = false; bool allowMultipleDefinition; + bool fatLTOObjects; bool androidPackDynRelocs = false; bool armHasBlx = false; bool armHasMovtMovw = false; @@ -213,7 +218,7 @@ struct Config { bool asNeeded = false; bool armBe8 = false; BsymbolicKind bsymbolic = BsymbolicKind::None; - bool callGraphProfileSort; + CGProfileSortKind callGraphProfileSort; bool checkSections; bool checkDynamicRelocs; llvm::DebugCompressionType compressDebugSections; @@ -245,6 +250,7 @@ struct Config { bool ltoDebugPassManager; bool ltoEmitAsm; bool ltoUniqueBasicBlockSectionNames; + bool ltoValidateAllVtablesHaveTypeInfos; bool ltoWholeProgramVisibility; bool mergeArmExidx; bool mipsN32Abi = false; @@ -339,7 +345,7 @@ struct Config { uint64_t zStackSize; unsigned ltoPartitions; unsigned ltoo; - llvm::CodeGenOpt::Level ltoCgo; + llvm::CodeGenOptLevel ltoCgo; unsigned optimize; StringRef thinLTOJobs; unsigned timeTraceGranularity; @@ -360,7 +366,7 @@ struct Config { bool isLE; // endianness::little if isLE is true. endianness::big otherwise. - llvm::support::endianness endianness; + llvm::endianness endianness; // True if the target is the little-endian MIPS64. // @@ -473,6 +479,15 @@ struct Ctx { std::atomic<bool> hasTlsIe{false}; // True if we need to reserve two .got entries for local-dynamic TLS model. std::atomic<bool> needsTlsLd{false}; + // True if all native vtable symbols have corresponding type info symbols + // during LTO. + bool ltoAllVtablesHaveTypeInfos; + + // Each symbol assignment and DEFINED(sym) reference is assigned an increasing + // order. Each DEFINED(sym) evaluation checks whether the reference happens + // before a possible `sym = expr;`. 
+ unsigned scriptSymOrderCounter = 1; + llvm::DenseMap<const Symbol *, unsigned> scriptSymOrder; void reset(); diff --git a/lld/ELF/DWARF.h b/lld/ELF/DWARF.h index 9a7993903d86..1b9a3e3f7794 100644 --- a/lld/ELF/DWARF.h +++ b/lld/ELF/DWARF.h @@ -13,6 +13,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/Object/ELF.h" +#include <optional> namespace lld::elf { @@ -73,7 +74,7 @@ public: StringRef getLineStrSection() const override { return lineStrSection; } bool isLittleEndian() const override { - return ELFT::TargetEndianness == llvm::support::little; + return ELFT::TargetEndianness == llvm::endianness::little; } std::optional<llvm::RelocAddrEntry> find(const llvm::DWARFSection &sec, diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index c2059c70e15a..6bef09eeca01 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -52,6 +52,7 @@ #include "llvm/Config/llvm-config.h" #include "llvm/LTO/LTO.h" #include "llvm/Object/Archive.h" +#include "llvm/Object/IRObjectFile.h" #include "llvm/Remarks/HotnessThresholdParser.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compression.h" @@ -103,8 +104,13 @@ void Ctx::reset() { nonPrevailingSyms.clear(); whyExtractRecords.clear(); backwardReferences.clear(); + auxiliaryFiles.clear(); hasSympart.store(false, std::memory_order_relaxed); + hasTlsIe.store(false, std::memory_order_relaxed); needsTlsLd.store(false, std::memory_order_relaxed); + scriptSymOrderCounter = 1; + scriptSymOrder.clear(); + ltoAllVtablesHaveTypeInfos = false; } llvm::raw_fd_ostream Ctx::openAuxiliaryFile(llvm::StringRef filename, @@ -239,6 +245,19 @@ static bool isBitcode(MemoryBufferRef mb) { return identify_magic(mb.getBuffer()) == llvm::file_magic::bitcode; } +bool LinkerDriver::tryAddFatLTOFile(MemoryBufferRef mb, StringRef archiveName, + uint64_t offsetInArchive, bool lazy) { + if (!config->fatLTOObjects) + return false; + Expected<MemoryBufferRef> fatLTOData = + 
IRObjectFile::findBitcodeInMemBuffer(mb); + if (errorToBool(fatLTOData.takeError())) + return false; + files.push_back( + make<BitcodeFile>(*fatLTOData, archiveName, offsetInArchive, lazy)); + return true; +} + // Opens a file and create a file object. Path has to be resolved already. void LinkerDriver::addFile(StringRef path, bool withLOption) { using namespace sys::fs; @@ -263,7 +282,7 @@ void LinkerDriver::addFile(StringRef path, bool withLOption) { for (const std::pair<MemoryBufferRef, uint64_t> &p : members) { if (isBitcode(p.first)) files.push_back(make<BitcodeFile>(p.first, path, p.second, false)); - else + else if (!tryAddFatLTOFile(p.first, path, p.second, false)) files.push_back(createObjFile(p.first, path)); } return; @@ -287,9 +306,10 @@ void LinkerDriver::addFile(StringRef path, bool withLOption) { InputFile::isInGroup = true; for (const std::pair<MemoryBufferRef, uint64_t> &p : members) { auto magic = identify_magic(p.first.getBuffer()); - if (magic == file_magic::elf_relocatable) - files.push_back(createObjFile(p.first, path, true)); - else if (magic == file_magic::bitcode) + if (magic == file_magic::elf_relocatable) { + if (!tryAddFatLTOFile(p.first, path, p.second, true)) + files.push_back(createObjFile(p.first, path, true)); + } else if (magic == file_magic::bitcode) files.push_back(make<BitcodeFile>(p.first, path, p.second, true)); else warn(path + ": archive member '" + p.first.getBufferIdentifier() + @@ -321,7 +341,8 @@ void LinkerDriver::addFile(StringRef path, bool withLOption) { files.push_back(make<BitcodeFile>(mbref, "", 0, inLib)); break; case file_magic::elf_relocatable: - files.push_back(createObjFile(mbref, "", inLib)); + if (!tryAddFatLTOFile(mbref, "", 0, inLib)) + files.push_back(createObjFile(mbref, "", inLib)); break; default: error(path + ": unknown file type"); @@ -786,13 +807,6 @@ static int getMemtagMode(opt::InputArgList &args) { return ELF::NT_MEMTAG_LEVEL_NONE; } - if (!config->androidMemtagHeap && 
!config->androidMemtagStack) { - error("when using --android-memtag-mode, at least one of " - "--android-memtag-heap or " - "--android-memtag-stack is required"); - return ELF::NT_MEMTAG_LEVEL_NONE; - } - if (memtagModeArg == "sync") return ELF::NT_MEMTAG_LEVEL_SYNC; if (memtagModeArg == "async") @@ -1023,6 +1037,74 @@ template <class ELFT> static void readCallGraphsFromObjectFiles() { } } +template <class ELFT> +static void ltoValidateAllVtablesHaveTypeInfos(opt::InputArgList &args) { + DenseSet<StringRef> typeInfoSymbols; + SmallSetVector<StringRef, 0> vtableSymbols; + auto processVtableAndTypeInfoSymbols = [&](StringRef name) { + if (name.consume_front("_ZTI")) + typeInfoSymbols.insert(name); + else if (name.consume_front("_ZTV")) + vtableSymbols.insert(name); + }; + + // Examine all native symbol tables. + for (ELFFileBase *f : ctx.objectFiles) { + using Elf_Sym = typename ELFT::Sym; + for (const Elf_Sym &s : f->template getGlobalELFSyms<ELFT>()) { + if (s.st_shndx != SHN_UNDEF) { + StringRef name = check(s.getName(f->getStringTable())); + processVtableAndTypeInfoSymbols(name); + } + } + } + + for (SharedFile *f : ctx.sharedFiles) { + using Elf_Sym = typename ELFT::Sym; + for (const Elf_Sym &s : f->template getELFSyms<ELFT>()) { + if (s.st_shndx != SHN_UNDEF) { + StringRef name = check(s.getName(f->getStringTable())); + processVtableAndTypeInfoSymbols(name); + } + } + } + + SmallSetVector<StringRef, 0> vtableSymbolsWithNoRTTI; + for (StringRef s : vtableSymbols) + if (!typeInfoSymbols.count(s)) + vtableSymbolsWithNoRTTI.insert(s); + + // Remove known safe symbols. 
+ for (auto *arg : args.filtered(OPT_lto_known_safe_vtables)) { + StringRef knownSafeName = arg->getValue(); + if (!knownSafeName.consume_front("_ZTV")) + error("--lto-known-safe-vtables=: expected symbol to start with _ZTV, " + "but got " + + knownSafeName); + vtableSymbolsWithNoRTTI.remove(knownSafeName); + } + + ctx.ltoAllVtablesHaveTypeInfos = vtableSymbolsWithNoRTTI.empty(); + // Check for unmatched RTTI symbols + for (StringRef s : vtableSymbolsWithNoRTTI) { + message( + "--lto-validate-all-vtables-have-type-infos: RTTI missing for vtable " + "_ZTV" + + s + ", --lto-whole-program-visibility disabled"); + } +} + +static CGProfileSortKind getCGProfileSortKind(opt::InputArgList &args) { + StringRef s = args.getLastArgValue(OPT_call_graph_profile_sort, "cdsort"); + if (s == "hfsort") + return CGProfileSortKind::Hfsort; + if (s == "cdsort") + return CGProfileSortKind::Cdsort; + if (s != "none") + error("unknown --call-graph-profile-sort= value: " + s); + return CGProfileSortKind::None; +} + static DebugCompressionType getCompressionType(StringRef s, StringRef option) { DebugCompressionType type = StringSwitch<DebugCompressionType>(s) .Case("zlib", DebugCompressionType::Zlib) @@ -1121,7 +1203,7 @@ static bool remapInputs(StringRef line, const Twine &location) { else if (Expected<GlobPattern> pat = GlobPattern::create(fields[0])) config->remapInputsWildcards.emplace_back(std::move(*pat), fields[1]); else { - error(location + ": " + toString(pat.takeError())); + error(location + ": " + toString(pat.takeError()) + ": " + fields[0]); return true; } return false; @@ -1141,19 +1223,24 @@ static void readConfigs(opt::InputArgList &args) { args.hasFlag(OPT_android_memtag_heap, OPT_no_android_memtag_heap, false); config->androidMemtagStack = args.hasFlag(OPT_android_memtag_stack, OPT_no_android_memtag_stack, false); + config->fatLTOObjects = + args.hasFlag(OPT_fat_lto_objects, OPT_no_fat_lto_objects, false); config->androidMemtagMode = getMemtagMode(args); 
config->auxiliaryList = args::getStrings(args, OPT_auxiliary); config->armBe8 = args.hasArg(OPT_be8); - if (opt::Arg *arg = - args.getLastArg(OPT_Bno_symbolic, OPT_Bsymbolic_non_weak_functions, - OPT_Bsymbolic_functions, OPT_Bsymbolic)) { + if (opt::Arg *arg = args.getLastArg( + OPT_Bno_symbolic, OPT_Bsymbolic_non_weak_functions, + OPT_Bsymbolic_functions, OPT_Bsymbolic_non_weak, OPT_Bsymbolic)) { if (arg->getOption().matches(OPT_Bsymbolic_non_weak_functions)) config->bsymbolic = BsymbolicKind::NonWeakFunctions; else if (arg->getOption().matches(OPT_Bsymbolic_functions)) config->bsymbolic = BsymbolicKind::Functions; + else if (arg->getOption().matches(OPT_Bsymbolic_non_weak)) + config->bsymbolic = BsymbolicKind::NonWeak; else if (arg->getOption().matches(OPT_Bsymbolic)) config->bsymbolic = BsymbolicKind::All; } + config->callGraphProfileSort = getCGProfileSortKind(args); config->checkSections = args.hasFlag(OPT_check_sections, OPT_no_check_sections, true); config->chroot = args.getLastArgValue(OPT_chroot); @@ -1174,8 +1261,6 @@ static void readConfigs(opt::InputArgList &args) { args.hasFlag(OPT_eh_frame_hdr, OPT_no_eh_frame_hdr, false); config->emitLLVM = args.hasArg(OPT_plugin_opt_emit_llvm, false); config->emitRelocs = args.hasArg(OPT_emit_relocs); - config->callGraphProfileSort = args.hasFlag( - OPT_call_graph_profile_sort, OPT_no_call_graph_profile_sort, true); config->enableNewDtags = args.hasFlag(OPT_enable_new_dtags, OPT_disable_new_dtags, true); config->entry = args.getLastArgValue(OPT_entry); @@ -1219,6 +1304,9 @@ static void readConfigs(opt::InputArgList &args) { config->ltoWholeProgramVisibility = args.hasFlag(OPT_lto_whole_program_visibility, OPT_no_lto_whole_program_visibility, false); + config->ltoValidateAllVtablesHaveTypeInfos = + args.hasFlag(OPT_lto_validate_all_vtables_have_type_infos, + OPT_no_lto_validate_all_vtables_have_type_infos, false); config->ltoo = args::getInteger(args, OPT_lto_O, 2); if (config->ltoo > 3) error("invalid optimization 
level for LTO: " + Twine(config->ltoo)); @@ -1422,7 +1510,7 @@ static void readConfigs(opt::InputArgList &args) { else if (Expected<GlobPattern> pat = GlobPattern::create(kv.first)) config->shuffleSections.emplace_back(std::move(*pat), uint32_t(v)); else - error(errPrefix + toString(pat.takeError())); + error(errPrefix + toString(pat.takeError()) + ": " + kv.first); } auto reports = {std::make_pair("bti-report", &config->zBtiReport), @@ -1460,7 +1548,7 @@ static void readConfigs(opt::InputArgList &args) { else if (Expected<GlobPattern> pat = GlobPattern::create(kv.first)) config->deadRelocInNonAlloc.emplace_back(std::move(*pat), v); else - error(errPrefix + toString(pat.takeError())); + error(errPrefix + toString(pat.takeError()) + ": " + kv.first); } cl::ResetAllOptionOccurrences(); @@ -1569,8 +1657,8 @@ static void readConfigs(opt::InputArgList &args) { // Page alignment can be disabled by the -n (--nmagic) and -N (--omagic). // As PT_GNU_RELRO relies on Paging, do not create it when we have disabled - // it. - if (config->nmagic || config->omagic) + // it. Also disable RELRO for -r. 
+ if (config->nmagic || config->omagic || config->relocatable) config->zRelro = false; std::tie(config->buildId, config->buildIdVector) = getBuildId(args); @@ -1591,7 +1679,7 @@ static void readConfigs(opt::InputArgList &args) { config->symbolOrderingFile = getSymbolOrderingFile(*buffer); // Also need to disable CallGraphProfileSort to prevent // LLD order symbols with CGProfile - config->callGraphProfileSort = false; + config->callGraphProfileSort = CGProfileSortKind::None; } } @@ -1617,7 +1705,8 @@ static void readConfigs(opt::InputArgList &args) { if (Expected<GlobPattern> pat = GlobPattern::create(pattern)) config->warnBackrefsExclude.push_back(std::move(*pat)); else - error(arg->getSpelling() + ": " + toString(pat.takeError())); + error(arg->getSpelling() + ": " + toString(pat.takeError()) + ": " + + pattern); } // For -no-pie and -pie, --export-dynamic-symbol specifies defined symbols @@ -1691,14 +1780,10 @@ static void setConfigs(opt::InputArgList &args) { OPT_no_apply_dynamic_relocs, false) || !config->isRela; // Validation of dynamic relocation addends is on by default for assertions - // builds (for supported targets) and disabled otherwise. Ideally we would - // enable the debug checks for all targets, but currently not all targets - // have support for reading Elf_Rel addends, so we only enable for a subset. + // builds and disabled otherwise. This check is enabled when writeAddends is + // true. 
#ifndef NDEBUG - bool checkDynamicRelocsDefault = m == EM_AARCH64 || m == EM_ARM || - m == EM_386 || m == EM_LOONGARCH || - m == EM_MIPS || m == EM_RISCV || - m == EM_X86_64; + bool checkDynamicRelocsDefault = true; #else bool checkDynamicRelocsDefault = false; #endif @@ -1975,7 +2060,7 @@ static void handleUndefined(Symbol *sym, const char *option) { static void handleUndefinedGlob(StringRef arg) { Expected<GlobPattern> pat = GlobPattern::create(arg); if (!pat) { - error("--undefined-glob: " + toString(pat.takeError())); + error("--undefined-glob: " + toString(pat.takeError()) + ": " + arg); return; } @@ -2163,24 +2248,6 @@ static void replaceCommonSymbols() { } } -// If all references to a DSO happen to be weak, the DSO is not added to -// DT_NEEDED. If that happens, replace ShardSymbol with Undefined to avoid -// dangling references to an unneeded DSO. Use a weak binding to avoid -// --no-allow-shlib-undefined diagnostics. Similarly, demote lazy symbols. -static void demoteSharedAndLazySymbols() { - llvm::TimeTraceScope timeScope("Demote shared and lazy symbols"); - for (Symbol *sym : symtab.getSymbols()) { - auto *s = dyn_cast<SharedSymbol>(sym); - if (!(s && !cast<SharedFile>(s->file)->isNeeded) && !sym->isLazy()) - continue; - - uint8_t binding = sym->isLazy() ? sym->binding : uint8_t(STB_WEAK); - Undefined(nullptr, sym->getName(), binding, sym->stOther, sym->type) - .overwrite(*sym); - sym->versionId = VER_NDX_GLOBAL; - } -} - // The section referred to by `s` is considered address-significant. Set the // keepUnique flag on the section if appropriate. 
static void markAddrsig(Symbol *s) { @@ -2229,7 +2296,7 @@ static void findKeepUniqueSections(opt::InputArgList &args) { const uint8_t *cur = contents.begin(); while (cur != contents.end()) { unsigned size; - const char *err; + const char *err = nullptr; uint64_t symIndex = decodeULEB128(cur, &size, contents.end(), &err); if (err) fatal(toString(f) + ": could not decode addrsig section: " + err); @@ -2801,6 +2868,10 @@ void LinkerDriver::link(opt::InputArgList &args) { config->ltoEmitAsm || !config->thinLTOModulesToCompile.empty(); + // Handle --lto-validate-all-vtables-have-type-infos. + if (config->ltoValidateAllVtablesHaveTypeInfos) + invokeELFT(ltoValidateAllVtablesHaveTypeInfos, args); + // Do link-time optimization if given files are LLVM bitcode files. // This compiles bitcode files into real object files. // @@ -2934,12 +3005,16 @@ void LinkerDriver::link(opt::InputArgList &args) { // Garbage collection and removal of shared symbols from unused shared objects. invokeELFT(markLive,); - demoteSharedAndLazySymbols(); // Make copies of any input sections that need to be copied into each // partition. copySectionsIntoPartitions(); + if (canHaveMemtagGlobals()) { + llvm::TimeTraceScope timeScope("Process memory tagged symbols"); + createTaggedSymbols(ctx.objectFiles); + } + // Create synthesized sections such as .got and .plt. This is called before // processSectionCommands() so that they can be placed by SECTIONS commands. 
invokeELFT(createSyntheticSections,); @@ -2987,7 +3062,7 @@ void LinkerDriver::link(opt::InputArgList &args) { } // Read the callgraph now that we know what was gced or icfed - if (config->callGraphProfileSort) { + if (config->callGraphProfileSort != CGProfileSortKind::None) { if (auto *arg = args.getLastArg(OPT_call_graph_ordering_file)) if (std::optional<MemoryBufferRef> buffer = readFile(arg->getValue())) readCallGraph(*buffer); diff --git a/lld/ELF/Driver.h b/lld/ELF/Driver.h index 16cb52229e71..29a2b04af711 100644 --- a/lld/ELF/Driver.h +++ b/lld/ELF/Driver.h @@ -25,7 +25,7 @@ public: // Create enum with OPT_xxx values for each option in Options.td enum { OPT_INVALID = 0, -#define OPTION(_1, _2, ID, _4, _5, _6, _7, _8, _9, _10, _11, _12) OPT_##ID, +#define OPTION(...) LLVM_MAKE_OPT_ID(__VA_ARGS__), #include "Options.inc" #undef OPTION }; diff --git a/lld/ELF/DriverUtils.cpp b/lld/ELF/DriverUtils.cpp index 5e3f6d1459d8..0a27422e3b2d 100644 --- a/lld/ELF/DriverUtils.cpp +++ b/lld/ELF/DriverUtils.cpp @@ -43,9 +43,7 @@ using namespace lld::elf; // Create table mapping all options defined in Options.td static constexpr opt::OptTable::Info optInfo[] = { -#define OPTION(X1, X2, ID, KIND, GROUP, ALIAS, X7, X8, X9, X10, X11, X12) \ - {X1, X2, X10, X11, OPT_##ID, opt::Option::KIND##Class, \ - X9, X8, OPT_##GROUP, OPT_##ALIAS, X7, X12}, +#define OPTION(...) 
LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__), #include "Options.inc" #undef OPTION }; diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index d96b47b3585b..cc2c5916e05c 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -30,6 +30,7 @@ #include "llvm/Support/RISCVAttributeParser.h" #include "llvm/Support/TarWriter.h" #include "llvm/Support/raw_ostream.h" +#include <optional> using namespace llvm; using namespace llvm::ELF; @@ -40,6 +41,13 @@ using namespace llvm::support::endian; using namespace lld; using namespace lld::elf; +// This function is explicitly instantiated in ARM.cpp, don't do it here to avoid +// warnings with MSVC. +extern template void ObjFile<ELF32LE>::importCmseSymbols(); +extern template void ObjFile<ELF32BE>::importCmseSymbols(); +extern template void ObjFile<ELF64LE>::importCmseSymbols(); +extern template void ObjFile<ELF64BE>::importCmseSymbols(); + bool InputFile::isInGroup; uint32_t InputFile::nextGroupId; @@ -284,13 +292,6 @@ template <class ELFT> static void doParseFile(InputFile *file) { if (!isCompatible(file)) return; - // Binary file - if (auto *f = dyn_cast<BinaryFile>(file)) { - ctx.binaryFiles.push_back(f); - f->parse(); - return; - } - // Lazy object file if (file->lazy) { if (auto *f = dyn_cast<BitcodeFile>(file)) { @@ -305,27 +306,30 @@ template <class ELFT> static void doParseFile(InputFile *file) { if (config->trace) message(toString(file)); - // .so file - if (auto *f = dyn_cast<SharedFile>(file)) { + if (file->kind() == InputFile::ObjKind) { + ctx.objectFiles.push_back(cast<ELFFileBase>(file)); + cast<ObjFile<ELFT>>(file)->parse(); + } else if (auto *f = dyn_cast<SharedFile>(file)) { f->parse<ELFT>(); - return; - } - - // LLVM bitcode file - if (auto *f = dyn_cast<BitcodeFile>(file)) { + } else if (auto *f = dyn_cast<BitcodeFile>(file)) { ctx.bitcodeFiles.push_back(f); f->parse(); - return; + } else { + ctx.binaryFiles.push_back(cast<BinaryFile>(file)); + cast<BinaryFile>(file)->parse(); } - - // Regular
object file - ctx.objectFiles.push_back(cast<ELFFileBase>(file)); - cast<ObjFile<ELFT>>(file)->parse(); } // Add symbols in File to the symbol table. void elf::parseFile(InputFile *file) { invokeELFT(doParseFile, file); } +// This function is explicitly instantiated in ARM.cpp. Mark it extern here, +// to avoid warnings when building with MSVC. +extern template void ObjFile<ELF32LE>::importCmseSymbols(); +extern template void ObjFile<ELF32BE>::importCmseSymbols(); +extern template void ObjFile<ELF64LE>::importCmseSymbols(); +extern template void ObjFile<ELF64BE>::importCmseSymbols(); + template <class ELFT> static void doParseArmCMSEImportLib(InputFile *file) { cast<ObjFile<ELFT>>(file)->importCmseSymbols(); } @@ -345,7 +349,7 @@ static std::string createFileLineMsg(StringRef path, unsigned line) { template <class ELFT> static std::string getSrcMsgAux(ObjFile<ELFT> &file, const Symbol &sym, - InputSectionBase &sec, uint64_t offset) { + const InputSectionBase &sec, uint64_t offset) { // In DWARF, functions and variables are stored to different places. // First, look up a function for a given offset. if (std::optional<DILineInfo> info = file.getDILineInfo(&sec, offset)) @@ -360,7 +364,7 @@ static std::string getSrcMsgAux(ObjFile<ELFT> &file, const Symbol &sym, return std::string(file.sourceFile); } -std::string InputFile::getSrcMsg(const Symbol &sym, InputSectionBase &sec, +std::string InputFile::getSrcMsg(const Symbol &sym, const InputSectionBase &sec, uint64_t offset) { if (kind() != ObjKind) return ""; @@ -471,8 +475,8 @@ ObjFile<ELFT>::getVariableLoc(StringRef name) { // Returns source line information for a given offset // using DWARF debug info. template <class ELFT> -std::optional<DILineInfo> ObjFile<ELFT>::getDILineInfo(InputSectionBase *s, - uint64_t offset) { +std::optional<DILineInfo> +ObjFile<ELFT>::getDILineInfo(const InputSectionBase *s, uint64_t offset) { // Detect SectionIndex for specified section.
uint64_t sectionIndex = object::SectionedAddress::UndefSection; ArrayRef<InputSectionBase *> sections = s->file->getSections(); @@ -601,9 +605,9 @@ template <class ELFT> void ObjFile<ELFT>::parse(bool ignoreComdats) { check(this->getObj().getSectionContents(sec)); StringRef name = check(obj.getSectionName(sec, shstrtab)); this->sections[i] = &InputSection::discarded; - if (Error e = - attributes.parse(contents, ekind == ELF32LEKind ? support::little - : support::big)) { + if (Error e = attributes.parse(contents, ekind == ELF32LEKind + ? llvm::endianness::little + : llvm::endianness::big)) { InputSection isec(*this, sec, name); warn(toString(&isec) + ": " + llvm::toString(std::move(e))); } else { @@ -621,6 +625,16 @@ template <class ELFT> void ObjFile<ELFT>::parse(bool ignoreComdats) { } } + // Producing a static binary with MTE globals is not currently supported, + // remove all SHT_AARCH64_MEMTAG_GLOBALS_STATIC sections as they're unused + // metadata, and we don't want them to end up in the output file for static + // executables. 
+ if (sec.sh_type == SHT_AARCH64_MEMTAG_GLOBALS_STATIC && + !canHaveMemtagGlobals()) { + this->sections[i] = &InputSection::discarded; + continue; + } + if (sec.sh_type != SHT_GROUP) continue; StringRef signature = getShtGroupSignature(objSections, sec); @@ -1533,7 +1547,7 @@ template <class ELFT> void SharedFile::parse() { SharedSymbol{*this, name, sym.getBinding(), sym.st_other, sym.getType(), sym.st_value, sym.st_size, alignment}); if (s->file == this) - s->verdefIndex = ver; + s->versionId = ver; } // Also add the symbol with the versioned name to handle undefined symbols @@ -1550,7 +1564,7 @@ template <class ELFT> void SharedFile::parse() { SharedSymbol{*this, saver().save(name), sym.getBinding(), sym.st_other, sym.getType(), sym.st_value, sym.st_size, alignment}); if (s->file == this) - s->verdefIndex = idx; + s->versionId = idx; } } @@ -1569,7 +1583,9 @@ static uint16_t getBitcodeMachineKind(StringRef path, const Triple &t) { case Triple::r600: return EM_AMDGPU; case Triple::arm: + case Triple::armeb: case Triple::thumb: + case Triple::thumbeb: return EM_ARM; case Triple::avr: return EM_AVR; @@ -1594,6 +1610,8 @@ static uint16_t getBitcodeMachineKind(StringRef path, const Triple &t) { case Triple::riscv32: case Triple::riscv64: return EM_RISCV; + case Triple::sparcv9: + return EM_SPARCV9; case Triple::x86: return t.isOSIAMCU() ? 
EM_IAMCU : EM_386; case Triple::x86_64: @@ -1756,15 +1774,15 @@ void BinaryFile::parse() { llvm::StringSaver &saver = lld::saver(); - symtab.addAndCheckDuplicate(Defined{nullptr, saver.save(s + "_start"), + symtab.addAndCheckDuplicate(Defined{this, saver.save(s + "_start"), STB_GLOBAL, STV_DEFAULT, STT_OBJECT, 0, 0, section}); - symtab.addAndCheckDuplicate(Defined{nullptr, saver.save(s + "_end"), - STB_GLOBAL, STV_DEFAULT, STT_OBJECT, - data.size(), 0, section}); - symtab.addAndCheckDuplicate(Defined{nullptr, saver.save(s + "_size"), - STB_GLOBAL, STV_DEFAULT, STT_OBJECT, - data.size(), 0, nullptr}); + symtab.addAndCheckDuplicate(Defined{this, saver.save(s + "_end"), STB_GLOBAL, + STV_DEFAULT, STT_OBJECT, data.size(), 0, + section}); + symtab.addAndCheckDuplicate(Defined{this, saver.save(s + "_size"), STB_GLOBAL, + STV_DEFAULT, STT_OBJECT, data.size(), 0, + nullptr}); } ELFFileBase *elf::createObjFile(MemoryBufferRef mb, StringRef archiveName, @@ -1809,7 +1827,7 @@ template <class ELFT> void ObjFile<ELFT>::parseLazy() { } } -bool InputFile::shouldExtractForCommon(StringRef name) { +bool InputFile::shouldExtractForCommon(StringRef name) const { if (isa<BitcodeFile>(this)) return isBitcodeNonCommonDef(mb, name, archiveName); diff --git a/lld/ELF/InputFiles.h b/lld/ELF/InputFiles.h index cc658bdc2319..ab98d78fcf14 100644 --- a/lld/ELF/InputFiles.h +++ b/lld/ELF/InputFiles.h @@ -101,7 +101,7 @@ public: // Check if a non-common symbol should be extracted to override a common // definition. - bool shouldExtractForCommon(StringRef name); + bool shouldExtractForCommon(StringRef name) const; // .got2 in the current file. This is used by PPC32 -fPIC/-fPIE to compute // offsets in PLT call stubs. @@ -133,7 +133,7 @@ public: // True if this is an argument for --just-symbols. Usually false. 
bool justSymbols = false; - std::string getSrcMsg(const Symbol &sym, InputSectionBase &sec, + std::string getSrcMsg(const Symbol &sym, const InputSectionBase &sec, uint64_t offset); // On PPC64 we need to keep track of which files contain small code model @@ -255,7 +255,8 @@ public: return getSymbol(symIndex); } - std::optional<llvm::DILineInfo> getDILineInfo(InputSectionBase *, uint64_t); + std::optional<llvm::DILineInfo> getDILineInfo(const InputSectionBase *, + uint64_t); std::optional<std::pair<std::string, unsigned>> getVariableLoc(StringRef name); @@ -289,7 +290,6 @@ public: void initSectionsAndLocalSyms(bool ignoreComdats); void postParse(); void importCmseSymbols(); - void redirectCmseSymbols(); private: void initializeSections(bool ignoreComdats, diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp index 2edaa2b40493..81468a20dfb5 100644 --- a/lld/ELF/InputSection.cpp +++ b/lld/ELF/InputSection.cpp @@ -22,6 +22,7 @@ #include "llvm/Support/xxhash.h" #include <algorithm> #include <mutex> +#include <optional> #include <vector> using namespace llvm; @@ -241,18 +242,19 @@ InputSection *InputSectionBase::getLinkOrderDep() const { return cast<InputSection>(file->getSections()[link]); } -// Find a function symbol that encloses a given location. -Defined *InputSectionBase::getEnclosingFunction(uint64_t offset) { +// Find a symbol that encloses a given location. +Defined *InputSectionBase::getEnclosingSymbol(uint64_t offset, + uint8_t type) const { for (Symbol *b : file->getSymbols()) if (Defined *d = dyn_cast<Defined>(b)) - if (d->section == this && d->type == STT_FUNC && d->value <= offset && - offset < d->value + d->size) + if (d->section == this && d->value <= offset && + offset < d->value + d->size && (type == 0 || type == d->type)) return d; return nullptr; } // Returns an object file location string. Used to construct an error message. 
-std::string InputSectionBase::getLocation(uint64_t offset) { +std::string InputSectionBase::getLocation(uint64_t offset) const { std::string secAndOffset = (name + "+0x" + Twine::utohexstr(offset) + ")").str(); @@ -273,7 +275,8 @@ std::string InputSectionBase::getLocation(uint64_t offset) { // foo.c:42 (/home/alice/possibly/very/long/path/foo.c:42) // // Returns an empty string if there's no way to get line info. -std::string InputSectionBase::getSrcMsg(const Symbol &sym, uint64_t offset) { +std::string InputSectionBase::getSrcMsg(const Symbol &sym, + uint64_t offset) const { return file->getSrcMsg(sym, *this, offset); } @@ -286,7 +289,7 @@ std::string InputSectionBase::getSrcMsg(const Symbol &sym, uint64_t offset) { // or // // path/to/foo.o:(function bar) in archive path/to/bar.a -std::string InputSectionBase::getObjMsg(uint64_t off) { +std::string InputSectionBase::getObjMsg(uint64_t off) const { std::string filename = std::string(file->getName()); std::string archive; @@ -296,10 +299,8 @@ std::string InputSectionBase::getObjMsg(uint64_t off) { // Find a symbol that encloses a given location. getObjMsg may be called // before ObjFile::initSectionsAndLocalSyms where local symbols are // initialized. - for (Symbol *b : file->getSymbols()) - if (auto *d = dyn_cast_or_null<Defined>(b)) - if (d->section == this && d->value <= off && off < d->value + d->size) - return filename + ":(" + toString(*d) + ")" + archive; + if (Defined *d = getEnclosingSymbol(off)) + return filename + ":(" + toString(*d) + ")" + archive; // If there's no symbol, print out the offset in the section. 
return (filename + ":(" + name + "+0x" + utohexstr(off) + ")" + archive) @@ -349,29 +350,61 @@ InputSectionBase *InputSection::getRelocatedSection() const { return sections[info]; } +template <class ELFT, class RelTy> +void InputSection::copyRelocations(uint8_t *buf) { + if (config->relax && !config->relocatable && config->emachine == EM_RISCV) { + // On RISC-V, relaxation might change relocations: copy from internal ones + // that are updated by relaxation. + InputSectionBase *sec = getRelocatedSection(); + copyRelocations<ELFT, RelTy>(buf, llvm::make_range(sec->relocations.begin(), + sec->relocations.end())); + } else { + // Convert the raw relocations in the input section into Relocation objects + // suitable to be used by copyRelocations below. + struct MapRel { + const ObjFile<ELFT> &file; + Relocation operator()(const RelTy &rel) const { + // RelExpr is not used so set to a dummy value. + return Relocation{R_NONE, rel.getType(config->isMips64EL), rel.r_offset, + getAddend<ELFT>(rel), &file.getRelocTargetSym(rel)}; + } + }; + + using RawRels = ArrayRef<RelTy>; + using MapRelIter = + llvm::mapped_iterator<typename RawRels::iterator, MapRel>; + auto mapRel = MapRel{*getFile<ELFT>()}; + RawRels rawRels = getDataAs<RelTy>(); + auto rels = llvm::make_range(MapRelIter(rawRels.begin(), mapRel), + MapRelIter(rawRels.end(), mapRel)); + copyRelocations<ELFT, RelTy>(buf, rels); + } +} + // This is used for -r and --emit-relocs. We can't use memcpy to copy // relocations because we need to update symbol table offset and section index // for each relocation. So we copy relocations one by one. 
-template <class ELFT, class RelTy> -void InputSection::copyRelocations(uint8_t *buf, ArrayRef<RelTy> rels) { +template <class ELFT, class RelTy, class RelIt> +void InputSection::copyRelocations(uint8_t *buf, + llvm::iterator_range<RelIt> rels) { const TargetInfo &target = *elf::target; InputSectionBase *sec = getRelocatedSection(); (void)sec->contentMaybeDecompress(); // uncompress if needed - for (const RelTy &rel : rels) { - RelType type = rel.getType(config->isMips64EL); + for (const Relocation &rel : rels) { + RelType type = rel.type; const ObjFile<ELFT> *file = getFile<ELFT>(); - Symbol &sym = file->getRelocTargetSym(rel); + Symbol &sym = *rel.sym; auto *p = reinterpret_cast<typename ELFT::Rela *>(buf); buf += sizeof(RelTy); if (RelTy::IsRela) - p->r_addend = getAddend<ELFT>(rel); + p->r_addend = rel.addend; // Output section VA is zero for -r, so r_offset is an offset within the // section, but for --emit-relocs it is a virtual address. - p->r_offset = sec->getVA(rel.r_offset); + p->r_offset = sec->getVA(rel.offset); p->setSymbolAndType(in.symTab->getSymbolIndex(&sym), type, config->isMips64EL); @@ -403,13 +436,10 @@ void InputSection::copyRelocations(uint8_t *buf, ArrayRef<RelTy> rels) { continue; } SectionBase *section = d->section; - if (!section->isLive()) { - p->setSymbolAndType(0, 0, false); - continue; - } + assert(section->isLive()); - int64_t addend = getAddend<ELFT>(rel); - const uint8_t *bufLoc = sec->content().begin() + rel.r_offset; + int64_t addend = rel.addend; + const uint8_t *bufLoc = sec->content().begin() + rel.offset; if (!RelTy::IsRela) addend = target.getImplicitAddend(bufLoc, type); @@ -431,8 +461,14 @@ void InputSection::copyRelocations(uint8_t *buf, ArrayRef<RelTy> rels) { if (RelTy::IsRela) p->r_addend = sym.getVA(addend) - section->getOutputSection()->addr; - else if (config->relocatable && type != target.noneRel) - sec->addReloc({R_ABS, type, rel.r_offset, addend, &sym}); + // For SHF_ALLOC sections relocated by REL, append a 
relocation to + // sec->relocations so that relocateAlloc transitively called by + // writeSections will update the implicit addend. Non-SHF_ALLOC sections + // utilize relocateNonAlloc to process raw relocations and do not need + // this sec->relocations change. + else if (config->relocatable && (sec->flags & SHF_ALLOC) && + type != target.noneRel) + sec->addReloc({R_ABS, type, rel.offset, addend, &sym}); } else if (config->emachine == EM_PPC && type == R_PPC_PLTREL24 && p->r_addend >= 0x8000 && sec->file->ppc32Got2) { // Similar to R_MIPS_GPREL{16,32}. If the addend of R_PPC_PLTREL24 @@ -839,6 +875,16 @@ uint64_t InputSectionBase::getRelocTargetVA(const InputFile *file, RelType type, } } +// Overwrite a ULEB128 value and keep the original length. +static uint64_t overwriteULEB128(uint8_t *bufLoc, uint64_t val) { + while (*bufLoc & 0x80) { + *bufLoc++ = 0x80 | (val & 0x7f); + val >>= 7; + } + *bufLoc = val; + return val; +} + // This function applies relocations to sections without SHF_ALLOC bit. // Such sections are never mapped to memory at runtime. Debug sections are // an example. Relocations in non-alloc sections are much easier to @@ -850,6 +896,7 @@ template <class ELFT, class RelTy> void InputSection::relocateNonAlloc(uint8_t *buf, ArrayRef<RelTy> rels) { const unsigned bits = sizeof(typename ELFT::uint) * 8; const TargetInfo &target = *elf::target; + const auto emachine = config->emachine; const bool isDebug = isDebugSection(*this); const bool isDebugLocOrRanges = isDebug && (name == ".debug_loc" || name == ".debug_ranges"); @@ -861,17 +908,10 @@ void InputSection::relocateNonAlloc(uint8_t *buf, ArrayRef<RelTy> rels) { break; } - for (const RelTy &rel : rels) { - RelType type = rel.getType(config->isMips64EL); - - // GCC 8.0 or earlier have a bug that they emit R_386_GOTPC relocations - // against _GLOBAL_OFFSET_TABLE_ for .debug_info. 
The bug has been fixed - // in 2017 (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82630), but we - // need to keep this bug-compatible code for a while. - if (config->emachine == EM_386 && type == R_386_GOTPC) - continue; - - uint64_t offset = rel.r_offset; + for (size_t i = 0, relsSize = rels.size(); i != relsSize; ++i) { + const RelTy &rel = rels[i]; + const RelType type = rel.getType(config->isMips64EL); + const uint64_t offset = rel.r_offset; uint8_t *bufLoc = buf + offset; int64_t addend = getAddend<ELFT>(rel); if (!RelTy::IsRela) @@ -881,6 +921,30 @@ void InputSection::relocateNonAlloc(uint8_t *buf, ArrayRef<RelTy> rels) { RelExpr expr = target.getRelExpr(type, sym, bufLoc); if (expr == R_NONE) continue; + auto *ds = dyn_cast<Defined>(&sym); + + if (emachine == EM_RISCV && type == R_RISCV_SET_ULEB128) { + if (++i < relsSize && + rels[i].getType(/*isMips64EL=*/false) == R_RISCV_SUB_ULEB128 && + rels[i].r_offset == offset) { + uint64_t val; + if (!ds && tombstone) { + val = *tombstone; + } else { + val = sym.getVA(addend) - + (getFile<ELFT>()->getRelocTargetSym(rels[i]).getVA(0) + + getAddend<ELFT>(rels[i])); + } + if (overwriteULEB128(bufLoc, val) >= 0x80) + errorOrWarn(getLocation(offset) + ": ULEB128 value " + Twine(val) + + " exceeds available space; references '" + + lld::toString(sym) + "'"); + continue; + } + errorOrWarn(getLocation(offset) + + ": R_RISCV_SET_ULEB128 not paired with R_RISCV_SUB_SET128"); + return; + } if (tombstone || (isDebug && (type == target.symbolicRel || expr == R_DTPREL))) { @@ -912,7 +976,6 @@ void InputSection::relocateNonAlloc(uint8_t *buf, ArrayRef<RelTy> rels) { // // TODO To reduce disruption, we use 0 instead of -1 as the tombstone // value. Enable -1 in a future release. - auto *ds = dyn_cast<Defined>(&sym); if (!sym.getOutputSection() || (ds && ds->folded && !isDebugLine)) { // If -z dead-reloc-in-nonalloc= is specified, respect it. const uint64_t value = tombstone ? 
SignExtend64<bits>(*tombstone) @@ -922,29 +985,31 @@ void InputSection::relocateNonAlloc(uint8_t *buf, ArrayRef<RelTy> rels) { } } - // For a relocatable link, only tombstone values are applied. - if (config->relocatable) + // For a relocatable link, content relocated by RELA remains unchanged and + // we can stop here, while content relocated by REL referencing STT_SECTION + // needs updating implicit addends. + if (config->relocatable && (RelTy::IsRela || sym.type != STT_SECTION)) continue; - if (expr == R_SIZE) { - target.relocateNoSym(bufLoc, type, - SignExtend64<bits>(sym.getSize() + addend)); - continue; - } - // R_ABS/R_DTPREL and some other relocations can be used from non-SHF_ALLOC // sections. - if (expr == R_ABS || expr == R_DTPREL || expr == R_GOTPLTREL || + if (LLVM_LIKELY(expr == R_ABS) || expr == R_DTPREL || expr == R_GOTPLTREL || expr == R_RISCV_ADD) { target.relocateNoSym(bufLoc, type, SignExtend64<bits>(sym.getVA(addend))); continue; } + if (expr == R_SIZE) { + target.relocateNoSym(bufLoc, type, + SignExtend64<bits>(sym.getSize() + addend)); + continue; + } + std::string msg = getLocation(offset) + ": has non-ABS relocation " + toString(type) + " against symbol '" + toString(sym) + "'"; - if (expr != R_PC && expr != R_ARM_PCA) { - error(msg); + if (expr != R_PC && !(emachine == EM_386 && type == R_386_GOTPC)) { + errorOrWarn(msg); return; } @@ -955,6 +1020,11 @@ void InputSection::relocateNonAlloc(uint8_t *buf, ArrayRef<RelTy> rels) { // relocations without any errors and relocate them as if they were at // address 0. For bug-compatibility, we accept them with warnings. We // know Steel Bank Common Lisp as of 2018 have this bug. + // + // GCC 8.0 or earlier have a bug that they emit R_386_GOTPC relocations + // against _GLOBAL_OFFSET_TABLE_ for .debug_info. The bug has been fixed in + // 2017 (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82630), but we need to + // keep this bug-compatible code for a while. 
warn(msg); target.relocateNoSym( bufLoc, type, @@ -962,23 +1032,6 @@ void InputSection::relocateNonAlloc(uint8_t *buf, ArrayRef<RelTy> rels) { } } -// This is used when '-r' is given. -// For REL targets, InputSection::copyRelocations() may store artificial -// relocations aimed to update addends. They are handled in relocateAlloc() -// for allocatable sections, and this function does the same for -// non-allocatable sections, such as sections with debug information. -static void relocateNonAllocForRelocatable(InputSection *sec, uint8_t *buf) { - const unsigned bits = config->is64 ? 64 : 32; - - for (const Relocation &rel : sec->relocs()) { - // InputSection::copyRelocations() adds only R_ABS relocations. - assert(rel.expr == R_ABS); - uint8_t *bufLoc = buf + rel.offset; - uint64_t targetVA = SignExtend64(rel.sym->getVA(rel.addend), bits); - target->relocate(bufLoc, rel, targetVA); - } -} - template <class ELFT> void InputSectionBase::relocate(uint8_t *buf, uint8_t *bufEnd) { if ((flags & SHF_EXECINSTR) && LLVM_UNLIKELY(getFile<ELFT>()->splitStack)) @@ -990,8 +1043,6 @@ void InputSectionBase::relocate(uint8_t *buf, uint8_t *bufEnd) { } auto *sec = cast<InputSection>(this); - if (config->relocatable) - relocateNonAllocForRelocatable(sec, buf); // For a relocatable link, also call relocateNonAlloc() to rewrite applicable // locations with tombstone values. const RelsOrRelas<ELFT> rels = sec->template relsOrRelas<ELFT>(); @@ -1106,11 +1157,11 @@ template <class ELFT> void InputSection::writeTo(uint8_t *buf) { // If -r or --emit-relocs is given, then an InputSection // may be a relocation section. 
if (LLVM_UNLIKELY(type == SHT_RELA)) { - copyRelocations<ELFT>(buf, getDataAs<typename ELFT::Rela>()); + copyRelocations<ELFT, typename ELFT::Rela>(buf); return; } if (LLVM_UNLIKELY(type == SHT_REL)) { - copyRelocations<ELFT>(buf, getDataAs<typename ELFT::Rel>()); + copyRelocations<ELFT, typename ELFT::Rel>(buf); return; } diff --git a/lld/ELF/InputSection.h b/lld/ELF/InputSection.h index 15122d6abd6b..fbaea57bd586 100644 --- a/lld/ELF/InputSection.h +++ b/lld/ELF/InputSection.h @@ -189,14 +189,17 @@ public: InputSection *getLinkOrderDep() const; - // Get the function symbol that encloses this offset from within the - // section. - Defined *getEnclosingFunction(uint64_t offset); + // Get a symbol that encloses this offset from within the section. If type is + // not zero, return a symbol with the specified type. + Defined *getEnclosingSymbol(uint64_t offset, uint8_t type = 0) const; + Defined *getEnclosingFunction(uint64_t offset) const { + return getEnclosingSymbol(offset, llvm::ELF::STT_FUNC); + } // Returns a source location string. Used to construct an error message. - std::string getLocation(uint64_t offset); - std::string getSrcMsg(const Symbol &sym, uint64_t offset); - std::string getObjMsg(uint64_t offset); + std::string getLocation(uint64_t offset) const; + std::string getSrcMsg(const Symbol &sym, uint64_t offset) const; + std::string getObjMsg(uint64_t offset) const; // Each section knows how to relocate itself. 
These functions apply // relocations, assuming that Buf points to this section's copy in @@ -396,8 +399,10 @@ public: static InputSection discarded; private: - template <class ELFT, class RelTy> - void copyRelocations(uint8_t *buf, llvm::ArrayRef<RelTy> rels); + template <class ELFT, class RelTy> void copyRelocations(uint8_t *buf); + + template <class ELFT, class RelTy, class RelIt> + void copyRelocations(uint8_t *buf, llvm::iterator_range<RelIt> rels); template <class ELFT> void copyShtGroup(uint8_t *buf); }; diff --git a/lld/ELF/LTO.cpp b/lld/ELF/LTO.cpp index e8bfa903726d..504c12aac6c5 100644 --- a/lld/ELF/LTO.cpp +++ b/lld/ELF/LTO.cpp @@ -13,6 +13,7 @@ #include "Symbols.h" #include "lld/Common/Args.h" #include "lld/Common/ErrorHandler.h" +#include "lld/Common/Filesystem.h" #include "lld/Common/Strings.h" #include "lld/Common/TargetOptionsCommandFlags.h" #include "llvm/ADT/SmallString.h" @@ -40,32 +41,6 @@ using namespace llvm::ELF; using namespace lld; using namespace lld::elf; -// Creates an empty file to store a list of object files for final -// linking of distributed ThinLTO. -static std::unique_ptr<raw_fd_ostream> openFile(StringRef file) { - std::error_code ec; - auto ret = - std::make_unique<raw_fd_ostream>(file, ec, sys::fs::OpenFlags::OF_None); - if (ec) { - error("cannot open " + file + ": " + ec.message()); - return nullptr; - } - return ret; -} - -// The merged bitcode after LTO is large. Try opening a file stream that -// supports reading, seeking and writing. Such a file allows BitcodeWriter to -// flush buffered data to reduce memory consumption. If this fails, open a file -// stream that supports only write. 
-static std::unique_ptr<raw_fd_ostream> openLTOOutputFile(StringRef file) { - std::error_code ec; - std::unique_ptr<raw_fd_ostream> fs = - std::make_unique<raw_fd_stream>(file, ec); - if (!ec) - return fs; - return openFile(file); -} - static std::string getThinLTOOutputFile(StringRef modulePath) { return lto::getThinLTOOutputFile(modulePath, config->thinLTOPrefixReplaceOld, config->thinLTOPrefixReplaceNew); @@ -152,6 +127,9 @@ static lto::Config createConfig() { c.DwoDir = std::string(config->dwoDir); c.HasWholeProgramVisibility = config->ltoWholeProgramVisibility; + c.ValidateAllVtablesHaveTypeInfos = + config->ltoValidateAllVtablesHaveTypeInfos; + c.AllVtablesHaveTypeInfos = ctx.ltoAllVtablesHaveTypeInfos; c.AlwaysEmitRegularLTOObj = !config->ltoObjPath.empty(); for (const llvm::StringRef &name : config->thinLTOModulesToCompile) @@ -174,7 +152,7 @@ static lto::Config createConfig() { } if (config->ltoEmitAsm) { - c.CGFileType = CGFT_AssemblyFile; + c.CGFileType = CodeGenFileType::AssemblyFile; c.Options.MCOptions.AsmVerbose = true; } diff --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp index 28e9f0461b2d..28ae4b854306 100644 --- a/lld/ELF/LinkerScript.cpp +++ b/lld/ELF/LinkerScript.cpp @@ -51,11 +51,8 @@ static bool isSectionPrefix(StringRef prefix, StringRef name) { } static StringRef getOutputSectionName(const InputSectionBase *s) { - if (config->relocatable) - return s->name; - - // This is for --emit-relocs. If .text.foo is emitted as .text.bar, we want - // to emit .rela.text.foo as .rela.text.bar for consistency (this is not + // This is for --emit-relocs and -r. If .text.foo is emitted as .text.bar, we + // want to emit .rela.text.foo as .rela.text.bar for consistency (this is not // technically required, but not doing it is odd). This code guarantees that. 
if (auto *isec = dyn_cast<InputSection>(s)) { if (InputSectionBase *rel = isec->getRelocatedSection()) { @@ -66,6 +63,9 @@ static StringRef getOutputSectionName(const InputSectionBase *s) { } } + if (config->relocatable) + return s->name; + // A BssSection created for a common symbol is identified as "COMMON" in // linker scripts. It should go to .bss section. if (s->name == "COMMON") @@ -122,11 +122,6 @@ uint64_t ExprValue::getSecAddr() const { } uint64_t ExprValue::getSectionOffset() const { - // If the alignment is trivial, we don't have to compute the full - // value to know the offset. This allows this function to succeed in - // cases where the output section is not yet known. - if (alignment == 1 && !sec) - return val; return getValue() - getSecAddr(); } @@ -174,9 +169,16 @@ void LinkerScript::expandOutputSection(uint64_t size) { void LinkerScript::setDot(Expr e, const Twine &loc, bool inSec) { uint64_t val = e().getValue(); - if (val < dot && inSec) - error(loc + ": unable to move location counter backward for: " + - state->outSec->name); + // If val is smaller and we are in an output section, record the error and + // report it if this is the last assignAddresses iteration. dot may be smaller + // if there is another assignAddresses iteration. + if (val < dot && inSec) { + backwardDotErr = + (loc + ": unable to move location counter (0x" + Twine::utohexstr(dot) + + ") backward to 0x" + Twine::utohexstr(val) + " for section '" + + state->outSec->name + "'") + .str(); + } // Update to location counter means update to section size. if (inSec) @@ -247,8 +249,13 @@ static void declareSymbol(SymbolAssignment *cmd) { Defined newSym(nullptr, cmd->name, STB_GLOBAL, visibility, STT_NOTYPE, 0, 0, nullptr); - // We can't calculate final value right now. + // If the symbol is already defined, its order is 0 (with absence indicating + // 0); otherwise it's assigned the order of the SymbolAssignment. 
Symbol *sym = symtab.insert(cmd->name); + if (!sym->isDefined()) + ctx.scriptSymOrder.insert({sym, cmd->symOrder}); + + // We can't calculate final value right now. sym->mergeProperties(newSym); newSym.overwrite(*sym); @@ -882,6 +889,10 @@ void LinkerScript::diagnoseOrphanHandling() const { if (config->orphanHandling == OrphanHandlingPolicy::Place) return; for (const InputSectionBase *sec : orphanSections) { + // .relro_padding is inserted before DATA_SEGMENT_RELRO_END, if present, + // automatically. The section is not supposed to be specified by scripts. + if (sec == in.relroPadding.get()) + continue; // Input SHT_REL[A] retained by --emit-relocs are ignored by // computeInputSections(). Don't warn/error. if (isa<InputSection>(sec) && @@ -1074,6 +1085,11 @@ void LinkerScript::assignOffsets(OutputSection *sec) { } } + // If .relro_padding is present, round up the end to a common-page-size + // boundary to protect the last page. + if (in.relroPadding && sec == in.relroPadding->getParent()) + expandOutputSection(alignToPowerOf2(dot, config->commonPageSize) - dot); + // Non-SHF_ALLOC sections do not affect the addresses of other OutputSections // as they are not part of the process image. if (!(sec->flags & SHF_ALLOC)) { @@ -1151,10 +1167,13 @@ void LinkerScript::adjustOutputSections() { // * The address assignment. // The other option is to pick flags that minimize the impact the section // will have on the rest of the linker. That is why we copy the flags from - // the previous sections. Only a few flags are needed to keep the impact low. + // the previous sections. We copy just SHF_ALLOC and SHF_WRITE to keep the + // impact low. We do not propagate SHF_EXECINSTR as in some cases this can + // lead to executable writeable section. 
uint64_t flags = SHF_ALLOC; SmallVector<StringRef, 0> defPhdrs; + bool seenRelro = false; for (SectionCommand *&cmd : sectionCommands) { if (!isa<OutputDesc>(cmd)) continue; @@ -1176,8 +1195,8 @@ void LinkerScript::adjustOutputSections() { // We do not want to keep any special flags for output section // in case it is empty. if (isEmpty) - sec->flags = flags & ((sec->nonAlloc ? 0 : (uint64_t)SHF_ALLOC) | - SHF_WRITE | SHF_EXECINSTR); + sec->flags = + flags & ((sec->nonAlloc ? 0 : (uint64_t)SHF_ALLOC) | SHF_WRITE); // The code below may remove empty output sections. We should save the // specified program headers (if exist) and propagate them to subsequent @@ -1191,9 +1210,17 @@ void LinkerScript::adjustOutputSections() { if (sec->sectionIndex != UINT32_MAX) maybePropagatePhdrs(*sec, defPhdrs); + // Discard .relro_padding if we have not seen one RELRO section. Note: when + // .tbss is the only RELRO section, there is no associated PT_LOAD segment + // (needsPtLoad), so we don't append .relro_padding in the case. 
+ if (in.relroPadding && in.relroPadding->getParent() == sec && !seenRelro) + discardable = true; if (discardable) { sec->markDead(); cmd = nullptr; + } else { + seenRelro |= + sec->relro && !(sec->type == SHT_NOBITS && (sec->flags & SHF_TLS)); } } @@ -1325,6 +1352,7 @@ const Defined *LinkerScript::assignAddresses() { state = &st; errorOnMissingSection = true; st.outSec = aether; + backwardDotErr.clear(); SymbolAssignmentMap oldValues = getSymbolAssignmentValues(sectionCommands); for (SectionCommand *cmd : sectionCommands) { @@ -1476,7 +1504,9 @@ static void checkMemoryRegion(const MemoryRegion *region, } } -void LinkerScript::checkMemoryRegions() const { +void LinkerScript::checkFinalScriptConditions() const { + if (backwardDotErr.size()) + errorOrWarn(backwardDotErr); for (const OutputSection *sec : outputSections) { if (const MemoryRegion *memoryRegion = sec->memRegion) checkMemoryRegion(memoryRegion, sec, sec->addr); diff --git a/lld/ELF/LinkerScript.h b/lld/ELF/LinkerScript.h index 8b8320f9f18e..18eaf58b785e 100644 --- a/lld/ELF/LinkerScript.h +++ b/lld/ELF/LinkerScript.h @@ -86,9 +86,9 @@ struct SectionCommand { // This represents ". = <expr>" or "<symbol> = <expr>". struct SymbolAssignment : SectionCommand { - SymbolAssignment(StringRef name, Expr e, std::string loc) + SymbolAssignment(StringRef name, Expr e, unsigned symOrder, std::string loc) : SectionCommand(AssignmentKind), name(name), expression(e), - location(loc) {} + symOrder(symOrder), location(loc) {} static bool classof(const SectionCommand *c) { return c->kind == AssignmentKind; @@ -105,6 +105,11 @@ struct SymbolAssignment : SectionCommand { bool provide = false; bool hidden = false; + // This assignment references DATA_SEGMENT_RELRO_END. + bool dataSegmentRelroEnd = false; + + unsigned symOrder; + // Holds file name and line number for error reporting. std::string location; @@ -340,8 +345,8 @@ public: // Describe memory region usage. 
void printMemoryUsage(raw_ostream &os); - // Verify memory/lma overflows. - void checkMemoryRegions() const; + // Check backward location counter assignment and memory region/LMA overflows. + void checkFinalScriptConditions() const; // SECTIONS command list. SmallVector<SectionCommand *, 0> sectionCommands; @@ -350,7 +355,10 @@ public: SmallVector<PhdrsCommand, 0> phdrsCommands; bool hasSectionsCommand = false; + bool seenDataAlign = false; + bool seenRelroEnd = false; bool errorOnMissingSection = false; + std::string backwardDotErr; // List of section patterns specified with KEEP commands. They will // be kept even if they are unused and --gc-sections is specified. diff --git a/lld/ELF/MapFile.cpp b/lld/ELF/MapFile.cpp index 1b6dfcc57176..8b10ae183ae3 100644 --- a/lld/ELF/MapFile.cpp +++ b/lld/ELF/MapFile.cpp @@ -229,7 +229,7 @@ static void writeCref(raw_fd_ostream &os) { if (isa<SharedSymbol>(sym)) map[sym].insert(file); if (auto *d = dyn_cast<Defined>(sym)) - if (!d->isLocal() && (!d->section || d->section->isLive())) + if (!d->isLocal()) map[d].insert(file); } } diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td index 0d5c6c3d80a1..c2c9cabc92a4 100644 --- a/lld/ELF/Options.td +++ b/lld/ELF/Options.td @@ -42,6 +42,9 @@ def Bno_symbolic: F<"Bno-symbolic">, HelpText<"Don't bind default visibility def def Bsymbolic: F<"Bsymbolic">, HelpText<"Bind default visibility defined symbols locally for -shared">; +def Bsymbolic_non_weak: F<"Bsymbolic-non-weak">, + HelpText<"Bind default visibility defined STB_GLOBAL symbols locally for -shared">; + def Bsymbolic_functions: F<"Bsymbolic-functions">, HelpText<"Bind default visibility defined function symbols locally for -shared">; @@ -125,9 +128,12 @@ defm as_needed: B<"as-needed", defm call_graph_ordering_file: Eq<"call-graph-ordering-file", "Layout sections to optimize the given callgraph">; -defm call_graph_profile_sort: BB<"call-graph-profile-sort", - "Reorder sections with call graph profile (default)", - "Do not 
reorder sections with call graph profile">; +def call_graph_profile_sort: JJ<"call-graph-profile-sort=">, + HelpText<"Reorder input sections with call graph profile using the specified algorithm (default: cdsort)">, + MetaVarName<"[none,hfsort,cdsort]">, + Values<"none,hfsort,cdsort">; +def : FF<"no-call-graph-profile-sort">, Alias<call_graph_profile_sort>, AliasArgs<["none"]>, + Flags<[HelpHidden]>; // --chroot doesn't have a help text because it is an internal option. def chroot: Separate<["--"], "chroot">; @@ -601,9 +607,14 @@ def lto_cs_profile_file: JJ<"lto-cs-profile-file=">, defm lto_pgo_warn_mismatch: BB<"lto-pgo-warn-mismatch", "turn on warnings about profile cfg mismatch (default)", "turn off warnings about profile cfg mismatch">; +defm lto_known_safe_vtables : EEq<"lto-known-safe-vtables", + "When --lto-validate-all-vtables-have-type-infos is enabled, skip validation on these vtables (_ZTV symbols)">; def lto_obj_path_eq: JJ<"lto-obj-path=">; def lto_sample_profile: JJ<"lto-sample-profile=">, HelpText<"Sample profile file path">; +defm lto_validate_all_vtables_have_type_infos: BB<"lto-validate-all-vtables-have-type-infos", + "Validate that all vtables have type infos for LTO link", + "Do not validate that all vtables have type infos for LTO link">; defm lto_whole_program_visibility: BB<"lto-whole-program-visibility", "Asserts that the LTO link has whole program visibility", "Asserts that the LTO link does not have whole program visibility">; @@ -647,6 +658,10 @@ def thinlto_prefix_replace_eq: JJ<"thinlto-prefix-replace=">; def thinlto_single_module_eq: JJ<"thinlto-single-module=">, HelpText<"Specify a single module to compile in ThinLTO mode, for debugging only">; +defm fat_lto_objects: BB<"fat-lto-objects", + "Use the .llvm.lto section, which contains LLVM bitcode, in fat LTO object files to perform LTO.", + "Ignore the .llvm.lto section in relocatable object files (default).">; + def: J<"plugin-opt=O">, Alias<lto_O>, HelpText<"Alias for --lto-O">; def: 
F<"plugin-opt=debug-pass-manager">, Alias<lto_debug_pass_manager>, HelpText<"Alias for --lto-debug-pass-manager">; @@ -666,8 +681,6 @@ def: J<"plugin-opt=cs-profile-path=">, def: J<"plugin-opt=obj-path=">, Alias<lto_obj_path_eq>, HelpText<"Alias for --lto-obj-path=">; -def plugin_opt_opaque_pointers: F<"plugin-opt=opaque-pointers">, - HelpText<"Use opaque pointers in IR during LTO (default)">; def: J<"plugin-opt=opt-remarks-filename=">, Alias<opt_remarks_filename>, HelpText<"Alias for --opt-remarks-filename">; diff --git a/lld/ELF/OutputSections.cpp b/lld/ELF/OutputSections.cpp index 0b9b76e5b832..ee9374186787 100644 --- a/lld/ELF/OutputSections.cpp +++ b/lld/ELF/OutputSections.cpp @@ -22,6 +22,8 @@ #include "llvm/Support/Path.h" #include "llvm/Support/TimeProfiler.h" #if LLVM_ENABLE_ZLIB +// Avoid introducing max as a macro from Windows headers. +#define NOMINMAX #include <zlib.h> #endif #if LLVM_ENABLE_ZSTD @@ -744,6 +746,12 @@ void OutputSection::checkDynRelAddends(const uint8_t *bufStart) { int64_t addend = rel.addend; const OutputSection *relOsec = rel.inputSec->getOutputSection(); assert(relOsec != nullptr && "missing output section for relocation"); + // Some targets have NOBITS synthetic sections with dynamic relocations + // with non-zero addends. Skip such sections. + if (is_contained({EM_PPC, EM_PPC64}, config->emachine) && + (rel.inputSec == in.ppc64LongBranchTarget.get() || + rel.inputSec == in.igotPlt.get())) + continue; const uint8_t *relocTarget = bufStart + relOsec->offset + rel.inputSec->getOffset(rel.offsetInSec); // For SHT_NOBITS the written addend is always zero. 
diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index af15f5a92546..fe3d7f419e84 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -53,6 +53,7 @@ #include "lld/Common/ErrorHandler.h" #include "lld/Common/Memory.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/Demangle/Demangle.h" #include "llvm/Support/Endian.h" #include <algorithm> @@ -199,10 +200,7 @@ static bool needsPlt(RelExpr expr) { R_PPC32_PLTREL, R_PPC64_CALL_PLT>(expr); } -// Returns true if Expr refers a GOT entry. Note that this function -// returns false for TLS variables even though they need GOT, because -// TLS variables uses GOT differently than the regular variables. -static bool needsGot(RelExpr expr) { +bool lld::elf::needsGot(RelExpr expr) { return oneof<R_GOT, R_GOT_OFF, R_MIPS_GOT_LOCAL_PAGE, R_MIPS_GOT_OFF, R_MIPS_GOT_OFF32, R_AARCH64_GOT_PAGE_PC, R_GOT_PC, R_GOTPLT, R_AARCH64_GOT_PAGE, R_LOONGARCH_GOT, R_LOONGARCH_GOT_PAGE_PC>( @@ -311,7 +309,7 @@ static void replaceWithDefined(Symbol &sym, SectionBase &sec, uint64_t value, size, &sec) .overwrite(sym); - sym.verdefIndex = old.verdefIndex; + sym.versionId = old.versionId; sym.exportDynamic = true; sym.isUsedInRegularObj = true; // A copy relocated alias may need a GOT entry. 
@@ -509,8 +507,7 @@ int64_t RelocationScanner::computeMipsAddend(const RelTy &rel, RelExpr expr, template <class ELFT> static std::string maybeReportDiscarded(Undefined &sym) { auto *file = dyn_cast_or_null<ObjFile<ELFT>>(sym.file); - if (!file || !sym.discardedSecIdx || - file->getSections()[sym.discardedSecIdx] != &InputSection::discarded) + if (!file || !sym.discardedSecIdx) return ""; ArrayRef<typename ELFT::Shdr> objSections = file->template getELFShdrs<ELFT>(); @@ -742,7 +739,10 @@ static void reportUndefinedSymbol(const UndefinedDiag &undef, uint64_t offset = l.offset; msg += "\n>>> referenced by "; - std::string src = sec.getSrcMsg(sym, offset); + // In the absence of line number information, utilize DW_TAG_variable (if + // present) for the enclosing symbol (e.g. var in `int *a[] = {&undef};`). + Symbol *enclosing = sec.getEnclosingSymbol(offset); + std::string src = sec.getSrcMsg(enclosing ? *enclosing : sym, offset); if (!src.empty()) msg += src + "\n>>> "; msg += sec.getObjMsg(offset); @@ -859,6 +859,23 @@ static void addRelativeReloc(InputSectionBase &isec, uint64_t offsetInSec, RelType type) { Partition &part = isec.getPartition(); + if (sym.isTagged()) { + std::lock_guard<std::mutex> lock(relocMutex); + part.relaDyn->addRelativeReloc(target->relativeRel, isec, offsetInSec, sym, + addend, type, expr); + // With MTE globals, we always want to derive the address tag by `ldg`-ing + // the symbol. When we have a RELATIVE relocation though, we no longer have + // a reference to the symbol. Because of this, when we have an addend that + // puts the result of the RELATIVE relocation out-of-bounds of the symbol + // (e.g. the addend is outside of [0, sym.getSize()]), the AArch64 MemtagABI + // says we should store the offset to the start of the symbol in the target + // field. 
This is described in further detail in: + // https://github.com/ARM-software/abi-aa/blob/main/memtagabielf64/memtagabielf64.rst#841extended-semantics-of-r_aarch64_relative + if (addend < 0 || static_cast<uint64_t>(addend) >= sym.getSize()) + isec.relocations.push_back({expr, type, offsetInSec, addend, &sym}); + return; + } + // Add a relative relocation. If relrDyn section is enabled, and the // relocation offset is guaranteed to be even, add the relocation to // the relrDyn section, otherwise add it to the relaDyn section. @@ -889,7 +906,7 @@ static void addPltEntry(PltSection &plt, GotPltSection &gotPlt, sym, 0, R_ABS}); } -static void addGotEntry(Symbol &sym) { +void elf::addGotEntry(Symbol &sym) { in.got->addEntry(sym); uint64_t off = sym.getGotOffset(); @@ -1040,6 +1057,10 @@ void RelocationScanner::processAux(RelExpr expr, RelType type, uint64_t offset, } else if (!isAbsoluteValue(sym)) { expr = target->adjustGotPcExpr(type, addend, sec->content().data() + offset); + // If the target adjusted the expression to R_RELAX_GOT_PC, we may end up + // needing the GOT if we can't relax everything. 
+ if (expr == R_RELAX_GOT_PC) + in.got->hasGotOffRel.store(true, std::memory_order_relaxed); } } @@ -1556,7 +1577,8 @@ template <class ELFT> void elf::scanRelocations() { scanner.template scanSection<ELFT>(*sec); if (part.armExidx && part.armExidx->isLive()) for (InputSection *sec : part.armExidx->exidxSections) - scanner.template scanSection<ELFT>(*sec); + if (sec->isLive()) + scanner.template scanSection<ELFT>(*sec); } }); } @@ -1645,6 +1667,10 @@ void elf::postScanRelocations() { auto flags = sym.flags.load(std::memory_order_relaxed); if (handleNonPreemptibleIfunc(sym, flags)) return; + + if (sym.isTagged() && sym.isDefined()) + mainPart->memtagDescriptors->addSymbol(sym); + if (!sym.needsDynReloc()) return; sym.allocateAux(); diff --git a/lld/ELF/Relocations.h b/lld/ELF/Relocations.h index e36215bd0d93..cfb9092149f3 100644 --- a/lld/ELF/Relocations.h +++ b/lld/ELF/Relocations.h @@ -101,6 +101,7 @@ enum RelExpr { R_PPC64_TOCBASE, R_PPC64_RELAX_GOT_PC, R_RISCV_ADD, + R_RISCV_LEB128, R_RISCV_PC_INDIRECT, // Same as R_PC but with page-aligned semantics. R_LOONGARCH_PAGE_PC, @@ -137,6 +138,7 @@ struct JumpInstrMod { template <class ELFT> void scanRelocations(); void reportUndefinedSymbols(); void postScanRelocations(); +void addGotEntry(Symbol &sym); void hexagonTLSSymbolUpdate(ArrayRef<OutputSection *> outputSections); bool hexagonNeedsTLSSymbol(ArrayRef<OutputSection *> outputSections); @@ -220,6 +222,11 @@ ArrayRef<RelTy> sortRels(ArrayRef<RelTy> rels, SmallVector<RelTy, 0> &storage) { } return rels; } + +// Returns true if Expr refers a GOT entry. Note that this function returns +// false for TLS variables even though they need GOT, because TLS variables uses +// GOT differently than the regular variables. 
+bool needsGot(RelExpr expr); } // namespace lld::elf #endif diff --git a/lld/ELF/ScriptParser.cpp b/lld/ELF/ScriptParser.cpp index 3577e78c0d98..55b10f0c59b5 100644 --- a/lld/ELF/ScriptParser.cpp +++ b/lld/ELF/ScriptParser.cpp @@ -136,9 +136,6 @@ private: // True if a script being read is in the --sysroot directory. bool isUnderSysroot = false; - bool seenDataAlign = false; - bool seenRelroEnd = false; - // A set to detect an INCLUDE() cycle. StringSet<> seen; }; @@ -291,7 +288,7 @@ void ScriptParser::readDefsym(StringRef name) { Expr e = readExpr(); if (!atEOF()) setError("EOF expected, but got " + next()); - SymbolAssignment *cmd = make<SymbolAssignment>(name, e, getCurrentLocation()); + auto *cmd = make<SymbolAssignment>(name, e, 0, getCurrentLocation()); script->sectionCommands.push_back(cmd); } @@ -568,7 +565,7 @@ SmallVector<SectionCommand *, 0> ScriptParser::readOverlay() { max = std::max(max, cast<OutputDesc>(cmd)->osec.size); return addrExpr().getValue() + max; }; - v.push_back(make<SymbolAssignment>(".", moveDot, getCurrentLocation())); + v.push_back(make<SymbolAssignment>(".", moveDot, 0, getCurrentLocation())); return v; } @@ -600,7 +597,7 @@ void ScriptParser::readSections() { // If DATA_SEGMENT_RELRO_END is absent, for sections after DATA_SEGMENT_ALIGN, // the relro fields should be cleared. - if (!seenRelroEnd) + if (!script->seenRelroEnd) for (SectionCommand *cmd : v) if (auto *osd = dyn_cast<OutputDesc>(cmd)) osd->osec.relro = false; @@ -916,7 +913,7 @@ OutputDesc *ScriptParser::readOutputSectionDescription(StringRef outSec) { script->createOutputSection(unquote(outSec), getCurrentLocation()); OutputSection *osec = &cmd->osec; // Maybe relro. Will reset to false if DATA_SEGMENT_RELRO_END is absent. 
- osec->relro = seenDataAlign && !seenRelroEnd; + osec->relro = script->seenDataAlign && !script->seenRelroEnd; size_t symbolsReferenced = script->referencedSymbols.size(); @@ -1047,10 +1044,11 @@ SymbolAssignment *ScriptParser::readProvideHidden(bool provide, bool hidden) { SymbolAssignment *ScriptParser::readAssignment(StringRef tok) { // Assert expression returns Dot, so this is equal to ".=." if (tok == "ASSERT") - return make<SymbolAssignment>(".", readAssert(), getCurrentLocation()); + return make<SymbolAssignment>(".", readAssert(), 0, getCurrentLocation()); size_t oldPos = pos; SymbolAssignment *cmd = nullptr; + bool savedSeenRelroEnd = script->seenRelroEnd; const StringRef op = peek(); if (op.starts_with("=")) { // Support = followed by an expression without whitespace. @@ -1071,6 +1069,7 @@ SymbolAssignment *ScriptParser::readAssignment(StringRef tok) { } if (cmd) { + cmd->dataSegmentRelroEnd = !savedSeenRelroEnd && script->seenRelroEnd; cmd->commandString = tok.str() + " " + llvm::join(tokens.begin() + oldPos, tokens.begin() + pos, " "); @@ -1084,7 +1083,7 @@ SymbolAssignment *ScriptParser::readSymbolAssignment(StringRef name) { StringRef op = next(); assert(op == "=" || op == "*=" || op == "/=" || op == "+=" || op == "-=" || op == "&=" || op == "^=" || op == "|=" || op == "<<=" || op == ">>="); - // Note: GNU ld does not support %= or ^=. + // Note: GNU ld does not support %=. 
Expr e = readExpr(); if (op != "=") { std::string loc = getCurrentLocation(); @@ -1117,7 +1116,8 @@ SymbolAssignment *ScriptParser::readSymbolAssignment(StringRef name) { } }; } - return make<SymbolAssignment>(name, e, getCurrentLocation()); + return make<SymbolAssignment>(name, e, ctx.scriptSymOrderCounter++, + getCurrentLocation()); } // This is an operator-precedence parser to parse a linker @@ -1438,7 +1438,7 @@ Expr ScriptParser::readPrimary() { expect(","); readExpr(); expect(")"); - seenDataAlign = true; + script->seenDataAlign = true; return [=] { uint64_t align = std::max(uint64_t(1), e().getValue()); return (script->getDot() + align - 1) & -align; @@ -1459,15 +1459,18 @@ Expr ScriptParser::readPrimary() { expect(","); readExpr(); expect(")"); - seenRelroEnd = true; - Expr e = getPageSize(); - return [=] { return alignToPowerOf2(script->getDot(), e().getValue()); }; + script->seenRelroEnd = true; + return [=] { return alignToPowerOf2(script->getDot(), config->maxPageSize); }; } if (tok == "DEFINED") { StringRef name = unquote(readParenLiteral()); + // Return 1 if s is defined. If the definition is only found in a linker + // script, it must happen before this DEFINED. + auto order = ctx.scriptSymOrderCounter++; return [=] { - Symbol *b = symtab.find(name); - return (b && b->isDefined()) ? 1 : 0; + Symbol *s = symtab.find(name); + return s && s->isDefined() && ctx.scriptSymOrder.lookup(s) < order ? 
1 + : 0; }; } if (tok == "LENGTH") { diff --git a/lld/ELF/SymbolTable.cpp b/lld/ELF/SymbolTable.cpp index 0437095c8638..b3d97e4de779 100644 --- a/lld/ELF/SymbolTable.cpp +++ b/lld/ELF/SymbolTable.cpp @@ -92,7 +92,6 @@ Symbol *SymbolTable::insert(StringRef name) { memset(sym, 0, sizeof(Symbol)); sym->setName(name); sym->partition = 1; - sym->verdefIndex = -1; sym->versionId = VER_NDX_GLOBAL; if (pos != StringRef::npos) sym->hasVersionSuffix = true; @@ -235,10 +234,9 @@ bool SymbolTable::assignExactVersion(SymbolVersion ver, uint16_t versionId, sym->getName().contains('@')) continue; - // If the version has not been assigned, verdefIndex is -1. Use an arbitrary - // number (0) to indicate the version has been assigned. - if (sym->verdefIndex == uint16_t(-1)) { - sym->verdefIndex = 0; + // If the version has not been assigned, assign versionId to the symbol. + if (!sym->versionScriptAssigned) { + sym->versionScriptAssigned = true; sym->versionId = versionId; } if (sym->versionId == versionId) @@ -256,8 +254,8 @@ void SymbolTable::assignWildcardVersion(SymbolVersion ver, uint16_t versionId, // so we set a version to a symbol only if no version has been assigned // to the symbol. This behavior is compatible with GNU. for (Symbol *sym : findAllByVersion(ver, includeNonDefault)) - if (sym->verdefIndex == uint16_t(-1)) { - sym->verdefIndex = 0; + if (!sym->versionScriptAssigned) { + sym->versionScriptAssigned = true; sym->versionId = versionId; } } @@ -313,7 +311,7 @@ void SymbolTable::scanVersionScript() { // Then, assign versions to "*". In GNU linkers they have lower priority than // other wildcards. 
- for (VersionDefinition &v : config->versionDefinitions) { + for (VersionDefinition &v : llvm::reverse(config->versionDefinitions)) { for (SymbolVersion &pat : v.nonLocalPatterns) if (pat.hasWildcard && pat.name == "*") assignWildcard(pat, v.id, v.name); diff --git a/lld/ELF/Symbols.cpp b/lld/ELF/Symbols.cpp index 840385aabea3..734ca6bfcb40 100644 --- a/lld/ELF/Symbols.cpp +++ b/lld/ELF/Symbols.cpp @@ -316,12 +316,13 @@ void elf::maybeWarnUnorderableSymbol(const Symbol *sym) { if (!config->warnSymbolOrdering) return; - // If UnresolvedPolicy::Ignore is used, no "undefined symbol" error/warning - // is emitted. It makes sense to not warn on undefined symbols. + // If UnresolvedPolicy::Ignore is used, no "undefined symbol" error/warning is + // emitted. It makes sense to not warn on undefined symbols (excluding those + // demoted by demoteSymbols). // // Note, ld.bfd --symbol-ordering-file= does not warn on undefined symbols, // but we don't have to be compatible here. - if (sym->isUndefined() && + if (sym->isUndefined() && !cast<Undefined>(sym)->discardedSecIdx && config->unresolvedSymbols == UnresolvedPolicy::Ignore) return; @@ -330,9 +331,12 @@ void elf::maybeWarnUnorderableSymbol(const Symbol *sym) { auto report = [&](StringRef s) { warn(toString(file) + s + sym->getName()); }; - if (sym->isUndefined()) - report(": unable to order undefined symbol: "); - else if (sym->isShared()) + if (sym->isUndefined()) { + if (cast<Undefined>(sym)->discardedSecIdx) + report(": unable to order discarded symbol: "); + else + report(": unable to order undefined symbol: "); + } else if (sym->isShared()) report(": unable to order shared symbol: "); else if (d && !d->section) report(": unable to order absolute symbol: "); @@ -365,6 +369,8 @@ bool elf::computeIsPreemptible(const Symbol &sym) { // in the dynamic list. -Bsymbolic-non-weak-functions is a non-weak subset of // -Bsymbolic-functions. 
if (config->symbolic || + (config->bsymbolic == BsymbolicKind::NonWeak && + sym.binding != STB_WEAK) || (config->bsymbolic == BsymbolicKind::Functions && sym.isFunc()) || (config->bsymbolic == BsymbolicKind::NonWeakFunctions && sym.isFunc() && sym.binding != STB_WEAK)) @@ -677,3 +683,13 @@ void Symbol::resolve(const SharedSymbol &other) { } else if (traced) printTraceSymbol(other, getName()); } + +void Defined::overwrite(Symbol &sym) const { + if (isa_and_nonnull<SharedFile>(sym.file)) + sym.versionId = VER_NDX_GLOBAL; + Symbol::overwrite(sym, DefinedKind); + auto &s = static_cast<Defined &>(sym); + s.value = value; + s.size = size; + s.section = section; +} diff --git a/lld/ELF/Symbols.h b/lld/ELF/Symbols.h index bb440530b4df..e34a31e12f99 100644 --- a/lld/ELF/Symbols.h +++ b/lld/ELF/Symbols.h @@ -254,8 +254,8 @@ protected: Symbol(Kind k, InputFile *file, StringRef name, uint8_t binding, uint8_t stOther, uint8_t type) : file(file), nameData(name.data()), nameSize(name.size()), type(type), - binding(binding), stOther(stOther), symbolKind(k), - exportDynamic(false) {} + binding(binding), stOther(stOther), symbolKind(k), exportDynamic(false), + archSpecificBit(false) {} void overwrite(Symbol &sym, Kind k) const { if (sym.traced) @@ -279,9 +279,18 @@ public: // True if defined relative to a section discarded by ICF. uint8_t folded : 1; - // True if a call to this symbol needs to be followed by a restore of the - // PPC64 toc pointer. - uint8_t needsTocRestore : 1; + // Allow reuse of a bit between architecture-exclusive symbol flags. + // - needsTocRestore(): On PPC64, true if a call to this symbol needs to be + // followed by a restore of the toc pointer. + // - isTagged(): On AArch64, true if the symbol needs special relocation and + // metadata semantics because it's tagged, under the AArch64 MemtagABI. 
+ uint8_t archSpecificBit : 1; + bool needsTocRestore() const { return archSpecificBit; } + bool isTagged() const { return archSpecificBit; } + void setNeedsTocRestore(bool v) { archSpecificBit = v; } + void setIsTagged(bool v) { + archSpecificBit = v; + } // True if this symbol is defined by a symbol assignment or wrapped by --wrap. // @@ -304,11 +313,12 @@ public: uint32_t auxIdx; uint32_t dynsymIndex; - // This field is a index to the symbol's version definition. - uint16_t verdefIndex; - - // Version definition index. + // If `file` is SharedFile (for SharedSymbol or copy-relocated Defined), this + // represents the Verdef index within the input DSO, which will be converted + // to a Verneed index in the output. Otherwise, this represents the Verdef + // index (VER_NDX_LOCAL, VER_NDX_GLOBAL, or a named version). uint16_t versionId; + uint8_t versionScriptAssigned : 1; void setFlags(uint16_t bits) { flags.fetch_or(bits, std::memory_order_relaxed); @@ -346,14 +356,7 @@ public: size(size), section(section) { exportDynamic = config->exportDynamic; } - void overwrite(Symbol &sym) const { - Symbol::overwrite(sym, DefinedKind); - sym.verdefIndex = -1; - auto &s = static_cast<Defined &>(sym); - s.value = value; - s.size = size; - s.section = section; - } + void overwrite(Symbol &sym) const; static bool classof(const Symbol *s) { return s->isDefined(); } diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp index de25750bf9eb..2b32eb3a0fe3 100644 --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -583,13 +583,14 @@ static uint64_t readFdeAddr(uint8_t *buf, int size) { uint64_t EhFrameSection::getFdePc(uint8_t *buf, size_t fdeOff, uint8_t enc) const { // The starting address to which this FDE applies is - // stored at FDE + 8 byte. + // stored at FDE + 8 byte. And this offset is within + // the .eh_frame section. 
size_t off = fdeOff + 8; uint64_t addr = readFdeAddr(buf + off, enc & 0xf); if ((enc & 0x70) == DW_EH_PE_absptr) return addr; if ((enc & 0x70) == DW_EH_PE_pcrel) - return addr + getParent()->addr + off; + return addr + getParent()->addr + off + outSecOff; fatal("unknown FDE size relative encoding"); } @@ -1453,6 +1454,10 @@ DynamicSection<ELFT>::computeContents() { addInt(DT_AARCH64_MEMTAG_MODE, config->androidMemtagMode == NT_MEMTAG_LEVEL_ASYNC); addInt(DT_AARCH64_MEMTAG_HEAP, config->androidMemtagHeap); addInt(DT_AARCH64_MEMTAG_STACK, config->androidMemtagStack); + if (mainPart->memtagDescriptors->isNeeded()) { + addInSec(DT_AARCH64_MEMTAG_GLOBALS, *mainPart->memtagDescriptors); + addInt(DT_AARCH64_MEMTAG_GLOBALSSZ, mainPart->memtagDescriptors->getSize()); + } } } @@ -2684,6 +2689,10 @@ size_t IBTPltSection::getSize() const { bool IBTPltSection::isNeeded() const { return in.plt->getNumEntries() > 0; } +RelroPaddingSection::RelroPaddingSection() + : SyntheticSection(SHF_ALLOC | SHF_WRITE, SHT_NOBITS, 1, ".relro_padding") { +} + // The string hash function for .gdb_index. static uint32_t computeGdbHash(StringRef s) { uint32_t h = 0; @@ -3131,10 +3140,8 @@ bool VersionTableSection::isNeeded() const { void elf::addVerneed(Symbol *ss) { auto &file = cast<SharedFile>(*ss->file); - if (ss->verdefIndex == VER_NDX_GLOBAL) { - ss->versionId = VER_NDX_GLOBAL; + if (ss->versionId == VER_NDX_GLOBAL) return; - } if (file.vernauxs.empty()) file.vernauxs.resize(file.verdefs.size()); @@ -3143,10 +3150,10 @@ void elf::addVerneed(Symbol *ss) { // already allocated one. The verdef identifiers cover the range // [1..getVerDefNum()]; this causes the vernaux identifiers to start from // getVerDefNum()+1. 
- if (file.vernauxs[ss->verdefIndex] == 0) - file.vernauxs[ss->verdefIndex] = ++SharedFile::vernauxNum + getVerDefNum(); + if (file.vernauxs[ss->versionId] == 0) + file.vernauxs[ss->versionId] = ++SharedFile::vernauxNum + getVerDefNum(); - ss->versionId = file.vernauxs[ss->verdefIndex]; + ss->versionId = file.vernauxs[ss->versionId]; } template <class ELFT> @@ -3835,6 +3842,7 @@ void InStruct::reset() { got.reset(); gotPlt.reset(); igotPlt.reset(); + relroPadding.reset(); armCmseSGSection.reset(); ppc64LongBranchTarget.reset(); mipsAbiFlags.reset(); @@ -3900,6 +3908,76 @@ size_t PackageMetadataNote::getSize() const { alignTo(config->packageMetadata.size() + 1, 4); } +// Helper function, return the size of the ULEB128 for 'v', optionally writing +// it to `*(buf + offset)` if `buf` is non-null. +static size_t computeOrWriteULEB128(uint64_t v, uint8_t *buf, size_t offset) { + if (buf) + return encodeULEB128(v, buf + offset); + return getULEB128Size(v); +} + +// https://github.com/ARM-software/abi-aa/blob/main/memtagabielf64/memtagabielf64.rst#83encoding-of-sht_aarch64_memtag_globals_dynamic +constexpr uint64_t kMemtagStepSizeBits = 3; +constexpr uint64_t kMemtagGranuleSize = 16; +static size_t createMemtagDescriptors(const SmallVector<const Symbol *, 0> &symbols, + uint8_t *buf = nullptr) { + size_t sectionSize = 0; + uint64_t lastGlobalEnd = 0; + + for (const Symbol *sym : symbols) { + if (!includeInSymtab(*sym)) + continue; + const uint64_t addr = sym->getVA(); + const uint64_t size = sym->getSize(); + + if (addr <= kMemtagGranuleSize && buf != nullptr) + errorOrWarn("address of the tagged symbol \"" + sym->getName() + + "\" falls in the ELF header. 
This is indicative of a " + "compiler/linker bug"); + if (addr % kMemtagGranuleSize != 0) + errorOrWarn("address of the tagged symbol \"" + sym->getName() + + "\" at 0x" + Twine::utohexstr(addr) + + "\" is not granule (16-byte) aligned"); + if (size == 0) + errorOrWarn("size of the tagged symbol \"" + sym->getName() + + "\" is not allowed to be zero"); + if (size % kMemtagGranuleSize != 0) + errorOrWarn("size of the tagged symbol \"" + sym->getName() + + "\" (size 0x" + Twine::utohexstr(size) + + ") is not granule (16-byte) aligned"); + + const uint64_t sizeToEncode = size / kMemtagGranuleSize; + const uint64_t stepToEncode = ((addr - lastGlobalEnd) / kMemtagGranuleSize) + << kMemtagStepSizeBits; + if (sizeToEncode < (1 << kMemtagStepSizeBits)) { + sectionSize += computeOrWriteULEB128(stepToEncode | sizeToEncode, buf, sectionSize); + } else { + sectionSize += computeOrWriteULEB128(stepToEncode, buf, sectionSize); + sectionSize += computeOrWriteULEB128(sizeToEncode - 1, buf, sectionSize); + } + lastGlobalEnd = addr + size; + } + + return sectionSize; +} + +bool MemtagDescriptors::updateAllocSize() { + size_t oldSize = getSize(); + std::stable_sort(symbols.begin(), symbols.end(), + [](const Symbol *s1, const Symbol *s2) { + return s1->getVA() < s2->getVA(); + }); + return oldSize != getSize(); +} + +void MemtagDescriptors::writeTo(uint8_t *buf) { + createMemtagDescriptors(symbols, buf); +} + +size_t MemtagDescriptors::getSize() const { + return createMemtagDescriptors(symbols); +} + InStruct elf::in; std::vector<Partition> elf::partitions; diff --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h index 38d0c80a073d..3a9f4ba886f6 100644 --- a/lld/ELF/SyntheticSections.h +++ b/lld/ELF/SyntheticSections.h @@ -22,8 +22,10 @@ #include "Config.h" #include "InputSection.h" +#include "Symbols.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/MapVector.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/StringTableBuilder.h" #include 
"llvm/Support/Compiler.h" #include "llvm/Support/Endian.h" @@ -776,6 +778,16 @@ public: size_t getSize() const override; }; +// Used to align the end of the PT_GNU_RELRO segment and the associated PT_LOAD +// segment to a common-page-size boundary. This padding section ensures that all +// pages in the PT_LOAD segment is covered by at least one section. +class RelroPaddingSection final : public SyntheticSection { +public: + RelroPaddingSection(); + size_t getSize() const override { return 0; } + void writeTo(uint8_t *buf) override {} +}; + class GdbIndexSection final : public SyntheticSection { public: struct AddressEntry { @@ -1245,6 +1257,32 @@ public: size_t getSize() const override; }; +class MemtagDescriptors final : public SyntheticSection { +public: + MemtagDescriptors() + : SyntheticSection(llvm::ELF::SHF_ALLOC, + llvm::ELF::SHT_AARCH64_MEMTAG_GLOBALS_DYNAMIC, + /*alignment=*/4, ".memtag.globals.dynamic") {} + void writeTo(uint8_t *buf) override; + // The size of the section is non-computable until all addresses are + // synthetized, because the section's contents contain a sorted + // varint-compressed list of pointers to global variables. We only know the + // final size after `finalizeAddressDependentContent()`. 
+ size_t getSize() const override; + bool updateAllocSize() override; + + void addSymbol(const Symbol &sym) { + symbols.push_back(&sym); + } + + bool isNeeded() const override { + return !symbols.empty(); + } + +private: + SmallVector<const Symbol *, 0> symbols; +}; + InputSection *createInterpSection(); MergeInputSection *createCommentSection(); template <class ELFT> void splitSections(); @@ -1277,6 +1315,7 @@ struct Partition { std::unique_ptr<GnuHashTableSection> gnuHashTab; std::unique_ptr<HashTableSection> hashTab; std::unique_ptr<MemtagAndroidNote> memtagAndroidNote; + std::unique_ptr<MemtagDescriptors> memtagDescriptors; std::unique_ptr<PackageMetadataNote> packageMetadataNote; std::unique_ptr<RelocationBaseSection> relaDyn; std::unique_ptr<RelrBaseSection> relrDyn; @@ -1304,6 +1343,7 @@ struct InStruct { std::unique_ptr<GotSection> got; std::unique_ptr<GotPltSection> gotPlt; std::unique_ptr<IgotPltSection> igotPlt; + std::unique_ptr<RelroPaddingSection> relroPadding; std::unique_ptr<SyntheticSection> armCmseSGSection; std::unique_ptr<PPC64LongBranchTargetSection> ppc64LongBranchTarget; std::unique_ptr<SyntheticSection> mipsAbiFlags; diff --git a/lld/ELF/Target.cpp b/lld/ELF/Target.cpp index 32bb2164a208..41990f40f68b 100644 --- a/lld/ELF/Target.cpp +++ b/lld/ELF/Target.cpp @@ -159,6 +159,8 @@ void TargetInfo::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const { uint64_t secAddr = sec.getOutputSection()->addr; if (auto *s = dyn_cast<InputSection>(&sec)) secAddr += s->outSecOff; + else if (auto *ehIn = dyn_cast<EhInputSection>(&sec)) + secAddr += ehIn->getParent()->outSecOff; for (const Relocation &rel : sec.relocs()) { uint8_t *loc = buf + rel.offset; const uint64_t val = SignExtend64( diff --git a/lld/ELF/Target.h b/lld/ELF/Target.h index 9d4f22dd93f1..6264ab1a3da7 100644 --- a/lld/ELF/Target.h +++ b/lld/ELF/Target.h @@ -14,6 +14,7 @@ #include "lld/Common/ErrorHandler.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Object/ELF.h" +#include 
"llvm/Object/ELFTypes.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/MathExtras.h" #include <array> @@ -207,6 +208,7 @@ void processArmCmseSymbols(); void writePPC32GlinkSection(uint8_t *buf, size_t numEntries); unsigned getPPCDFormOp(unsigned secondaryOp); +unsigned getPPCDSFormOp(unsigned secondaryOp); // In the PowerPC64 Elf V2 abi a function can have 2 entry points. The first // is a global entry point (GEP) which typically is used to initialize the TOC @@ -233,6 +235,7 @@ void addArmInputSectionMappingSymbols(); void addArmSyntheticSectionMappingSymbol(Defined *); void sortArmMappingSymbols(); void convertArmInstructionstoBE8(InputSection *sec, uint8_t *buf); +void createTaggedSymbols(const SmallVector<ELFFileBase *, 0> &files); LLVM_LIBRARY_VISIBILITY extern const TargetInfo *target; TargetInfo *getTarget(); @@ -306,17 +309,17 @@ inline void write64(void *p, uint64_t v) { #endif #define invokeELFT(f, ...) \ switch (config->ekind) { \ - case ELF32LEKind: \ - f<ELF32LE>(__VA_ARGS__); \ + case lld::elf::ELF32LEKind: \ + f<llvm::object::ELF32LE>(__VA_ARGS__); \ break; \ - case ELF32BEKind: \ - f<ELF32BE>(__VA_ARGS__); \ + case lld::elf::ELF32BEKind: \ + f<llvm::object::ELF32BE>(__VA_ARGS__); \ break; \ - case ELF64LEKind: \ - f<ELF64LE>(__VA_ARGS__); \ + case lld::elf::ELF64LEKind: \ + f<llvm::object::ELF64LE>(__VA_ARGS__); \ break; \ - case ELF64BEKind: \ - f<ELF64BE>(__VA_ARGS__); \ + case lld::elf::ELF64BEKind: \ + f<llvm::object::ELF64BE>(__VA_ARGS__); \ break; \ default: \ llvm_unreachable("unknown config->ekind"); \ diff --git a/lld/ELF/Thunks.cpp b/lld/ELF/Thunks.cpp index 30559dbe8263..5f543ffdcfaa 100644 --- a/lld/ELF/Thunks.cpp +++ b/lld/ELF/Thunks.cpp @@ -1138,7 +1138,7 @@ void PPC64PltCallStub::writeTo(uint8_t *buf) { void PPC64PltCallStub::addSymbols(ThunkSection &isec) { Defined *s = addSymbol(saver().save("__plt_" + destination.getName()), STT_FUNC, 0, isec); - s->needsTocRestore = true; + s->setNeedsTocRestore(true); s->file = 
destination.file; } @@ -1182,7 +1182,7 @@ void PPC64R2SaveStub::writeTo(uint8_t *buf) { void PPC64R2SaveStub::addSymbols(ThunkSection &isec) { Defined *s = addSymbol(saver().save("__toc_save_" + destination.getName()), STT_FUNC, 0, isec); - s->needsTocRestore = true; + s->setNeedsTocRestore(true); } bool PPC64R2SaveStub::isCompatibleWith(const InputSection &isec, diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp index 368c9aabceae..a84e4864ab0e 100644 --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -53,7 +53,6 @@ public: void run(); private: - void copyLocalSymbols(); void addSectionSymbols(); void sortSections(); void resolveShfLinkOrder(); @@ -251,6 +250,59 @@ void elf::addReservedSymbols() { ElfSym::edata2 = add("_edata", -1); } +static void demoteDefined(Defined &sym, DenseMap<SectionBase *, size_t> &map) { + if (map.empty()) + for (auto [i, sec] : llvm::enumerate(sym.file->getSections())) + map.try_emplace(sec, i); + // Change WEAK to GLOBAL so that if a scanned relocation references sym, + // maybeReportUndefined will report an error. + uint8_t binding = sym.isWeak() ? uint8_t(STB_GLOBAL) : sym.binding; + Undefined(sym.file, sym.getName(), binding, sym.stOther, sym.type, + /*discardedSecIdx=*/map.lookup(sym.section)) + .overwrite(sym); +} + +// If all references to a DSO happen to be weak, the DSO is not added to +// DT_NEEDED. If that happens, replace SharedSymbol with Undefined to avoid +// dangling references to an unneeded DSO. Use a weak binding to avoid +// --no-allow-shlib-undefined diagnostics. Similarly, demote lazy symbols. +// +// In addition, demote symbols defined in discarded sections, so that +// references to /DISCARD/ discarded symbols will lead to errors. 
+static void demoteSymbolsAndComputeIsPreemptible() { + llvm::TimeTraceScope timeScope("Demote symbols"); + DenseMap<InputFile *, DenseMap<SectionBase *, size_t>> sectionIndexMap; + for (Symbol *sym : symtab.getSymbols()) { + if (auto *d = dyn_cast<Defined>(sym)) { + if (d->section && !d->section->isLive()) + demoteDefined(*d, sectionIndexMap[d->file]); + } else { + auto *s = dyn_cast<SharedSymbol>(sym); + if (sym->isLazy() || (s && !cast<SharedFile>(s->file)->isNeeded)) { + uint8_t binding = sym->isLazy() ? sym->binding : uint8_t(STB_WEAK); + Undefined(nullptr, sym->getName(), binding, sym->stOther, sym->type) + .overwrite(*sym); + sym->versionId = VER_NDX_GLOBAL; + } + } + + if (config->hasDynSymTab) + sym->isPreemptible = computeIsPreemptible(*sym); + } +} + +// Fully static executables don't support MTE globals at this point in time, as +// we currently rely on: +// - A dynamic loader to process relocations, and +// - Dynamic entries. +// This restriction could be removed in future by re-using some of the ideas +// that ifuncs use in fully static executables. 
+bool elf::canHaveMemtagGlobals() { + return config->emachine == EM_AARCH64 && + config->androidMemtagMode != ELF::NT_MEMTAG_LEVEL_NONE && + (config->relocatable || config->shared || needsInterpSection()); +} + static OutputSection *findSection(StringRef name, unsigned partition = 1) { for (SectionCommand *cmd : script->sectionCommands) if (auto *osd = dyn_cast<OutputDesc>(cmd)) @@ -345,10 +397,11 @@ template <class ELFT> void elf::createSyntheticSections() { std::make_unique<SymbolTableSection<ELFT>>(*part.dynStrTab); part.dynamic = std::make_unique<DynamicSection<ELFT>>(); - if (config->emachine == EM_AARCH64 && - config->androidMemtagMode != ELF::NT_MEMTAG_LEVEL_NONE) { + if (canHaveMemtagGlobals()) { part.memtagAndroidNote = std::make_unique<MemtagAndroidNote>(); add(*part.memtagAndroidNote); + part.memtagDescriptors = std::make_unique<MemtagDescriptors>(); + add(*part.memtagDescriptors); } if (config->androidPackDynRelocs) @@ -453,6 +506,13 @@ template <class ELFT> void elf::createSyntheticSections() { add(*in.gotPlt); in.igotPlt = std::make_unique<IgotPltSection>(); add(*in.igotPlt); + // Add .relro_padding if DATA_SEGMENT_RELRO_END is used; otherwise, add the + // section in the absence of PHDRS/SECTIONS commands. + if (config->zRelro && ((script->phdrsCommands.empty() && + !script->hasSectionsCommand) || script->seenRelroEnd)) { + in.relroPadding = std::make_unique<RelroPaddingSection>(); + add(*in.relroPadding); + } if (config->emachine == EM_ARM) { in.armCmseSGSection = std::make_unique<ArmCmseSGSection>(); @@ -525,11 +585,6 @@ template <class ELFT> void elf::createSyntheticSections() { // The main function of the writer. template <class ELFT> void Writer<ELFT>::run() { - copyLocalSymbols(); - - if (config->copyRelocs) - addSectionSymbols(); - // Now that we have a complete set of output sections. This function // completes section contents. For example, we need to add strings // to the string table, and add entries to .got and .plt. 
@@ -672,37 +727,39 @@ static bool shouldKeepInSymtab(const Defined &sym) { return true; } -static bool includeInSymtab(const Symbol &b) { +bool lld::elf::includeInSymtab(const Symbol &b) { if (auto *d = dyn_cast<Defined>(&b)) { // Always include absolute symbols. SectionBase *sec = d->section; if (!sec) return true; + assert(sec->isLive()); if (auto *s = dyn_cast<MergeInputSection>(sec)) return s->getSectionPiece(d->value).live; - return sec->isLive(); + return true; } return b.used || !config->gcSections; } -// Local symbols are not in the linker's symbol table. This function scans -// each object file's symbol table to copy local symbols to the output. -template <class ELFT> void Writer<ELFT>::copyLocalSymbols() { - if (!in.symTab) - return; +// Scan local symbols to: +// +// - demote symbols defined relative to /DISCARD/ discarded input sections so +// that relocations referencing them will lead to errors. +// - copy eligible symbols to .symTab +static void demoteAndCopyLocalSymbols() { llvm::TimeTraceScope timeScope("Add local symbols"); - if (config->copyRelocs && config->discard != DiscardPolicy::None) - markUsedLocalSymbols<ELFT>(); for (ELFFileBase *file : ctx.objectFiles) { + DenseMap<SectionBase *, size_t> sectionIndexMap; for (Symbol *b : file->getLocalSymbols()) { assert(b->isLocal() && "should have been caught in initializeSymbols()"); auto *dr = dyn_cast<Defined>(b); - - // No reason to keep local undefined symbol in symtab. 
if (!dr) continue; - if (includeInSymtab(*b) && shouldKeepInSymtab(*dr)) + + if (dr->section && !dr->section->isLive()) + demoteDefined(*dr, sectionIndexMap); + else if (in.symTab && includeInSymtab(*b) && shouldKeepInSymtab(*dr)) in.symTab->addSymbol(b); } } @@ -813,6 +870,9 @@ static bool isRelroSection(const OutputSection *sec) { if (sec == in.gotPlt->getParent()) return config->zNow; + if (in.relroPadding && sec == in.relroPadding->getParent()) + return true; + // .dynamic section contains data for the dynamic linker, and // there's no need to write to it at runtime, so it's better to put // it into RELRO. @@ -852,7 +912,7 @@ enum RankFlags { RF_BSS = 1 << 7, }; -static unsigned getSectionRank(const OutputSection &osec) { +static unsigned getSectionRank(OutputSection &osec) { unsigned rank = osec.partition * RF_PARTITION; // We want to put section specified by -T option first, so we @@ -915,7 +975,9 @@ static unsigned getSectionRank(const OutputSection &osec) { // TLS sections directly before the other RELRO sections. if (!(osec.flags & SHF_TLS)) rank |= RF_NOT_TLS; - if (!isRelroSection(&osec)) + if (isRelroSection(&osec)) + osec.relro = true; + else rank |= RF_NOT_RELRO; // Place .ldata and .lbss after .bss. Making .bss closer to .text alleviates // relocation overflow pressure. @@ -936,12 +998,10 @@ static unsigned getSectionRank(const OutputSection &osec) { // their coverage by a single signed 16-bit offset from the TOC base // pointer. StringRef name = osec.name; - if (name == ".branch_lt") + if (name == ".got") rank |= 1; - else if (name == ".got") - rank |= 2; else if (name == ".toc") - rank |= 4; + rank |= 2; } if (config->emachine == EM_MIPS) { @@ -1137,6 +1197,18 @@ findOrphanPos(SmallVectorImpl<SectionCommand *>::iterator b, SmallVectorImpl<SectionCommand *>::iterator e) { OutputSection *sec = &cast<OutputDesc>(*e)->osec; + // As a special case, place .relro_padding before the SymbolAssignment using + // DATA_SEGMENT_RELRO_END, if present. 
+ if (in.relroPadding && sec == in.relroPadding->getParent()) { + auto i = std::find_if(b, e, [=](SectionCommand *a) { + if (auto *assign = dyn_cast<SymbolAssignment>(a)) + return assign->dataSegmentRelroEnd; + return false; + }); + if (i != e) + return i; + } + // Find the first element that has as close a rank as possible. auto i = std::max_element(b, e, [=](SectionCommand *a, SectionCommand *b) { return getRankProximity(sec, a) < getRankProximity(sec, b); @@ -1645,6 +1717,7 @@ template <class ELFT> void Writer<ELFT>::finalizeAddressDependentContent() { changed |= a32p.createFixes(); } + finalizeSynthetic(in.got.get()); if (in.mipsGot) in.mipsGot->updateAllocSize(); @@ -1652,6 +1725,8 @@ template <class ELFT> void Writer<ELFT>::finalizeAddressDependentContent() { changed |= part.relaDyn->updateAllocSize(); if (part.relrDyn) changed |= part.relrDyn->updateAllocSize(); + if (part.memtagDescriptors) + changed |= part.memtagDescriptors->updateAllocSize(); } const Defined *changedSym = script->assignAddresses(); @@ -1897,14 +1972,17 @@ template <class ELFT> void Writer<ELFT>::finalizeSections() { for (Partition &part : partitions) finalizeSynthetic(part.ehFrame.get()); } - - if (config->hasDynSymTab) { - parallelForEach(symtab.getSymbols(), [](Symbol *sym) { - sym->isPreemptible = computeIsPreemptible(*sym); - }); - } } + demoteSymbolsAndComputeIsPreemptible(); + + if (config->copyRelocs && config->discard != DiscardPolicy::None) + markUsedLocalSymbols<ELFT>(); + demoteAndCopyLocalSymbols(); + + if (config->copyRelocs) + addSectionSymbols(); + // Change values of linker-script-defined symbols from placeholders (assigned // by declareSymbols) to actual definitions. 
script->processSymbolAssignments(); @@ -1945,10 +2023,16 @@ template <class ELFT> void Writer<ELFT>::finalizeSections() { }); if (!allNeededIsKnown) continue; - for (Symbol *sym : file->requiredSymbols) - if (sym->isUndefined() && !sym->isWeak()) + for (Symbol *sym : file->requiredSymbols) { + if (sym->isUndefined() && !sym->isWeak()) { diagnose("undefined reference due to --no-allow-shlib-undefined: " + toString(*sym) + "\n>>> referenced by " + toString(file)); + } else if (sym->isDefined() && sym->computeBinding() == STB_LOCAL) { + diagnose("non-exported symbol '" + toString(*sym) + "' in '" + + toString(sym->file) + "' is referenced by DSO '" + + toString(file) + "'"); + } + } } } } @@ -2158,7 +2242,7 @@ template <class ELFT> void Writer<ELFT>::finalizeSections() { for (OutputSection *sec : outputSections) sec->finalize(); - script->checkMemoryRegions(); + script->checkFinalScriptConditions(); if (config->emachine == EM_ARM && !config->isLE && config->armBe8) { addArmInputSectionMappingSymbols(); @@ -2329,6 +2413,7 @@ SmallVector<PhdrEntry *, 0> Writer<ELFT>::createPhdrs(Partition &part) { relroEnd = sec; } } + relRo->p_align = 1; for (OutputSection *sec : outputSections) { if (!needsPtLoad(sec)) @@ -2672,16 +2757,6 @@ template <class ELFT> void Writer<ELFT>::setPhdrs(Partition &part) { if (!p->hasLMA) p->p_paddr = first->getLMA(); } - - if (p->p_type == PT_GNU_RELRO) { - p->p_align = 1; - // musl/glibc ld.so rounds the size down, so we need to round up - // to protect the last page. This is a no-op on FreeBSD which always - // rounds up. 
- p->p_memsz = - alignToPowerOf2(p->p_offset + p->p_memsz, config->commonPageSize) - - p->p_offset; - } } } diff --git a/lld/ELF/Writer.h b/lld/ELF/Writer.h index a302caad339f..eaf021aac42e 100644 --- a/lld/ELF/Writer.h +++ b/lld/ELF/Writer.h @@ -46,6 +46,7 @@ struct PhdrEntry { }; void addReservedSymbols(); +bool includeInSymtab(const Symbol &b); template <class ELFT> uint32_t calcMipsEFlags(); @@ -55,6 +56,8 @@ uint8_t getMipsFpAbiFlag(uint8_t oldFlag, uint8_t newFlag, bool isMipsN32Abi(const InputFile *f); bool isMicroMips(); bool isMipsR6(); + +bool canHaveMemtagGlobals(); } // namespace lld::elf #endif diff --git a/lld/MachO/Arch/ARM64.cpp b/lld/MachO/Arch/ARM64.cpp index e28e82a78cbd..e3781763c610 100644 --- a/lld/MachO/Arch/ARM64.cpp +++ b/lld/MachO/Arch/ARM64.cpp @@ -602,11 +602,15 @@ void ARM64::applyOptimizationHints(uint8_t *outBuf, const ObjFile &obj) const { addr < sectionAddr + section->getSize()) return true; + if (obj.sections.empty()) + return false; auto secIt = std::prev(llvm::upper_bound( obj.sections, addr, [](uint64_t off, const Section *sec) { return off < sec->addr; })); const Section *sec = *secIt; + if (sec->subsections.empty()) + return false; auto subsecIt = std::prev(llvm::upper_bound( sec->subsections, addr - sec->addr, [](uint64_t off, Subsection subsec) { return off < subsec.offset; })); diff --git a/lld/MachO/Config.h b/lld/MachO/Config.h index 59eb882c0783..f820513a111e 100644 --- a/lld/MachO/Config.h +++ b/lld/MachO/Config.h @@ -26,9 +26,9 @@ #include <vector> -namespace llvm::CodeGenOpt { -enum Level : int; -} // namespace llvm::CodeGenOpt +namespace llvm { +enum class CodeGenOptLevel; +} // namespace llvm namespace lld { namespace macho { @@ -167,7 +167,7 @@ struct Configuration { llvm::StringRef thinLTOJobs; llvm::StringRef umbrella; uint32_t ltoo = 2; - llvm::CodeGenOpt::Level ltoCgo; + llvm::CodeGenOptLevel ltoCgo; llvm::CachePruningPolicy thinLTOCachePolicy; llvm::StringRef thinLTOCacheDir; llvm::StringRef 
thinLTOIndexOnlyArg; @@ -208,6 +208,7 @@ struct Configuration { bool ltoDebugPassManager = false; bool csProfileGenerate = false; llvm::StringRef csProfilePath; + bool pgoWarnMismatch; bool callGraphProfileSort = false; llvm::StringRef printSymbolOrder; diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp index ce7f6d567b61..5885a1fae4f3 100644 --- a/lld/MachO/Driver.cpp +++ b/lld/MachO/Driver.cpp @@ -311,7 +311,7 @@ static InputFile *addFile(StringRef path, LoadType loadType, path::filename(path).starts_with("libswift"); if ((isCommandLineLoad && config->allLoad) || loadType == LoadType::CommandLineForce || isLCLinkerForceLoad) { - if (std::optional<MemoryBufferRef> buffer = readFile(path)) { + if (readFile(path)) { Error e = Error::success(); for (const object::Archive::Child &c : file->getArchive().children(e)) { StringRef reason; @@ -341,7 +341,7 @@ static InputFile *addFile(StringRef path, LoadType loadType, // TODO: no need to look for ObjC sections for a given archive member if // we already found that it contains an ObjC symbol. 
- if (std::optional<MemoryBufferRef> buffer = readFile(path)) { + if (readFile(path)) { Error e = Error::success(); for (const object::Archive::Child &c : file->getArchive().children(e)) { Expected<MemoryBufferRef> mb = c.getMemoryBufferRef(); @@ -411,7 +411,7 @@ static InputFile *addFile(StringRef path, LoadType loadType, static std::vector<StringRef> missingAutolinkWarnings; static void addLibrary(StringRef name, bool isNeeded, bool isWeak, bool isReexport, bool isHidden, bool isExplicit, - LoadType loadType, InputFile *originFile = nullptr) { + LoadType loadType) { if (std::optional<StringRef> path = findLibrary(name)) { if (auto *dylibFile = dyn_cast_or_null<DylibFile>( addFile(*path, loadType, /*isLazy=*/false, isExplicit, @@ -428,10 +428,8 @@ static void addLibrary(StringRef name, bool isNeeded, bool isWeak, return; } if (loadType == LoadType::LCLinkerOption) { - assert(originFile); missingAutolinkWarnings.push_back( - saver().save(toString(originFile) + - ": auto-linked library not found for -l" + name)); + saver().save("auto-linked library not found for -l" + name)); return; } error("library not found for -l" + name); @@ -439,8 +437,7 @@ static void addLibrary(StringRef name, bool isNeeded, bool isWeak, static DenseSet<StringRef> loadedObjectFrameworks; static void addFramework(StringRef name, bool isNeeded, bool isWeak, - bool isReexport, bool isExplicit, LoadType loadType, - InputFile *originFile = nullptr) { + bool isReexport, bool isExplicit, LoadType loadType) { if (std::optional<StringRef> path = findFramework(name)) { if (loadedObjectFrameworks.contains(*path)) return; @@ -468,10 +465,8 @@ static void addFramework(StringRef name, bool isNeeded, bool isWeak, return; } if (loadType == LoadType::LCLinkerOption) { - assert(originFile); - missingAutolinkWarnings.push_back(saver().save( - toString(originFile) + - ": auto-linked framework not found for -framework " + name)); + missingAutolinkWarnings.push_back( + saver().save("auto-linked framework not 
found for -framework " + name)); return; } error("framework not found for -framework " + name); @@ -480,7 +475,9 @@ static void addFramework(StringRef name, bool isNeeded, bool isWeak, // Parses LC_LINKER_OPTION contents, which can add additional command line // flags. This directly parses the flags instead of using the standard argument // parser to improve performance. -void macho::parseLCLinkerOption(InputFile *f, unsigned argc, StringRef data) { +void macho::parseLCLinkerOption( + llvm::SmallVectorImpl<StringRef> &LCLinkerOptions, InputFile *f, + unsigned argc, StringRef data) { if (config->ignoreAutoLink) return; @@ -498,19 +495,40 @@ void macho::parseLCLinkerOption(InputFile *f, unsigned argc, StringRef data) { if (arg.consume_front("-l")) { if (config->ignoreAutoLinkOptions.contains(arg)) return; - addLibrary(arg, /*isNeeded=*/false, /*isWeak=*/false, - /*isReexport=*/false, /*isHidden=*/false, /*isExplicit=*/false, - LoadType::LCLinkerOption, f); } else if (arg == "-framework") { StringRef name = argv[++i]; if (config->ignoreAutoLinkOptions.contains(name)) return; - addFramework(name, /*isNeeded=*/false, /*isWeak=*/false, - /*isReexport=*/false, /*isExplicit=*/false, - LoadType::LCLinkerOption, f); } else { error(arg + " is not allowed in LC_LINKER_OPTION"); } + + LCLinkerOptions.append(argv); +} + +void macho::resolveLCLinkerOptions() { + while (!unprocessedLCLinkerOptions.empty()) { + SmallVector<StringRef> LCLinkerOptions(unprocessedLCLinkerOptions); + unprocessedLCLinkerOptions.clear(); + + for (unsigned i = 0; i < LCLinkerOptions.size(); ++i) { + StringRef arg = LCLinkerOptions[i]; + if (arg.consume_front("-l")) { + assert(!config->ignoreAutoLinkOptions.contains(arg)); + addLibrary(arg, /*isNeeded=*/false, /*isWeak=*/false, + /*isReexport=*/false, /*isHidden=*/false, + /*isExplicit=*/false, LoadType::LCLinkerOption); + } else if (arg == "-framework") { + StringRef name = LCLinkerOptions[++i]; + assert(!config->ignoreAutoLinkOptions.contains(name)); + 
addFramework(name, /*isNeeded=*/false, /*isWeak=*/false, + /*isReexport=*/false, /*isExplicit=*/false, + LoadType::LCLinkerOption); + } else { + error(arg + " is not allowed in LC_LINKER_OPTION"); + } + } + } } static void addFileList(StringRef path, bool isLazy) { @@ -1335,8 +1353,10 @@ static void createAliases() { } static void handleExplicitExports() { + static constexpr int kMaxWarnings = 3; if (config->hasExplicitExports) { - parallelForEach(symtab->getSymbols(), [](Symbol *sym) { + std::atomic<uint64_t> warningsCount{0}; + parallelForEach(symtab->getSymbols(), [&warningsCount](Symbol *sym) { if (auto *defined = dyn_cast<Defined>(sym)) { if (config->exportedSymbols.match(sym->getName())) { if (defined->privateExtern) { @@ -1347,8 +1367,12 @@ static void handleExplicitExports() { // The former can be exported but the latter cannot. defined->privateExtern = false; } else { - warn("cannot export hidden symbol " + toString(*defined) + - "\n>>> defined in " + toString(defined->getFile())); + // Only print the first 3 warnings verbosely, and + // shorten the rest to avoid crowding logs. + if (warningsCount.fetch_add(1, std::memory_order_relaxed) < + kMaxWarnings) + warn("cannot export hidden symbol " + toString(*defined) + + "\n>>> defined in " + toString(defined->getFile())); } } } else { @@ -1358,6 +1382,9 @@ static void handleExplicitExports() { dysym->shouldReexport = config->exportedSymbols.match(sym->getName()); } }); + if (warningsCount > kMaxWarnings) + warn("<... 
" + Twine(warningsCount - kMaxWarnings) + + " more similar warnings...>"); } else if (!config->unexportedSymbols.empty()) { parallelForEach(symtab->getSymbols(), [](Symbol *sym) { if (auto *defined = dyn_cast<Defined>(sym)) @@ -1387,6 +1414,7 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS, missingAutolinkWarnings.clear(); syntheticSections.clear(); thunkMap.clear(); + unprocessedLCLinkerOptions.clear(); firstTLVDataSection = nullptr; tar = nullptr; @@ -1640,6 +1668,8 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS, config->ltoDebugPassManager = args.hasArg(OPT_lto_debug_pass_manager); config->csProfileGenerate = args.hasArg(OPT_cs_profile_generate); config->csProfilePath = args.getLastArgValue(OPT_cs_profile_path); + config->pgoWarnMismatch = + args.hasFlag(OPT_pgo_warn_mismatch, OPT_no_pgo_warn_mismatch, true); config->generateUuid = !args.hasArg(OPT_no_uuid); for (const Arg *arg : args.filtered(OPT_alias)) { @@ -1889,6 +1919,8 @@ bool link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS, bool didCompileBitcodeFiles = compileBitcodeFiles(); + resolveLCLinkerOptions(); + // If --thinlto-index-only is given, we should create only "index // files" and not object files. Index file creation is already done // in compileBitcodeFiles, so we are done if that's the case. diff --git a/lld/MachO/Driver.h b/lld/MachO/Driver.h index bed752ff6258..82cd1880ecab 100644 --- a/lld/MachO/Driver.h +++ b/lld/MachO/Driver.h @@ -35,12 +35,14 @@ public: // Create enum with OPT_xxx values for each option in Options.td enum { OPT_INVALID = 0, -#define OPTION(_1, _2, ID, _4, _5, _6, _7, _8, _9, _10, _11, _12) OPT_##ID, +#define OPTION(...) 
LLVM_MAKE_OPT_ID(__VA_ARGS__), #include "Options.inc" #undef OPTION }; -void parseLCLinkerOption(InputFile *, unsigned argc, StringRef data); +void parseLCLinkerOption(llvm::SmallVectorImpl<StringRef> &LCLinkerOptions, + InputFile *f, unsigned argc, StringRef data); +void resolveLCLinkerOptions(); std::string createResponseFile(const llvm::opt::InputArgList &args); diff --git a/lld/MachO/DriverUtils.cpp b/lld/MachO/DriverUtils.cpp index b0be96f3592c..17499451382a 100644 --- a/lld/MachO/DriverUtils.cpp +++ b/lld/MachO/DriverUtils.cpp @@ -44,9 +44,13 @@ using namespace lld::macho; // Create table mapping all options defined in Options.td static constexpr OptTable::Info optInfo[] = { -#define OPTION(X1, X2, ID, KIND, GROUP, ALIAS, X7, X8, X9, X10, X11, X12) \ - {X1, X2, X10, X11, OPT_##ID, Option::KIND##Class, \ - X9, X8, OPT_##GROUP, OPT_##ALIAS, X7, X12}, +#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, \ + VISIBILITY, PARAM, HELPTEXT, METAVAR, VALUES) \ + {PREFIX, NAME, HELPTEXT, \ + METAVAR, OPT_##ID, opt::Option::KIND##Class, \ + PARAM, FLAGS, VISIBILITY, \ + OPT_##GROUP, OPT_##ALIAS, ALIASARGS, \ + VALUES}, #include "Options.inc" #undef OPTION }; diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp index c89f6f4722dc..31ed24149e78 100644 --- a/lld/MachO/InputFiles.cpp +++ b/lld/MachO/InputFiles.cpp @@ -185,6 +185,27 @@ static bool checkCompatibility(const InputFile *input) { return true; } +template <class Header> +static bool compatWithTargetArch(const InputFile *file, const Header *hdr) { + uint32_t cpuType; + std::tie(cpuType, std::ignore) = getCPUTypeFromArchitecture(config->arch()); + + if (hdr->cputype != cpuType) { + Architecture arch = + getArchitectureFromCpuType(hdr->cputype, hdr->cpusubtype); + auto msg = config->errorForArchMismatch + ? 
static_cast<void (*)(const Twine &)>(error) + : warn; + + msg(toString(file) + " has architecture " + getArchitectureName(arch) + + " which is incompatible with target architecture " + + getArchitectureName(config->arch())); + return false; + } + + return checkCompatibility(file); +} + // This cache mostly exists to store system libraries (and .tbds) as they're // loaded, rather than the input archives, which are already cached at a higher // level, and other files like the filelist that are only read once. @@ -299,7 +320,7 @@ static std::optional<size_t> getRecordSize(StringRef segname, StringRef name) { static Error parseCallGraph(ArrayRef<uint8_t> data, std::vector<CallGraphEntry> &callGraph) { TimeTraceScope timeScope("Parsing call graph section"); - BinaryStreamReader reader(data, support::little); + BinaryStreamReader reader(data, llvm::endianness::little); while (!reader.empty()) { uint32_t fromIndex, toIndex; uint64_t count; @@ -929,10 +950,26 @@ OpaqueFile::OpaqueFile(MemoryBufferRef mb, StringRef segName, section.subsections.push_back({0, isec}); } +template <class LP> +void ObjFile::parseLinkerOptions(SmallVectorImpl<StringRef> &LCLinkerOptions) { + using Header = typename LP::mach_header; + auto *hdr = reinterpret_cast<const Header *>(mb.getBufferStart()); + + for (auto *cmd : findCommands<linker_option_command>(hdr, LC_LINKER_OPTION)) { + StringRef data{reinterpret_cast<const char *>(cmd + 1), + cmd->cmdsize - sizeof(linker_option_command)}; + parseLCLinkerOption(LCLinkerOptions, this, cmd->count, data); + } +} + +SmallVector<StringRef> macho::unprocessedLCLinkerOptions; ObjFile::ObjFile(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName, - bool lazy, bool forceHidden) - : InputFile(ObjKind, mb, lazy), modTime(modTime), forceHidden(forceHidden) { + bool lazy, bool forceHidden, bool compatArch, + bool builtFromBitcode) + : InputFile(ObjKind, mb, lazy), modTime(modTime), forceHidden(forceHidden), + builtFromBitcode(builtFromBitcode) { 
this->archiveName = std::string(archiveName); + this->compatArch = compatArch; if (lazy) { if (target->wordSize == 8) parseLazy<LP64>(); @@ -955,28 +992,17 @@ template <class LP> void ObjFile::parse() { auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart()); auto *hdr = reinterpret_cast<const Header *>(mb.getBufferStart()); - uint32_t cpuType; - std::tie(cpuType, std::ignore) = getCPUTypeFromArchitecture(config->arch()); - if (hdr->cputype != cpuType) { - Architecture arch = - getArchitectureFromCpuType(hdr->cputype, hdr->cpusubtype); - auto msg = config->errorForArchMismatch - ? static_cast<void (*)(const Twine &)>(error) - : warn; - msg(toString(this) + " has architecture " + getArchitectureName(arch) + - " which is incompatible with target architecture " + - getArchitectureName(config->arch())); + // If we've already checked the arch, then don't need to check again. + if (!compatArch) return; - } - - if (!checkCompatibility(this)) + if (!(compatArch = compatWithTargetArch(this, hdr))) return; - for (auto *cmd : findCommands<linker_option_command>(hdr, LC_LINKER_OPTION)) { - StringRef data{reinterpret_cast<const char *>(cmd + 1), - cmd->cmdsize - sizeof(linker_option_command)}; - parseLCLinkerOption(this, cmd->count, data); - } + // We will resolve LC linker options once all native objects are loaded after + // LTO is finished. 
+ SmallVector<StringRef, 4> LCLinkerOptions; + parseLinkerOptions<LP>(LCLinkerOptions); + unprocessedLCLinkerOptions.append(LCLinkerOptions); ArrayRef<SectionHeader> sectionHeaders; if (const load_command *cmd = findCommand(hdr, LP::segmentLCType)) { @@ -1026,6 +1052,12 @@ template <class LP> void ObjFile::parseLazy() { auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart()); auto *hdr = reinterpret_cast<const Header *>(mb.getBufferStart()); + + if (!compatArch) + return; + if (!(compatArch = compatWithTargetArch(this, hdr))) + return; + const load_command *cmd = findCommand(hdr, LC_SYMTAB); if (!cmd) return; @@ -1490,13 +1522,22 @@ void ObjFile::registerEhFrames(Section &ehFrameSection) { } std::string ObjFile::sourceFile() const { + const char *unitName = compileUnit->getUnitDIE().getShortName(); + // DWARF allows DW_AT_name to be absolute, in which case nothing should be + // prepended. As for the styles, debug info can contain paths from any OS, not + // necessarily an OS we're currently running on. Moreover different + // compilation units can be compiled on different operating systems and linked + // together later. + if (sys::path::is_absolute(unitName, llvm::sys::path::Style::posix) || + sys::path::is_absolute(unitName, llvm::sys::path::Style::windows)) + return unitName; SmallString<261> dir(compileUnit->getCompilationDir()); StringRef sep = sys::path::get_separator(); // We don't use `path::append` here because we want an empty `dir` to result // in an absolute path. `append` would give us a relative path for that case. if (!dir.endswith(sep)) dir += sep; - return (dir + compileUnit->getUnitDIE().getShortName()).str(); + return (dir + unitName).str(); } lld::DWARFCache *ObjFile::getDwarf() { @@ -2089,22 +2130,51 @@ ArchiveFile::ArchiveFile(std::unique_ptr<object::Archive> &&f, bool forceHidden) forceHidden(forceHidden) {} void ArchiveFile::addLazySymbols() { + // Avoid calling getMemoryBufferRef() on zero-symbol archive + // since that crashes. 
+ if (file->isEmpty() || file->getNumberOfSymbols() == 0) + return; + + Error err = Error::success(); + auto child = file->child_begin(err); + // Ignore the I/O error here - will be reported later. + if (!err) { + Expected<MemoryBufferRef> mbOrErr = child->getMemoryBufferRef(); + if (!mbOrErr) { + llvm::consumeError(mbOrErr.takeError()); + } else { + if (identify_magic(mbOrErr->getBuffer()) == file_magic::macho_object) { + if (target->wordSize == 8) + compatArch = compatWithTargetArch( + this, reinterpret_cast<const LP64::mach_header *>( + mbOrErr->getBufferStart())); + else + compatArch = compatWithTargetArch( + this, reinterpret_cast<const ILP32::mach_header *>( + mbOrErr->getBufferStart())); + if (!compatArch) + return; + } + } + } + for (const object::Archive::Symbol &sym : file->symbols()) symtab->addLazyArchive(sym.getName(), this, sym); } static Expected<InputFile *> loadArchiveMember(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName, - uint64_t offsetInArchive, bool forceHidden) { + uint64_t offsetInArchive, bool forceHidden, bool compatArch) { if (config->zeroModTime) modTime = 0; switch (identify_magic(mb.getBuffer())) { case file_magic::macho_object: - return make<ObjFile>(mb, modTime, archiveName, /*lazy=*/false, forceHidden); + return make<ObjFile>(mb, modTime, archiveName, /*lazy=*/false, forceHidden, + compatArch); case file_magic::bitcode: return make<BitcodeFile>(mb, archiveName, offsetInArchive, /*lazy=*/false, - forceHidden); + forceHidden, compatArch); default: return createStringError(inconvertibleErrorCode(), mb.getBufferIdentifier() + @@ -2128,8 +2198,9 @@ Error ArchiveFile::fetch(const object::Archive::Child &c, StringRef reason) { if (!modTime) return modTime.takeError(); - Expected<InputFile *> file = loadArchiveMember( - *mb, toTimeT(*modTime), getName(), c.getChildOffset(), forceHidden); + Expected<InputFile *> file = + loadArchiveMember(*mb, toTimeT(*modTime), getName(), c.getChildOffset(), + forceHidden, compatArch); if 
(!file) return file.takeError(); @@ -2192,13 +2263,20 @@ static macho::Symbol *createBitcodeSymbol(const lto::InputFile::Symbol &objSym, } BitcodeFile::BitcodeFile(MemoryBufferRef mb, StringRef archiveName, - uint64_t offsetInArchive, bool lazy, bool forceHidden) + uint64_t offsetInArchive, bool lazy, bool forceHidden, + bool compatArch) : InputFile(BitcodeKind, mb, lazy), forceHidden(forceHidden) { this->archiveName = std::string(archiveName); + this->compatArch = compatArch; std::string path = mb.getBufferIdentifier().str(); if (config->thinLTOIndexOnly) path = replaceThinLTOSuffix(mb.getBufferIdentifier()); + // If the parent archive already determines that the arch is not compat with + // target, then just return. + if (!compatArch) + return; + // ThinLTO assumes that all MemoryBufferRefs given to it have a unique // name. If two members with the same name are provided, this causes a // collision and ThinLTO can't proceed. @@ -2222,9 +2300,16 @@ void BitcodeFile::parse() { // Convert LTO Symbols to LLD Symbols in order to perform resolution. The // "winning" symbol will then be marked as Prevailing at LTO compilation // time. - symbols.clear(); - for (const lto::InputFile::Symbol &objSym : obj->symbols()) - symbols.push_back(createBitcodeSymbol(objSym, *this)); + symbols.resize(obj->symbols().size()); + + // Process defined symbols first. See the comment at the end of + // ObjFile<>::parseSymbols. 
+ for (auto it : llvm::enumerate(obj->symbols())) + if (!it.value().isUndefined()) + symbols[it.index()] = createBitcodeSymbol(it.value(), *this); + for (auto it : llvm::enumerate(obj->symbols())) + if (it.value().isUndefined()) + symbols[it.index()] = createBitcodeSymbol(it.value(), *this); } void BitcodeFile::parseLazy() { diff --git a/lld/MachO/InputFiles.h b/lld/MachO/InputFiles.h index 66d46e46fa73..2e37e7ba5a00 100644 --- a/lld/MachO/InputFiles.h +++ b/lld/MachO/InputFiles.h @@ -140,6 +140,9 @@ protected: InputFile(Kind, const llvm::MachO::InterfaceFile &); + // If true, this input's arch is compatible with the target. + bool compatArch = true; + private: const Kind fileKind; const StringRef name; @@ -157,10 +160,13 @@ struct FDE { class ObjFile final : public InputFile { public: ObjFile(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName, - bool lazy = false, bool forceHidden = false); + bool lazy = false, bool forceHidden = false, bool compatArch = true, + bool builtFromBitcode = false); ArrayRef<llvm::MachO::data_in_code_entry> getDataInCode() const; ArrayRef<uint8_t> getOptimizationHints() const; template <class LP> void parse(); + template <class LP> + void parseLinkerOptions(llvm::SmallVectorImpl<StringRef> &LinkerOptions); static bool classof(const InputFile *f) { return f->kind() == ObjKind; } @@ -174,6 +180,7 @@ public: Section *addrSigSection = nullptr; const uint32_t modTime; bool forceHidden; + bool builtFromBitcode; std::vector<ConcatInputSection *> debugSections; std::vector<CallGraphEntry> callGraph; llvm::DenseMap<ConcatInputSection *, FDE> fdes; @@ -301,7 +308,7 @@ class BitcodeFile final : public InputFile { public: explicit BitcodeFile(MemoryBufferRef mb, StringRef archiveName, uint64_t offsetInArchive, bool lazy = false, - bool forceHidden = false); + bool forceHidden = false, bool compatArch = true); static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; } void parse(); @@ -314,6 +321,7 @@ private: extern 
llvm::SetVector<InputFile *> inputFiles; extern llvm::DenseMap<llvm::CachedHashStringRef, MemoryBufferRef> cachedReads; +extern llvm::SmallVector<StringRef> unprocessedLCLinkerOptions; std::optional<MemoryBufferRef> readFile(StringRef path); diff --git a/lld/MachO/LTO.cpp b/lld/MachO/LTO.cpp index fdae7e4bd1b7..7a9a9223a032 100644 --- a/lld/MachO/LTO.cpp +++ b/lld/MachO/LTO.cpp @@ -15,6 +15,7 @@ #include "lld/Common/Args.h" #include "lld/Common/CommonLinkerContext.h" +#include "lld/Common/Filesystem.h" #include "lld/Common/Strings.h" #include "lld/Common/TargetOptionsCommandFlags.h" #include "llvm/Bitcode/BitcodeWriter.h" @@ -32,19 +33,6 @@ using namespace llvm; using namespace llvm::MachO; using namespace llvm::sys; -// Creates an empty file to store a list of object files for final -// linking of distributed ThinLTO. -static std::unique_ptr<raw_fd_ostream> openFile(StringRef file) { - std::error_code ec; - auto ret = - std::make_unique<raw_fd_ostream>(file, ec, sys::fs::OpenFlags::OF_None); - if (ec) { - error("cannot open " + file + ": " + ec.message()); - return nullptr; - } - return ret; -} - static std::string getThinLTOOutputFile(StringRef modulePath) { return lto::getThinLTOOutputFile(modulePath, config->thinLTOPrefixReplaceOld, config->thinLTOPrefixReplaceNew); @@ -71,6 +59,7 @@ static lto::Config createConfig() { c.DebugPassManager = config->ltoDebugPassManager; c.CSIRProfile = std::string(config->csProfilePath); c.RunCSIRInstr = config->csProfileGenerate; + c.PGOWarnMismatch = config->pgoWarnMismatch; c.OptLevel = config->ltoo; c.CGOptLevel = config->ltoCgo; if (config->saveTemps) @@ -305,7 +294,9 @@ std::vector<ObjFile *> BitcodeCompiler::compile() { modTime = getModTime(filePath); } ret.push_back(make<ObjFile>( - MemoryBufferRef(objBuf, saver().save(filePath.str())), modTime, "")); + MemoryBufferRef(objBuf, saver().save(filePath.str())), modTime, + /*archiveName=*/"", /*lazy=*/false, + /*forceHidden=*/false, /*compatArch=*/true, 
/*builtFromBitcode=*/true)); } return ret; diff --git a/lld/MachO/Options.td b/lld/MachO/Options.td index b60f5e44c3c1..f92e6cda31e5 100644 --- a/lld/MachO/Options.td +++ b/lld/MachO/Options.td @@ -1,5 +1,10 @@ include "llvm/Option/OptParser.td" +multiclass BB<string name, string help1, string help2> { + def NAME: Flag<["--"], name>, HelpText<help1>; + def no_ # NAME: Flag<["--"], "no-" # name>, HelpText<help2>; +} + // Flags that lld/MachO understands but ld64 doesn't. These take // '--' instead of '-' and use dashes instead of underscores, so // they don't collide with the ld64 compat options. @@ -130,6 +135,9 @@ def cs_profile_generate: Flag<["--"], "cs-profile-generate">, HelpText<"Perform context senstive PGO instrumentation">, Group<grp_lld>; def cs_profile_path: Joined<["--"], "cs-profile-path=">, HelpText<"Context sensitive profile file path">, Group<grp_lld>; +defm pgo_warn_mismatch: BB<"pgo-warn-mismatch", + "turn on warnings about profile cfg mismatch (default)", + "turn off warnings about profile cfg mismatch">, Group<grp_lld>; // This is a complete Options.td compiled from Apple's ld(1) manpage // dated 2018-03-07 and cross checked with ld64 source code in repo diff --git a/lld/MachO/SymbolTable.cpp b/lld/MachO/SymbolTable.cpp index 283edf24ab06..825242f2cc72 100644 --- a/lld/MachO/SymbolTable.cpp +++ b/lld/MachO/SymbolTable.cpp @@ -150,10 +150,48 @@ Defined *SymbolTable::addDefined(StringRef name, InputFile *file, overridesWeakDef = !isWeakDef && dysym->isWeakDef(); dysym->unreference(); } else if (auto *undef = dyn_cast<Undefined>(s)) { - // Preserve the original bitcode file name (instead of using the object - // file name). - if (undef->wasBitcodeSymbol) - file = undef->getFile(); + if (undef->wasBitcodeSymbol) { + auto objFile = dyn_cast<ObjFile>(file); + if (!objFile) { + // The file must be a native object file, as opposed to potentially + // being another bitcode file. 
A situation arises when some symbols + // are defined through `module asm` and thus they are not present in the + // bitcode's symbol table. Consider bitcode modules `A`, `B`, and `C`. + // LTO compiles only `A` and `C`, since there's no explicit symbol + // reference to `B` other than a symbol from `A` via `module asm`. + // After LTO is finished, the missing symbol now appears in the + // resulting object file for `A`, which prematurely resolves another + // prevailing symbol with `B` that hasn't been compiled, instead of + // the resulting object for `C`. Consequently, an incorrect + // relocation is generated for the prevailing symbol. + assert(isa<BitcodeFile>(file) && "Bitcode file is expected."); + std::string message = + "The pending prevailing symbol(" + name.str() + + ") in the bitcode file(" + toString(undef->getFile()) + + ") is overridden by a non-native object (from bitcode): " + + toString(file); + error(message); + } else if (!objFile->builtFromBitcode) { + // Ideally, this should be an object file compiled from a bitcode + // file. However, this might not hold true if an LC linker option is + // used. In case LTO internalizes a prevailing hidden weak symbol, + // there's a situation where an unresolved prevailing symbol might be + // linked with the corresponding one from a native library, which is + // loaded later after LTO. Although this could potentially result in + // an ODR violation, we choose to permit this scenario as a warning. + std::string message = "The pending prevailing symbol(" + name.str() + + ") in the bitcode file(" + + toString(undef->getFile()) + + ") is overridden by a post-processed native " + "object (from native archive): " + + toString(file); + warn(message); + } else { + // Preserve the original bitcode file name (instead of using the + // object file name). 
+ file = undef->getFile(); + } + } } // Defined symbols take priority over other types of symbols, so in case // of a name conflict, we fall through to the replaceSymbol() call below. diff --git a/lld/MachO/Symbols.h b/lld/MachO/Symbols.h index 88efc7a18c76..f30294dbea9c 100644 --- a/lld/MachO/Symbols.h +++ b/lld/MachO/Symbols.h @@ -58,14 +58,14 @@ public: virtual uint64_t getVA() const { return 0; } - virtual bool isWeakDef() const { llvm_unreachable("cannot be weak def"); } + virtual bool isWeakDef() const { return false; } // Only undefined or dylib symbols can be weak references. A weak reference // need not be satisfied at runtime, e.g. due to the symbol not being // available on a given target platform. virtual bool isWeakRef() const { return false; } - virtual bool isTlv() const { llvm_unreachable("cannot be TLV"); } + virtual bool isTlv() const { return false; } // Whether this symbol is in the GOT or TLVPointer sections. bool isInGot() const { return gotIndex != UINT32_MAX; } diff --git a/lld/docs/ELF/linker_script.rst b/lld/docs/ELF/linker_script.rst index bc2037595e5f..3606ef4fe4b8 100644 --- a/lld/docs/ELF/linker_script.rst +++ b/lld/docs/ELF/linker_script.rst @@ -97,6 +97,16 @@ The presence of ``address`` can cause the condition unsatisfied. LLD will warn. GNU ld from Binutils 2.35 onwards will reduce sh_addralign so that sh_addr=0 (modulo sh_addralign). +When an output section has no input section, GNU ld will eliminate it if it +only contains symbol assignments (e.g. ``.foo { symbol = 42; }``). LLD will +retain such sections unless all the symbol assignments are unreferenced +``PROVIDED``. + +When an output section has no input section but advances the location counter, +GNU ld sets the ``SHF_WRITE`` flag. LLD sets the SHF_WRITE flag only if the +preceding output section with non-empty input sections also has the SHF_WRITE +flag. 
+ Output section type ------------------- @@ -172,3 +182,18 @@ description in the ``OVERWRITE_SECTIONS`` command while the insert command still applies (possibly after orphan section placement). It is recommended to leave the brace empty (i.e. ``section : {}``) for the insert command, because its description will be ignored anyway. + +Built-in functions +~~~~~~~~~~~~~~~~~~ + +``DATA_SEGMENT_RELRO_END(offset, exp)`` defines the end of the ``PT_GNU_RELRO`` +segment when ``-z relro`` (default) is in effect. Sections between +``DATA_SEGMENT_ALIGN`` and ``DATA_SEGMENT_RELRO_END`` are considered RELRO. + +The typical use case is ``. = DATA_SEGMENT_RELRO_END(0, .);`` followed by +writable but non-RELRO sections. LLD ignores ``offset`` and ``exp`` and aligns +the current location to a max-page-size boundary, ensuring that the next +``PT_LOAD`` segment will not overlap with the ``PT_GNU_RELRO`` segment. + +LLD will insert ``.relro_padding`` immediately before the symbol assignment +using ``DATA_SEGMENT_RELRO_END``. diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst index d8e34d1e6c74..c322b776ff58 100644 --- a/lld/docs/ReleaseNotes.rst +++ b/lld/docs/ReleaseNotes.rst @@ -26,65 +26,11 @@ Non-comprehensive list of changes in this release ELF Improvements ---------------- -* When ``--threads=`` is not specified, the number of concurrency is now capped to 16. - A large ``--thread=`` can harm performance, especially with some system - malloc implementations like glibc's. - (`D147493 <https://reviews.llvm.org/D147493>`_) -* ``--remap-inputs=`` and ``--remap-inputs-file=`` are added to remap input files. - (`D148859 <https://reviews.llvm.org/D148859>`_) -* ``--lto=`` is now available to support ``clang -funified-lto`` - (`D123805 <https://reviews.llvm.org/D123805>`_) -* ``--lto-CGO[0-3]`` is now available to control ``CodeGenOpt::Level`` independent of the LTO optimization level. 
- (`D141970 <https://reviews.llvm.org/D141970>`_) -* ``--check-dynamic-relocations=`` is now correct 32-bit targets when the addend is larger than 0x80000000. - (`D149347 <https://reviews.llvm.org/D149347>`_) -* ``--print-memory-usage`` has been implemented for memory regions. - (`D150644 <https://reviews.llvm.org/D150644>`_) -* ``SHF_MERGE``, ``--icf=``, and ``--build-id=fast`` have switched to 64-bit xxh3. - (`D154813 <https://reviews.llvm.org/D154813>`_) -* Quoted output section names can now be used in linker scripts. - (`#60496 <https://github.com/llvm/llvm-project/issues/60496>`_) -* ``MEMORY`` can now be used without a ``SECTIONS`` command. - (`D145132 <https://reviews.llvm.org/D145132>`_) -* ``REVERSE`` can now be used in input section descriptions to reverse the order of input sections. - (`D145381 <https://reviews.llvm.org/D145381>`_) -* Program header assignment can now be used within ``OVERLAY``. This functionality was accidentally lost in 2020. - (`D150445 <https://reviews.llvm.org/D150445>`_) -* Operators ``^`` and ``^=`` can now be used in linker scripts. -* LoongArch is now supported. -* ``DT_AARCH64_MEMTAG_*`` dynamic tags are now supported. - (`D143769 <https://reviews.llvm.org/D143769>`_) -* AArch32 port now supports BE-8 and BE-32 modes for big-endian. - (`D140201 <https://reviews.llvm.org/D140201>`_) - (`D140202 <https://reviews.llvm.org/D140202>`_) - (`D150870 <https://reviews.llvm.org/D150870>`_) -* ``R_ARM_THM_ALU_ABS_G*`` relocations are now supported. - (`D153407 <https://reviews.llvm.org/D153407>`_) -* ``.ARM.exidx`` sections may start at non-zero output section offset. - (`D148033 <https://reviews.llvm.org/D148033>`_) -* Arm Cortex-M Security Extensions is now implemented. - (`D139092 <https://reviews.llvm.org/D139092>`_) -* BTI landing pads are now added to PLT entries accessed by range extension thunks or relative vtables. 
- (`D148704 <https://reviews.llvm.org/D148704>`_) - (`D153264 <https://reviews.llvm.org/D153264>`_) -* AArch64 short range thunk has been implemented to mitigate the performance loss of a long range thunk. - (`D148701 <https://reviews.llvm.org/D148701>`_) -* ``R_AVR_8_LO8/R_AVR_8_HI8/R_AVR_8_HLO8/R_AVR_LO8_LDI_GS/R_AVR_HI8_LDI_GS`` have been implemented. - (`D147100 <https://reviews.llvm.org/D147100>`_) - (`D147364 <https://reviews.llvm.org/D147364>`_) -* ``--no-power10-stubs`` now works for PowerPC64. -* ``DT_PPC64_OPT`` is now supported; - (`D150631 <https://reviews.llvm.org/D150631>`_) -* ``PT_RISCV_ATTRIBUTES`` is added to include the SHT_RISCV_ATTRIBUTES section. - (`D152065 <https://reviews.llvm.org/D152065>`_) -* ``R_RISCV_PLT32`` is added to support C++ relative vtables. - (`D143115 <https://reviews.llvm.org/D143115>`_) -* RISC-V global pointer relaxation has been implemented. Specify ``--relax-gp`` to enable the linker relaxation. - (`D143673 <https://reviews.llvm.org/D143673>`_) -* The symbol value of ``foo`` is correctly handled when ``--wrap=foo`` and RISC-V linker relaxation are used. - (`D151768 <https://reviews.llvm.org/D151768>`_) -* x86-64 large data sections are now placed away from code sections to alleviate relocation overflow pressure. - (`D150510 <https://reviews.llvm.org/D150510>`_) +* ``--fat-lto-objects`` option is added to support LLVM FatLTO. + Without ``--fat-lto-objects``, LLD will link LLVM FatLTO objects using the + relocatable object file. (`D146778 <https://reviews.llvm.org/D146778>`_) +* common-page-size can now be larger than the system page-size. + (`#57618 <https://github.com/llvm/llvm-project/issues/57618>`_) Breaking changes ---------------- @@ -92,17 +38,8 @@ Breaking changes COFF Improvements ----------------- -* lld-link can now find libraries with relative paths that are relative to - `/libpath`. Before it would only be able to find libraries relative to the - current directory. - I.e. 
``lld-link /libpath:c:\relative\root relative\path\my.lib`` where before - we would have to do ``lld-link /libpath:c:\relative\root\relative\path my.lib`` -* lld-link learned -print-search-paths that will print all the paths where it will - search for libraries. -* By default lld-link will now search for libraries in the toolchain directories. - Specifically it will search: - ``<toolchain>/lib``, ``<toolchain>/lib/clang/<version>/lib`` and - ``<toolchain>/lib/clang/<version>/lib/windows``. +* Added support for ``--time-trace`` and associated ``--time-trace-granularity``. + This generates a .json profile trace of the linker execution. MinGW Improvements ------------------ @@ -115,7 +52,3 @@ WebAssembly Improvements Fixes ##### - -* Arm exception index tables (.ARM.exidx sections) are now output - correctly when they are at a non zero offset within their output - section. (`D148033 <https://reviews.llvm.org/D148033>`_) diff --git a/lld/docs/ld.lld.1 b/lld/docs/ld.lld.1 index 0a5e4293deda..12b17dd37796 100644 --- a/lld/docs/ld.lld.1 +++ b/lld/docs/ld.lld.1 @@ -82,6 +82,9 @@ Bind default visibility defined symbols locally for Also set the .Dv DF_SYMBOLIC flag. +.It Fl Bsymbolic-non-weak +Bind default visibility defined STB_GLOBAL symbols locally for +.Fl shared. .It Fl Bsymbolic-functions Bind default visibility defined function symbols locally for .Fl shared. @@ -117,6 +120,19 @@ is not intended to be cryptographically secure. .It Fl -build-id Synonym for .Fl -build-id Ns = Ns Cm fast . +.It Fl -call-graph-profile-sort Ns = Ns Ar algorithm +.Ar algorithm +may be: +.Pp +.Bl -tag -width 2n -compact +.It Cm none +Ignore call graph profile. +.It Cm hfsort +Use hfsort. +.It Cm cdsort +Use cdsort (default). +.El +.Pp .It Fl -color-diagnostics Ns = Ns Ar value Use colors in diagnostics. .Ar value @@ -621,6 +637,10 @@ Number of threads. (default) means all of concurrent threads supported. .Cm 1 disables multi-threading. 
+.It Fl -fat-lto-objects +Use the .llvm.lto section, which contains LLVM bitcode, in fat LTO object files to perform LTO. +.It Fl -no-fat-lto-objects +Ignore the .llvm.lto section in relocatable object files (default). .It Fl -time-trace Record time trace. .It Fl -time-trace-file Ns = Ns Ar file diff --git a/lld/include/lld/Common/Filesystem.h b/lld/include/lld/Common/Filesystem.h index 63a0f554a06c..61b32eec2ee7 100644 --- a/lld/include/lld/Common/Filesystem.h +++ b/lld/include/lld/Common/Filesystem.h @@ -10,11 +10,15 @@ #define LLD_FILESYSTEM_H #include "lld/Common/LLVM.h" +#include "llvm/Support/raw_ostream.h" +#include <memory> #include <system_error> namespace lld { void unlinkAsync(StringRef path); std::error_code tryCreateFile(StringRef path); +std::unique_ptr<llvm::raw_fd_ostream> openFile(StringRef file); +std::unique_ptr<llvm::raw_fd_ostream> openLTOOutputFile(StringRef file); } // namespace lld #endif diff --git a/lld/tools/lld/lld.cpp b/lld/tools/lld/lld.cpp index 2c30bc905106..a0a7cb0be8f1 100644 --- a/lld/tools/lld/lld.cpp +++ b/lld/tools/lld/lld.cpp @@ -30,7 +30,6 @@ #include "lld/Common/Memory.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/CrashRecoveryContext.h" |
