diff options
Diffstat (limited to 'COFF')
-rw-r--r-- | COFF/Chunks.cpp | 327 | ||||
-rw-r--r-- | COFF/Chunks.h | 121 | ||||
-rw-r--r-- | COFF/Config.h | 5 | ||||
-rw-r--r-- | COFF/DLL.cpp | 102 | ||||
-rw-r--r-- | COFF/DLL.h | 9 | ||||
-rw-r--r-- | COFF/Driver.cpp | 553 | ||||
-rw-r--r-- | COFF/Driver.h | 4 | ||||
-rw-r--r-- | COFF/DriverUtils.cpp | 24 | ||||
-rw-r--r-- | COFF/ICF.cpp | 27 | ||||
-rw-r--r-- | COFF/InputFiles.cpp | 95 | ||||
-rw-r--r-- | COFF/InputFiles.h | 30 | ||||
-rw-r--r-- | COFF/LTO.cpp | 3 | ||||
-rw-r--r-- | COFF/MapFile.cpp | 2 | ||||
-rw-r--r-- | COFF/MarkLive.cpp | 8 | ||||
-rw-r--r-- | COFF/MinGW.cpp | 42 | ||||
-rw-r--r-- | COFF/MinGW.h | 6 | ||||
-rw-r--r-- | COFF/Options.td | 37 | ||||
-rw-r--r-- | COFF/PDB.cpp | 997 | ||||
-rw-r--r-- | COFF/PDB.h | 2 | ||||
-rw-r--r-- | COFF/SymbolTable.cpp | 185 | ||||
-rw-r--r-- | COFF/SymbolTable.h | 8 | ||||
-rw-r--r-- | COFF/Symbols.cpp | 9 | ||||
-rw-r--r-- | COFF/Symbols.h | 14 | ||||
-rw-r--r-- | COFF/Writer.cpp | 723 | ||||
-rw-r--r-- | COFF/Writer.h | 6 |
25 files changed, 2435 insertions, 904 deletions
diff --git a/COFF/Chunks.cpp b/COFF/Chunks.cpp index 412ff783222b..29131d7eb8db 100644 --- a/COFF/Chunks.cpp +++ b/COFF/Chunks.cpp @@ -11,6 +11,7 @@ #include "InputFiles.h" #include "Symbols.h" #include "Writer.h" +#include "SymbolTable.h" #include "lld/Common/ErrorHandler.h" #include "llvm/ADT/Twine.h" #include "llvm/BinaryFormat/COFF.h" @@ -44,6 +45,22 @@ SectionChunk::SectionChunk(ObjFile *F, const coff_section *H) Live = !Config->DoGC || !isCOMDAT(); } +// Initialize the RelocTargets vector, to allow redirecting certain relocations +// to a thunk instead of the actual symbol the relocation's symbol table index +// indicates. +void SectionChunk::readRelocTargets() { + assert(RelocTargets.empty()); + RelocTargets.reserve(Relocs.size()); + for (const coff_relocation &Rel : Relocs) + RelocTargets.push_back(File->getSymbol(Rel.SymbolTableIndex)); +} + +// Reset RelocTargets to their original targets before thunks were added. +void SectionChunk::resetRelocTargets() { + for (size_t I = 0, E = Relocs.size(); I < E; ++I) + RelocTargets[I] = File->getSymbol(Relocs[I].SymbolTableIndex); +} + static void add16(uint8_t *P, int16_t V) { write16le(P, read16le(P) + V); } static void add32(uint8_t *P, int32_t V) { write32le(P, read32le(P) + V); } static void add64(uint8_t *P, int64_t V) { write64le(P, read64le(P) + V); } @@ -58,7 +75,8 @@ static bool checkSecRel(const SectionChunk *Sec, OutputSection *OS) { return true; if (Sec->isCodeView()) return false; - fatal("SECREL relocation cannot be applied to absolute symbols"); + error("SECREL relocation cannot be applied to absolute symbols"); + return false; } static void applySecRel(const SectionChunk *Sec, uint8_t *Off, @@ -98,7 +116,7 @@ void SectionChunk::applyRelX64(uint8_t *Off, uint16_t Type, OutputSection *OS, case IMAGE_REL_AMD64_SECTION: applySecIdx(Off, OS); break; case IMAGE_REL_AMD64_SECREL: applySecRel(this, Off, OS, S); break; default: - fatal("unsupported relocation type 0x" + Twine::utohexstr(Type) + " in " + + error("unsupported relocation type 0x" + Twine::utohexstr(Type) + " in " + toString(File)); } } @@ -113,7 +131,7 @@ void SectionChunk::applyRelX86(uint8_t *Off, uint16_t Type, OutputSection *OS, case IMAGE_REL_I386_SECTION: applySecIdx(Off, OS); break; case IMAGE_REL_I386_SECREL: applySecRel(this, Off, OS, S); break; default: - fatal("unsupported relocation type 0x" + Twine::utohexstr(Type) + " in " + + error("unsupported relocation type 0x" + Twine::utohexstr(Type) + " in " + toString(File)); } } @@ -123,16 +141,22 @@ static void applyMOV(uint8_t *Off, uint16_t V) { write16le(Off + 2, (read16le(Off + 2) & 0x8f00) | ((V & 0x700) << 4) | (V & 0xff)); } -static uint16_t readMOV(uint8_t *Off) { +static uint16_t readMOV(uint8_t *Off, bool MOVT) { uint16_t Op1 = read16le(Off); + if ((Op1 & 0xfbf0) != (MOVT ? 0xf2c0 : 0xf240)) + error("unexpected instruction in " + Twine(MOVT ? "MOVT" : "MOVW") + + " instruction in MOV32T relocation"); uint16_t Op2 = read16le(Off + 2); + if ((Op2 & 0x8000) != 0) + error("unexpected instruction in " + Twine(MOVT ? "MOVT" : "MOVW") + + " instruction in MOV32T relocation"); return (Op2 & 0x00ff) | ((Op2 >> 4) & 0x0700) | ((Op1 << 1) & 0x0800) | ((Op1 & 0x000f) << 12); } void applyMOV32T(uint8_t *Off, uint32_t V) { - uint16_t ImmW = readMOV(Off); // read MOVW operand - uint16_t ImmT = readMOV(Off + 4); // read MOVT operand + uint16_t ImmW = readMOV(Off, false); // read MOVW operand + uint16_t ImmT = readMOV(Off + 4, true); // read MOVT operand uint32_t Imm = ImmW | (ImmT << 16); V += Imm; // add the immediate offset applyMOV(Off, V); // set MOVW operand @@ -141,7 +165,7 @@ void applyMOV32T(uint8_t *Off, uint32_t V) { static void applyBranch20T(uint8_t *Off, int32_t V) { if (!isInt<21>(V)) - fatal("relocation out of range"); + error("relocation out of range"); uint32_t S = V < 0 ? 1 : 0; uint32_t J1 = (V >> 19) & 1; uint32_t J2 = (V >> 18) & 1; @@ -151,7 +175,7 @@ static void applyBranch20T(uint8_t *Off, int32_t V) { void applyBranch24T(uint8_t *Off, int32_t V) { if (!isInt<25>(V)) - fatal("relocation out of range"); + error("relocation out of range"); uint32_t S = V < 0 ? 1 : 0; uint32_t J1 = ((~V >> 23) & 1) ^ S; uint32_t J2 = ((~V >> 22) & 1) ^ S; @@ -176,7 +200,7 @@ void SectionChunk::applyRelARM(uint8_t *Off, uint16_t Type, OutputSection *OS, case IMAGE_REL_ARM_SECTION: applySecIdx(Off, OS); break; case IMAGE_REL_ARM_SECREL: applySecRel(this, Off, OS, S); break; default: - fatal("unsupported relocation type 0x" + Twine::utohexstr(Type) + " in " + + error("unsupported relocation type 0x" + Twine::utohexstr(Type) + " in " + toString(File)); } } @@ -184,7 +208,7 @@ void SectionChunk::applyRelARM(uint8_t *Off, uint16_t Type, OutputSection *OS, // Interpret the existing immediate value as a byte offset to the // target symbol, then update the instruction with the immediate as // the page offset from the current instruction to the target. -static void applyArm64Addr(uint8_t *Off, uint64_t S, uint64_t P, int Shift) { +void applyArm64Addr(uint8_t *Off, uint64_t S, uint64_t P, int Shift) { uint32_t Orig = read32le(Off); uint64_t Imm = ((Orig >> 29) & 0x3) | ((Orig >> 3) & 0x1FFFFC); S += Imm; @@ -198,7 +222,7 @@ static void applyArm64Addr(uint8_t *Off, uint64_t S, uint64_t P, int Shift) { // Update the immediate field in a AARCH64 ldr, str, and add instruction. // Optionally limit the range of the written immediate by one or more bits // (RangeLimit). -static void applyArm64Imm(uint8_t *Off, uint64_t Imm, uint32_t RangeLimit) { +void applyArm64Imm(uint8_t *Off, uint64_t Imm, uint32_t RangeLimit) { uint32_t Orig = read32le(Off); Imm += (Orig >> 10) & 0xFFF; Orig &= ~(0xFFF << 10); @@ -221,7 +245,7 @@ static void applyArm64Ldr(uint8_t *Off, uint64_t Imm) { if ((Orig & 0x4800000) == 0x4800000) Size += 4; if ((Imm & ((1 << Size) - 1)) != 0) - fatal("misaligned ldr/str offset"); + error("misaligned ldr/str offset"); applyArm64Imm(Off, Imm >> Size, Size); } @@ -250,21 +274,21 @@ static void applySecRelLdr(const SectionChunk *Sec, uint8_t *Off, applyArm64Ldr(Off, (S - OS->getRVA()) & 0xfff); } -static void applyArm64Branch26(uint8_t *Off, int64_t V) { +void applyArm64Branch26(uint8_t *Off, int64_t V) { if (!isInt<28>(V)) - fatal("relocation out of range"); + error("relocation out of range"); or32(Off, (V & 0x0FFFFFFC) >> 2); } static void applyArm64Branch19(uint8_t *Off, int64_t V) { if (!isInt<21>(V)) - fatal("relocation out of range"); + error("relocation out of range"); or32(Off, (V & 0x001FFFFC) << 3); } static void applyArm64Branch14(uint8_t *Off, int64_t V) { if (!isInt<16>(V)) - fatal("relocation out of range"); + error("relocation out of range"); or32(Off, (V & 0x0000FFFC) << 3); } @@ -287,11 +311,37 @@ void SectionChunk::applyRelARM64(uint8_t *Off, uint16_t Type, OutputSection *OS, case IMAGE_REL_ARM64_SECREL_LOW12L: applySecRelLdr(this, Off, OS, S); break; case IMAGE_REL_ARM64_SECTION: applySecIdx(Off, OS); break; default: - fatal("unsupported relocation type 0x" + Twine::utohexstr(Type) + " in " + + error("unsupported relocation type 0x" + Twine::utohexstr(Type) + " in " + toString(File)); } } +static void maybeReportRelocationToDiscarded(const SectionChunk *FromChunk, + Defined *Sym, + const coff_relocation &Rel) { + // Don't report these errors when the relocation comes from a debug info + // section or in mingw mode. MinGW mode object files (built by GCC) can + // have leftover sections with relocations against discarded comdat + // sections. Such sections are left as is, with relocations untouched. + if (FromChunk->isCodeView() || FromChunk->isDWARF() || Config->MinGW) + return; + + // Get the name of the symbol. If it's null, it was discarded early, so we + // have to go back to the object file. + ObjFile *File = FromChunk->File; + StringRef Name; + if (Sym) { + Name = Sym->getName(); + } else { + COFFSymbolRef COFFSym = + check(File->getCOFFObj()->getSymbol(Rel.SymbolTableIndex)); + File->getCOFFObj()->getSymbolName(COFFSym, Name); + } + + error("relocation against symbol in discarded section: " + Name + + getSymbolLocations(File, Rel.SymbolTableIndex)); +} + void SectionChunk::writeTo(uint8_t *Buf) const { if (!hasData()) return; @@ -302,46 +352,40 @@ void SectionChunk::writeTo(uint8_t *Buf) const { // Apply relocations. size_t InputSize = getSize(); - for (const coff_relocation &Rel : Relocs) { + for (size_t I = 0, E = Relocs.size(); I < E; I++) { + const coff_relocation &Rel = Relocs[I]; + // Check for an invalid relocation offset. This check isn't perfect, because // we don't have the relocation size, which is only known after checking the // machine and relocation type. As a result, a relocation may overwrite the // beginning of the following input section. - if (Rel.VirtualAddress >= InputSize) - fatal("relocation points beyond the end of its parent section"); + if (Rel.VirtualAddress >= InputSize) { + error("relocation points beyond the end of its parent section"); + continue; + } uint8_t *Off = Buf + OutputSectionOff + Rel.VirtualAddress; + // Use the potentially remapped Symbol instead of the one that the + // relocation points to. + auto *Sym = dyn_cast_or_null<Defined>(RelocTargets[I]); + // Get the output section of the symbol for this relocation. The output // section is needed to compute SECREL and SECTION relocations used in debug // info. - auto *Sym = - dyn_cast_or_null<Defined>(File->getSymbol(Rel.SymbolTableIndex)); - if (!Sym) { - if (isCodeView() || isDWARF()) - continue; - // Symbols in early discarded sections are represented using null pointers, - // so we need to retrieve the name from the object file. - COFFSymbolRef Sym = - check(File->getCOFFObj()->getSymbol(Rel.SymbolTableIndex)); - StringRef Name; - File->getCOFFObj()->getSymbolName(Sym, Name); - fatal("relocation against symbol in discarded section: " + Name); - } - Chunk *C = Sym->getChunk(); + Chunk *C = Sym ? Sym->getChunk() : nullptr; OutputSection *OS = C ? C->getOutputSection() : nullptr; - // Only absolute and __ImageBase symbols lack an output section. For any - // other symbol, this indicates that the chunk was discarded. Normally - // relocations against discarded sections are an error. However, debug info - // sections are not GC roots and can end up with these kinds of relocations. - // Skip these relocations. - if (!OS && !isa<DefinedAbsolute>(Sym) && !isa<DefinedSynthetic>(Sym)) { - if (isCodeView() || isDWARF()) - continue; - fatal("relocation against symbol in discarded section: " + - Sym->getName()); + // Skip the relocation if it refers to a discarded section, and diagnose it + // as an error if appropriate. If a symbol was discarded early, it may be + // null. If it was discarded late, the output section will be null, unless + // it was an absolute or synthetic symbol. + if (!Sym || + (!OS && !isa<DefinedAbsolute>(Sym) && !isa<DefinedSynthetic>(Sym))) { + maybeReportRelocationToDiscarded(this, Sym, Rel); + continue; } + uint64_t S = Sym->getRVA(); // Compute the RVA of the relocation for relative relocations. @@ -399,17 +443,125 @@ static uint8_t getBaserelType(const coff_relocation &Rel) { // fixed by the loader if load-time relocation is needed. // Only called when base relocation is enabled. void SectionChunk::getBaserels(std::vector<Baserel> *Res) { - for (const coff_relocation &Rel : Relocs) { + for (size_t I = 0, E = Relocs.size(); I < E; I++) { + const coff_relocation &Rel = Relocs[I]; uint8_t Ty = getBaserelType(Rel); if (Ty == IMAGE_REL_BASED_ABSOLUTE) continue; - Symbol *Target = File->getSymbol(Rel.SymbolTableIndex); + // Use the potentially remapped Symbol instead of the one that the + // relocation points to. + Symbol *Target = RelocTargets[I]; if (!Target || isa<DefinedAbsolute>(Target)) continue; Res->emplace_back(RVA + Rel.VirtualAddress, Ty); } } +// MinGW specific. +// Check whether a static relocation of type Type can be deferred and +// handled at runtime as a pseudo relocation (for references to a module +// local variable, which turned out to actually need to be imported from +// another DLL) This returns the size the relocation is supposed to update, +// in bits, or 0 if the relocation cannot be handled as a runtime pseudo +// relocation. +static int getRuntimePseudoRelocSize(uint16_t Type) { + // Relocations that either contain an absolute address, or a plain + // relative offset, since the runtime pseudo reloc implementation + // adds 8/16/32/64 bit values to a memory address. + // + // Given a pseudo relocation entry, + // + // typedef struct { + // DWORD sym; + // DWORD target; + // DWORD flags; + // } runtime_pseudo_reloc_item_v2; + // + // the runtime relocation performs this adjustment: + // *(base + .target) += *(base + .sym) - (base + .sym) + // + // This works for both absolute addresses (IMAGE_REL_*_ADDR32/64, + // IMAGE_REL_I386_DIR32, where the memory location initially contains + // the address of the IAT slot, and for relative addresses (IMAGE_REL*_REL32), + // where the memory location originally contains the relative offset to the + // IAT slot. + // + // This requires the target address to be writable, either directly out of + // the image, or temporarily changed at runtime with VirtualProtect. + // Since this only operates on direct address values, it doesn't work for + // ARM/ARM64 relocations, other than the plain ADDR32/ADDR64 relocations. + switch (Config->Machine) { + case AMD64: + switch (Type) { + case IMAGE_REL_AMD64_ADDR64: + return 64; + case IMAGE_REL_AMD64_ADDR32: + case IMAGE_REL_AMD64_REL32: + case IMAGE_REL_AMD64_REL32_1: + case IMAGE_REL_AMD64_REL32_2: + case IMAGE_REL_AMD64_REL32_3: + case IMAGE_REL_AMD64_REL32_4: + case IMAGE_REL_AMD64_REL32_5: + return 32; + default: + return 0; + } + case I386: + switch (Type) { + case IMAGE_REL_I386_DIR32: + case IMAGE_REL_I386_REL32: + return 32; + default: + return 0; + } + case ARMNT: + switch (Type) { + case IMAGE_REL_ARM_ADDR32: + return 32; + default: + return 0; + } + case ARM64: + switch (Type) { + case IMAGE_REL_ARM64_ADDR64: + return 64; + case IMAGE_REL_ARM64_ADDR32: + return 32; + default: + return 0; + } + default: + llvm_unreachable("unknown machine type"); + } +} + +// MinGW specific. +// Append information to the provided vector about all relocations that +// need to be handled at runtime as runtime pseudo relocations (references +// to a module local variable, which turned out to actually need to be +// imported from another DLL). +void SectionChunk::getRuntimePseudoRelocs( + std::vector<RuntimePseudoReloc> &Res) { + for (const coff_relocation &Rel : Relocs) { + auto *Target = + dyn_cast_or_null<Defined>(File->getSymbol(Rel.SymbolTableIndex)); + if (!Target || !Target->IsRuntimePseudoReloc) + continue; + int SizeInBits = getRuntimePseudoRelocSize(Rel.Type); + if (SizeInBits == 0) { + error("unable to automatically import from " + Target->getName() + + " with relocation type " + + File->getCOFFObj()->getRelocationTypeName(Rel.Type) + " in " + + toString(File)); + continue; + } + // SizeInBits is used to initialize the Flags field; currently no + // other flags are defined. + Res.emplace_back( + RuntimePseudoReloc(Target, this, Rel.VirtualAddress, SizeInBits)); + } +} + bool SectionChunk::hasData() const { return !(Header->Characteristics & IMAGE_SCN_CNT_UNINITIALIZED_DATA); } @@ -447,6 +599,13 @@ void SectionChunk::replace(SectionChunk *Other) { Other->Live = false; } +uint32_t SectionChunk::getSectionNumber() const { + DataRefImpl R; + R.p = reinterpret_cast<uintptr_t>(Header); + SectionRef S(R, File->getCOFFObj()); + return S.getIndex() + 1; +} + CommonChunk::CommonChunk(const COFFSymbolRef S) : Sym(S) { // Common symbols are aligned on natural boundaries up to 32 bytes. // This is what MSVC link.exe does. @@ -460,6 +619,7 @@ uint32_t CommonChunk::getOutputCharacteristics() const { void StringChunk::writeTo(uint8_t *Buf) const { memcpy(Buf + OutputSectionOff, Str.data(), Str.size()); + Buf[OutputSectionOff + Str.size()] = '\0'; } ImportThunkChunkX64::ImportThunkChunkX64(Defined *S) : ImpSymbol(S) { @@ -502,13 +662,30 @@ void ImportThunkChunkARM64::writeTo(uint8_t *Buf) const { applyArm64Ldr(Buf + OutputSectionOff + 4, Off); } +// A Thumb2, PIC, non-interworking range extension thunk. +const uint8_t ArmThunk[] = { + 0x40, 0xf2, 0x00, 0x0c, // P: movw ip,:lower16:S - (P + (L1-P) + 4) + 0xc0, 0xf2, 0x00, 0x0c, // movt ip,:upper16:S - (P + (L1-P) + 4) + 0xe7, 0x44, // L1: add pc, ip +}; + +size_t RangeExtensionThunk::getSize() const { + assert(Config->Machine == ARMNT); + return sizeof(ArmThunk); +} + +void RangeExtensionThunk::writeTo(uint8_t *Buf) const { + assert(Config->Machine == ARMNT); + uint64_t Offset = Target->getRVA() - RVA - 12; + memcpy(Buf + OutputSectionOff, ArmThunk, sizeof(ArmThunk)); + applyMOV32T(Buf + OutputSectionOff, uint32_t(Offset)); +} + void LocalImportChunk::getBaserels(std::vector<Baserel> *Res) { Res->emplace_back(getRVA()); } -size_t LocalImportChunk::getSize() const { - return Config->is64() ? 8 : 4; -} +size_t LocalImportChunk::getSize() const { return Config->Wordsize; } void LocalImportChunk::writeTo(uint8_t *Buf) const { if (Config->is64()) { @@ -528,6 +705,34 @@ void RVATableChunk::writeTo(uint8_t *Buf) const { "RVA tables should be de-duplicated"); } +// MinGW specific, for the "automatic import of variables from DLLs" feature. +size_t PseudoRelocTableChunk::getSize() const { + if (Relocs.empty()) + return 0; + return 12 + 12 * Relocs.size(); +} + +// MinGW specific. +void PseudoRelocTableChunk::writeTo(uint8_t *Buf) const { + if (Relocs.empty()) + return; + + ulittle32_t *Table = reinterpret_cast<ulittle32_t *>(Buf + OutputSectionOff); + // This is the list header, to signal the runtime pseudo relocation v2 + // format. + Table[0] = 0; + Table[1] = 0; + Table[2] = 1; + + size_t Idx = 3; + for (const RuntimePseudoReloc &RPR : Relocs) { + Table[Idx + 0] = RPR.Sym->getRVA(); + Table[Idx + 1] = RPR.Target->getRVA() + RPR.TargetOffset; + Table[Idx + 2] = RPR.Flags; + Idx += 3; + } +} + // Windows-specific. This class represents a block in .reloc section. // The format is described here. // @@ -613,13 +818,16 @@ void MergeChunk::addSection(SectionChunk *C) { } void MergeChunk::finalizeContents() { - for (SectionChunk *C : Sections) - if (C->isLive()) - Builder.add(toStringRef(C->getContents())); - Builder.finalize(); + if (!Finalized) { + for (SectionChunk *C : Sections) + if (C->Live) + Builder.add(toStringRef(C->getContents())); + Builder.finalize(); + Finalized = true; + } for (SectionChunk *C : Sections) { - if (!C->isLive()) + if (!C->Live) continue; size_t Off = Builder.getOffset(toStringRef(C->getContents())); C->setOutputSection(Out); @@ -640,5 +848,16 @@ void MergeChunk::writeTo(uint8_t *Buf) const { Builder.write(Buf + OutputSectionOff); } +// MinGW specific. +size_t AbsolutePointerChunk::getSize() const { return Config->Wordsize; } + +void AbsolutePointerChunk::writeTo(uint8_t *Buf) const { + if (Config->is64()) { + write64le(Buf + OutputSectionOff, Value); + } else { + write32le(Buf + OutputSectionOff, Value); + } +} + } // namespace coff } // namespace lld diff --git a/COFF/Chunks.h b/COFF/Chunks.h index 9e896531bd9a..f8a0ddd8ef3b 100644 --- a/COFF/Chunks.h +++ b/COFF/Chunks.h @@ -36,6 +36,7 @@ class DefinedImportData; class DefinedRegular; class ObjFile; class OutputSection; +class RuntimePseudoReloc; class Symbol; // Mask for permissions (discardable, writable, readable, executable, etc). @@ -63,6 +64,13 @@ public: // before calling this function. virtual void writeTo(uint8_t *Buf) const {} + // Called by the writer once before assigning addresses and writing + // the output. + virtual void readRelocTargets() {} + + // Called if restarting thunk addition. + virtual void resetRelocTargets() {} + // Called by the writer after an RVA is assigned, but before calling // getSize(). virtual void finalizeContents() {} @@ -114,6 +122,10 @@ protected: public: // The offset from beginning of the output section. The writer sets a value. uint64_t OutputSectionOff = 0; + + // Whether this section needs to be kept distinct from other sections during + // ICF. This is set by the driver using address-significance tables. + bool KeepUnique = false; }; // A chunk corresponding a section of an input file. @@ -140,6 +152,8 @@ public: SectionChunk(ObjFile *File, const coff_section *Header); static bool classof(const Chunk *C) { return C->kind() == SectionKind; } + void readRelocTargets() override; + void resetRelocTargets() override; size_t getSize() const override { return Header->SizeOfRawData; } ArrayRef<uint8_t> getContents() const; void writeTo(uint8_t *Buf) const override; @@ -157,6 +171,8 @@ public: void applyRelARM64(uint8_t *Off, uint16_t Type, OutputSection *OS, uint64_t S, uint64_t P) const; + void getRuntimePseudoRelocs(std::vector<RuntimePseudoReloc> &Res); + // Called if the garbage collector decides to not include this chunk // in a final output. It's supposed to print out a log message to stdout. void printDiscardedMessage() const; @@ -167,16 +183,6 @@ public: StringRef getDebugName() override; - // Returns true if the chunk was not dropped by GC. - bool isLive() { return Live; } - - // Used by the garbage collector. - void markLive() { - assert(Config->DoGC && "should only mark things live from GC"); - assert(!isLive() && "Cannot mark an already live section!"); - Live = true; - } - // True if this is a codeview debug info chunk. These will not be laid out in // the image. Instead they will end up in the PDB, if one is requested. bool isCodeView() const { @@ -197,10 +203,13 @@ public: // Allow iteration over the associated child chunks for this section. ArrayRef<SectionChunk *> children() const { return AssocChildren; } + // The section ID this chunk belongs to in its Obj. + uint32_t getSectionNumber() const; + // A pointer pointing to a replacement for this chunk. // Initially it points to "this" object. If this chunk is merged // with other chunk by ICF, it points to another chunk, - // and this chunk is considrered as dead. + // and this chunk is considered as dead. SectionChunk *Repl; // The CRC of the contents as described in the COFF spec 4.5.5. @@ -217,13 +226,17 @@ public: ArrayRef<coff_relocation> Relocs; + // Used by the garbage collector. + bool Live; + + // When inserting a thunk, we need to adjust a relocation to point to + // the thunk instead of the actual original target Symbol. + std::vector<Symbol *> RelocTargets; + private: StringRef SectionName; std::vector<SectionChunk *> AssocChildren; - // Used by the garbage collector. - bool Live; - // Used for ICF (Identical COMDAT Folding) void replace(SectionChunk *Other); uint32_t Class[2] = {0, 0}; @@ -254,6 +267,7 @@ public: private: llvm::StringTableBuilder Builder; + bool Finalized = false; }; // A chunk for common symbols. Common chunks don't have actual data. @@ -297,7 +311,7 @@ static const uint8_t ImportThunkARM64[] = { }; // Windows-specific. -// A chunk for DLL import jump table entry. In a final output, it's +// A chunk for DLL import jump table entry. In a final output, its // contents will be a JMP instruction to some __imp_ symbol. class ImportThunkChunkX64 : public Chunk { public: @@ -341,11 +355,22 @@ private: Defined *ImpSymbol; }; +class RangeExtensionThunk : public Chunk { +public: + explicit RangeExtensionThunk(Defined *T) : Target(T) {} + size_t getSize() const override; + void writeTo(uint8_t *Buf) const override; + + Defined *Target; +}; + // Windows-specific. // See comments for DefinedLocalImport class. class LocalImportChunk : public Chunk { public: - explicit LocalImportChunk(Defined *S) : Sym(S) {} + explicit LocalImportChunk(Defined *S) : Sym(S) { + Alignment = Config->Wordsize; + } size_t getSize() const override; void getBaserels(std::vector<Baserel> *Res) override; void writeTo(uint8_t *Buf) const override; @@ -414,9 +439,73 @@ public: uint8_t Type; }; +// This is a placeholder Chunk, to allow attaching a DefinedSynthetic to a +// specific place in a section, without any data. This is used for the MinGW +// specific symbol __RUNTIME_PSEUDO_RELOC_LIST_END__, even though the concept +// of an empty chunk isn't MinGW specific. +class EmptyChunk : public Chunk { +public: + EmptyChunk() {} + size_t getSize() const override { return 0; } + void writeTo(uint8_t *Buf) const override {} +}; + +// MinGW specific, for the "automatic import of variables from DLLs" feature. +// This provides the table of runtime pseudo relocations, for variable +// references that turned out to need to be imported from a DLL even though +// the reference didn't use the dllimport attribute. The MinGW runtime will +// process this table after loading, before handling control over to user +// code. +class PseudoRelocTableChunk : public Chunk { +public: + PseudoRelocTableChunk(std::vector<RuntimePseudoReloc> &Relocs) + : Relocs(std::move(Relocs)) { + Alignment = 4; + } + size_t getSize() const override; + void writeTo(uint8_t *Buf) const override; + +private: + std::vector<RuntimePseudoReloc> Relocs; +}; + +// MinGW specific; information about one individual location in the image +// that needs to be fixed up at runtime after loading. This represents +// one individual element in the PseudoRelocTableChunk table. +class RuntimePseudoReloc { +public: + RuntimePseudoReloc(Defined *Sym, SectionChunk *Target, uint32_t TargetOffset, + int Flags) + : Sym(Sym), Target(Target), TargetOffset(TargetOffset), Flags(Flags) {} + + Defined *Sym; + SectionChunk *Target; + uint32_t TargetOffset; + // The Flags field contains the size of the relocation, in bits. No other + // flags are currently defined. + int Flags; +}; + +// MinGW specific. A Chunk that contains one pointer-sized absolute value. +class AbsolutePointerChunk : public Chunk { +public: + AbsolutePointerChunk(uint64_t Value) : Value(Value) { + Alignment = getSize(); + } + size_t getSize() const override; + void writeTo(uint8_t *Buf) const override; + +private: + uint64_t Value; +}; + void applyMOV32T(uint8_t *Off, uint32_t V); void applyBranch24T(uint8_t *Off, int32_t V); +void applyArm64Addr(uint8_t *Off, uint64_t S, uint64_t P, int Shift); +void applyArm64Imm(uint8_t *Off, uint64_t Imm, uint32_t RangeLimit); +void applyArm64Branch26(uint8_t *Off, int64_t V); + } // namespace coff } // namespace lld diff --git a/COFF/Config.h b/COFF/Config.h index 3ae50b868333..8915b6a3bdd8 100644 --- a/COFF/Config.h +++ b/COFF/Config.h @@ -84,6 +84,7 @@ struct Configuration { bool is64() { return Machine == AMD64 || Machine == ARM64; } llvm::COFF::MachineTypes Machine = IMAGE_FILE_MACHINE_UNKNOWN; + size_t Wordsize; bool Verbose = false; WindowsSubsystem Subsystem = llvm::COFF::IMAGE_SUBSYSTEM_UNKNOWN; Symbol *Entry = nullptr; @@ -94,7 +95,8 @@ struct Configuration { bool DoICF = true; bool TailMerge; bool Relocatable = true; - bool Force = false; + bool ForceMultiple = false; + bool ForceUnresolved = false; bool Debug = false; bool DebugDwarf = false; bool DebugGHashes = false; @@ -195,6 +197,7 @@ struct Configuration { bool MinGW = false; bool WarnMissingOrderSymbol = true; bool WarnLocallyDefinedImported = true; + bool WarnDebugInfoUnusable = true; bool Incremental = true; bool IntegrityCheck = false; bool KillAt = false; diff --git a/COFF/DLL.cpp b/COFF/DLL.cpp index 464abe8e0894..599cc5892a16 100644 --- a/COFF/DLL.cpp +++ b/COFF/DLL.cpp @@ -35,8 +35,6 @@ namespace { // Import table -static int ptrSize() { return Config->is64() ? 8 : 4; } - // A chunk for the import descriptor table. class HintNameChunk : public Chunk { public: @@ -61,8 +59,8 @@ private: // A chunk for the import descriptor table. class LookupChunk : public Chunk { public: - explicit LookupChunk(Chunk *C) : HintName(C) { Alignment = ptrSize(); } - size_t getSize() const override { return ptrSize(); } + explicit LookupChunk(Chunk *C) : HintName(C) { Alignment = Config->Wordsize; } + size_t getSize() const override { return Config->Wordsize; } void writeTo(uint8_t *Buf) const override { write32le(Buf + OutputSectionOff, HintName->getRVA()); @@ -76,8 +74,10 @@ public: // See Microsoft PE/COFF spec 7.1. Import Header for details. class OrdinalOnlyChunk : public Chunk { public: - explicit OrdinalOnlyChunk(uint16_t V) : Ordinal(V) { Alignment = ptrSize(); } - size_t getSize() const override { return ptrSize(); } + explicit OrdinalOnlyChunk(uint16_t V) : Ordinal(V) { + Alignment = Config->Wordsize; + } + size_t getSize() const override { return Config->Wordsize; } void writeTo(uint8_t *Buf) const override { // An import-by-ordinal slot has MSB 1 to indicate that @@ -230,6 +230,36 @@ static const uint8_t ThunkARM[] = { 0x60, 0x47, // bx ip }; +static const uint8_t ThunkARM64[] = { + 0x11, 0x00, 0x00, 0x90, // adrp x17, #0 __imp_<FUNCNAME> + 0x31, 0x02, 0x00, 0x91, // add x17, x17, #0 :lo12:__imp_<FUNCNAME> + 0xfd, 0x7b, 0xb3, 0xa9, // stp x29, x30, [sp, #-208]! + 0xfd, 0x03, 0x00, 0x91, // mov x29, sp + 0xe0, 0x07, 0x01, 0xa9, // stp x0, x1, [sp, #16] + 0xe2, 0x0f, 0x02, 0xa9, // stp x2, x3, [sp, #32] + 0xe4, 0x17, 0x03, 0xa9, // stp x4, x5, [sp, #48] + 0xe6, 0x1f, 0x04, 0xa9, // stp x6, x7, [sp, #64] + 0xe0, 0x87, 0x02, 0xad, // stp q0, q1, [sp, #80] + 0xe2, 0x8f, 0x03, 0xad, // stp q2, q3, [sp, #112] + 0xe4, 0x97, 0x04, 0xad, // stp q4, q5, [sp, #144] + 0xe6, 0x9f, 0x05, 0xad, // stp q6, q7, [sp, #176] + 0xe1, 0x03, 0x11, 0xaa, // mov x1, x17 + 0x00, 0x00, 0x00, 0x90, // adrp x0, #0 DELAY_IMPORT_DESCRIPTOR + 0x00, 0x00, 0x00, 0x91, // add x0, x0, #0 :lo12:DELAY_IMPORT_DESCRIPTOR + 0x00, 0x00, 0x00, 0x94, // bl #0 __delayLoadHelper2 + 0xf0, 0x03, 0x00, 0xaa, // mov x16, x0 + 0xe6, 0x9f, 0x45, 0xad, // ldp q6, q7, [sp, #176] + 0xe4, 0x97, 0x44, 0xad, // ldp q4, q5, [sp, #144] + 0xe2, 0x8f, 0x43, 0xad, // ldp q2, q3, [sp, #112] + 0xe0, 0x87, 0x42, 0xad, // ldp q0, q1, [sp, #80] + 0xe6, 0x1f, 0x44, 0xa9, // ldp x6, x7, [sp, #64] + 0xe4, 0x17, 0x43, 0xa9, // ldp x4, x5, [sp, #48] + 0xe2, 0x0f, 0x42, 0xa9, // ldp x2, x3, [sp, #32] + 0xe0, 0x07, 0x41, 0xa9, // ldp x0, x1, [sp, #16] + 0xfd, 0x7b, 0xcd, 0xa8, // ldp x29, x30, [sp], #208 + 0x00, 0x02, 0x1f, 0xd6, // br x16 +}; + // A chunk for the delay import thunk. class ThunkChunkX64 : public Chunk { public: @@ -298,11 +328,35 @@ public: Defined *Helper = nullptr; }; +class ThunkChunkARM64 : public Chunk { +public: + ThunkChunkARM64(Defined *I, Chunk *D, Defined *H) + : Imp(I), Desc(D), Helper(H) {} + + size_t getSize() const override { return sizeof(ThunkARM64); } + + void writeTo(uint8_t *Buf) const override { + memcpy(Buf + OutputSectionOff, ThunkARM64, sizeof(ThunkARM64)); + applyArm64Addr(Buf + OutputSectionOff + 0, Imp->getRVA(), RVA + 0, 12); + applyArm64Imm(Buf + OutputSectionOff + 4, Imp->getRVA() & 0xfff, 0); + applyArm64Addr(Buf + OutputSectionOff + 52, Desc->getRVA(), RVA + 52, 12); + applyArm64Imm(Buf + OutputSectionOff + 56, Desc->getRVA() & 0xfff, 0); + applyArm64Branch26(Buf + OutputSectionOff + 60, + Helper->getRVA() - RVA - 60); + } + + Defined *Imp = nullptr; + Chunk *Desc = nullptr; + Defined *Helper = nullptr; +}; + // A chunk for the import descriptor table. class DelayAddressChunk : public Chunk { public: - explicit DelayAddressChunk(Chunk *C) : Thunk(C) { Alignment = ptrSize(); } - size_t getSize() const override { return ptrSize(); } + explicit DelayAddressChunk(Chunk *C) : Thunk(C) { + Alignment = Config->Wordsize; + } + size_t getSize() const override { return Config->Wordsize; } void writeTo(uint8_t *Buf) const override { if (Config->is64()) { @@ -362,6 +416,8 @@ public: size_t getSize() const override { return Size * 4; } void writeTo(uint8_t *Buf) const override { + memset(Buf + OutputSectionOff, 0, getSize()); + for (const Export &E : Config->Exports) { uint8_t *P = Buf + OutputSectionOff + E.Ordinal * 4; uint32_t Bit = 0; @@ -418,30 +474,6 @@ private: } // anonymous namespace -uint64_t IdataContents::getDirSize() { - return Dirs.size() * sizeof(ImportDirectoryTableEntry); -} - -uint64_t IdataContents::getIATSize() { - return Addresses.size() * ptrSize(); -} - -// Returns a list of .idata contents. -// See Microsoft PE/COFF spec 5.4 for details. -std::vector<Chunk *> IdataContents::getChunks() { - create(); - - // The loader assumes a specific order of data. - // Add each type in the correct order. - std::vector<Chunk *> V; - V.insert(V.end(), Dirs.begin(), Dirs.end()); - V.insert(V.end(), Lookups.begin(), Lookups.end()); - V.insert(V.end(), Addresses.begin(), Addresses.end()); - V.insert(V.end(), Hints.begin(), Hints.end()); - V.insert(V.end(), DLLNames.begin(), DLLNames.end()); - return V; -} - void IdataContents::create() { std::vector<std::vector<DefinedImportData *>> V = binImports(Imports); @@ -465,8 +497,8 @@ void IdataContents::create() { Hints.push_back(C); } // Terminate with null values. - Lookups.push_back(make<NullChunk>(ptrSize())); - Addresses.push_back(make<NullChunk>(ptrSize())); + Lookups.push_back(make<NullChunk>(Config->Wordsize)); + Addresses.push_back(make<NullChunk>(Config->Wordsize)); for (int I = 0, E = Syms.size(); I < E; ++I) Syms[I]->setLocation(Addresses[Base + I]); @@ -555,6 +587,8 @@ Chunk *DelayLoadContents::newThunkChunk(DefinedImportData *S, Chunk *Dir) { return make<ThunkChunkX86>(S, Dir, Helper); case ARMNT: return make<ThunkChunkARM>(S, Dir, Helper); + case ARM64: + return make<ThunkChunkARM64>(S, Dir, Helper); default: llvm_unreachable("unsupported machine type"); } diff --git a/COFF/DLL.h b/COFF/DLL.h index c5d6e7c93abf..a298271e2c0d 100644 --- a/COFF/DLL.h +++ b/COFF/DLL.h @@ -19,19 +19,12 @@ namespace coff { // Windows-specific. // IdataContents creates all chunks for the DLL import table. // You are supposed to call add() to add symbols and then -// call getChunks() to get a list of chunks. +// call create() to populate the chunk vectors. class IdataContents { public: void add(DefinedImportData *Sym) { Imports.push_back(Sym); } bool empty() { return Imports.empty(); } - std::vector<Chunk *> getChunks(); - uint64_t getDirRVA() { return Dirs[0]->getRVA(); } - uint64_t getDirSize(); - uint64_t getIATRVA() { return Addresses[0]->getRVA(); } - uint64_t getIATSize(); - -private: void create(); std::vector<DefinedImportData *> Imports; diff --git a/COFF/Driver.cpp b/COFF/Driver.cpp index eefdb48beadd..2e4b1e6d3147 100644 --- a/COFF/Driver.cpp +++ b/COFF/Driver.cpp @@ -32,6 +32,7 @@ #include "llvm/Option/ArgList.h" #include "llvm/Option/Option.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/LEB128.h" #include "llvm/Support/Path.h" #include "llvm/Support/Process.h" #include "llvm/Support/TarWriter.h" @@ -56,7 +57,7 @@ Configuration *Config; LinkerDriver *Driver; bool link(ArrayRef<const char *> Args, bool CanExitEarly, raw_ostream &Diag) { - errorHandler().LogName = sys::path::filename(Args[0]); + errorHandler().LogName = args::getFilenameWithoutExe(Args[0]); errorHandler().ErrorOS = &Diag; errorHandler().ColorDiagnostics = Diag.has_colors(); errorHandler().ErrorLimitExceededMsg = @@ -116,6 +117,19 @@ static std::future<MBErrPair> createFutureForFile(std::string Path) { }); } +// Symbol names are mangled by prepending "_" on x86. +static StringRef mangle(StringRef Sym) { + assert(Config->Machine != IMAGE_FILE_MACHINE_UNKNOWN); + if (Config->Machine == I386) + return Saver.save("_" + Sym); + return Sym; +} + +static bool findUnderscoreMangle(StringRef Sym) { + StringRef Entry = Symtab->findMangle(mangle(Sym)); + return !Entry.empty() && !isa<Undefined>(Symtab->find(Entry)); +} + MemoryBufferRef LinkerDriver::takeBuffer(std::unique_ptr<MemoryBuffer> MB) { MemoryBufferRef MBRef = *MB; make<std::unique_ptr<MemoryBuffer>>(std::move(MB)); // take ownership @@ -357,13 +371,30 @@ Optional<StringRef> LinkerDriver::findFile(StringRef Filename) { return Path; } +// MinGW specific. If an embedded directive specified to link to +// foo.lib, but it isn't found, try libfoo.a instead. +StringRef LinkerDriver::doFindLibMinGW(StringRef Filename) { + if (Filename.contains('/') || Filename.contains('\\')) + return Filename; + + SmallString<128> S = Filename; + sys::path::replace_extension(S, ".a"); + StringRef LibName = Saver.save("lib" + S.str()); + return doFindFile(LibName); +} + // Find library file from search path. StringRef LinkerDriver::doFindLib(StringRef Filename) { // Add ".lib" to Filename if that has no file extension. bool HasExt = Filename.contains('.'); if (!HasExt) Filename = Saver.save(Filename + ".lib"); - return doFindFile(Filename); + StringRef Ret = doFindFile(Filename); + // For MinGW, if the find above didn't turn up anything, try + // looking for a MinGW formatted library name. + if (Config->MinGW && Ret == Filename) + return doFindLibMinGW(Filename); + return Ret; } // Resolves a library path. /nodefaultlib options are taken into @@ -407,54 +438,57 @@ Symbol *LinkerDriver::addUndefined(StringRef Name) { return B; } -// Symbol names are mangled by appending "_" prefix on x86. -StringRef LinkerDriver::mangle(StringRef Sym) { - assert(Config->Machine != IMAGE_FILE_MACHINE_UNKNOWN); - if (Config->Machine == I386) - return Saver.save("_" + Sym); - return Sym; -} - // Windows specific -- find default entry point name. // // There are four different entry point functions for Windows executables, // each of which corresponds to a user-defined "main" function. This function // infers an entry point from a user-defined "main" function. StringRef LinkerDriver::findDefaultEntry() { - // As a special case, if /nodefaultlib is given, we directly look for an - // entry point. This is because, if no default library is linked, users - // need to define an entry point instead of a "main". - if (Config->NoDefaultLibAll) { - for (StringRef S : {"mainCRTStartup", "wmainCRTStartup", - "WinMainCRTStartup", "wWinMainCRTStartup"}) { - StringRef Entry = Symtab->findMangle(S); - if (!Entry.empty() && !isa<Undefined>(Symtab->find(Entry))) - return mangle(S); + assert(Config->Subsystem != IMAGE_SUBSYSTEM_UNKNOWN && + "must handle /subsystem before calling this"); + + if (Config->MinGW) + return mangle(Config->Subsystem == IMAGE_SUBSYSTEM_WINDOWS_GUI + ? "WinMainCRTStartup" + : "mainCRTStartup"); + + if (Config->Subsystem == IMAGE_SUBSYSTEM_WINDOWS_GUI) { + if (findUnderscoreMangle("wWinMain")) { + if (!findUnderscoreMangle("WinMain")) + return mangle("wWinMainCRTStartup"); + warn("found both wWinMain and WinMain; using latter"); } - return ""; + return mangle("WinMainCRTStartup"); } - - // User-defined main functions and their corresponding entry points. - static const char *Entries[][2] = { - {"main", "mainCRTStartup"}, - {"wmain", "wmainCRTStartup"}, - {"WinMain", "WinMainCRTStartup"}, - {"wWinMain", "wWinMainCRTStartup"}, - }; - for (auto E : Entries) { - StringRef Entry = Symtab->findMangle(mangle(E[0])); - if (!Entry.empty() && !isa<Undefined>(Symtab->find(Entry))) - return mangle(E[1]); + if (findUnderscoreMangle("wmain")) { + if (!findUnderscoreMangle("main")) + return mangle("wmainCRTStartup"); + warn("found both wmain and main; using latter"); } - return ""; + return mangle("mainCRTStartup"); } WindowsSubsystem LinkerDriver::inferSubsystem() { if (Config->DLL) return IMAGE_SUBSYSTEM_WINDOWS_GUI; - if (Symtab->findUnderscore("main") || Symtab->findUnderscore("wmain")) + if (Config->MinGW) return IMAGE_SUBSYSTEM_WINDOWS_CUI; - if (Symtab->findUnderscore("WinMain") || Symtab->findUnderscore("wWinMain")) + // Note that link.exe infers the subsystem from the presence of these + // functions even if /entry: or /nodefaultlib are passed which causes them + // to not be called. + bool HaveMain = findUnderscoreMangle("main"); + bool HaveWMain = findUnderscoreMangle("wmain"); + bool HaveWinMain = findUnderscoreMangle("WinMain"); + bool HaveWWinMain = findUnderscoreMangle("wWinMain"); + if (HaveMain || HaveWMain) { + if (HaveWinMain || HaveWWinMain) { + warn(std::string("found ") + (HaveMain ? "main" : "wmain") + " and " + + (HaveWinMain ? "WinMain" : "wWinMain") + + "; defaulting to /subsystem:console"); + } + return IMAGE_SUBSYSTEM_WINDOWS_CUI; + } + if (HaveWinMain || HaveWWinMain) return IMAGE_SUBSYSTEM_WINDOWS_GUI; return IMAGE_SUBSYSTEM_UNKNOWN; } @@ -500,26 +534,65 @@ static std::string createResponseFile(const opt::InputArgList &Args, return Data.str(); } -static unsigned getDefaultDebugType(const opt::InputArgList &Args) { - unsigned DebugTypes = static_cast<unsigned>(DebugType::CV); +enum class DebugKind { Unknown, None, Full, FastLink, GHash, Dwarf, Symtab }; + +static DebugKind parseDebugKind(const opt::InputArgList &Args) { + auto *A = Args.getLastArg(OPT_debug, OPT_debug_opt); + if (!A) + return DebugKind::None; + if (A->getNumValues() == 0) + return DebugKind::Full; + + DebugKind Debug = StringSwitch<DebugKind>(A->getValue()) + .CaseLower("none", DebugKind::None) + .CaseLower("full", DebugKind::Full) + .CaseLower("fastlink", DebugKind::FastLink) + // LLD extensions + .CaseLower("ghash", DebugKind::GHash) + .CaseLower("dwarf", DebugKind::Dwarf) + .CaseLower("symtab", DebugKind::Symtab) + .Default(DebugKind::Unknown); + + if (Debug == DebugKind::FastLink) { + warn("/debug:fastlink unsupported; using /debug:full"); + return DebugKind::Full; + } + if (Debug == DebugKind::Unknown) { + error("/debug: unknown option: " + Twine(A->getValue())); + return DebugKind::None; + } + return Debug; +} + +static unsigned parseDebugTypes(const opt::InputArgList &Args) { + unsigned DebugTypes = static_cast<unsigned>(DebugType::None); + + if (auto *A = Args.getLastArg(OPT_debugtype)) { + SmallVector<StringRef, 3> Types; + A->getSpelling().split(Types, ',', /*KeepEmpty=*/false); + + for (StringRef Type : Types) { + unsigned V = StringSwitch<unsigned>(Type.lower()) + .Case("cv", static_cast<unsigned>(DebugType::CV)) + .Case("pdata", static_cast<unsigned>(DebugType::PData)) + .Case("fixup", static_cast<unsigned>(DebugType::Fixup)) + .Default(0); + if (V == 0) { + warn("/debugtype: unknown option: " + Twine(A->getValue())); + continue; + } + DebugTypes |= V; + } + return DebugTypes; + } + + // Default debug types + DebugTypes = static_cast<unsigned>(DebugType::CV); if (Args.hasArg(OPT_driver)) DebugTypes |= static_cast<unsigned>(DebugType::PData); if (Args.hasArg(OPT_profile)) DebugTypes |= static_cast<unsigned>(DebugType::Fixup); - return DebugTypes; -} -static unsigned parseDebugType(StringRef Arg) { - SmallVector<StringRef, 3> Types; - Arg.split(Types, ',', /*KeepEmpty=*/false); - - unsigned DebugTypes = static_cast<unsigned>(DebugType::None); - for (StringRef Type : Types) - DebugTypes |= StringSwitch<unsigned>(Type.lower()) - .Case("cv", static_cast<unsigned>(DebugType::CV)) - .Case("pdata", static_cast<unsigned>(DebugType::PData)) - .Case("fixup", static_cast<unsigned>(DebugType::Fixup)) - .Default(0); return DebugTypes; } @@ -679,131 +752,6 @@ static void parseModuleDefs(StringRef Path) { } } -// A helper function for filterBitcodeFiles. -static bool needsRebuilding(MemoryBufferRef MB) { - // The MSVC linker doesn't support thin archives, so if it's a thin - // archive, we always need to rebuild it. - std::unique_ptr<Archive> File = - CHECK(Archive::create(MB), "Failed to read " + MB.getBufferIdentifier()); - if (File->isThin()) - return true; - - // Returns true if the archive contains at least one bitcode file. - for (MemoryBufferRef Member : getArchiveMembers(File.get())) - if (identify_magic(Member.getBuffer()) == file_magic::bitcode) - return true; - return false; -} - -// Opens a given path as an archive file and removes bitcode files -// from them if exists. This function is to appease the MSVC linker as -// their linker doesn't like archive files containing non-native -// object files. -// -// If a given archive doesn't contain bitcode files, the archive path -// is returned as-is. Otherwise, a new temporary file is created and -// its path is returned. -static Optional<std::string> -filterBitcodeFiles(StringRef Path, std::vector<std::string> &TemporaryFiles) { - std::unique_ptr<MemoryBuffer> MB = CHECK( - MemoryBuffer::getFile(Path, -1, false, true), "could not open " + Path); - MemoryBufferRef MBRef = MB->getMemBufferRef(); - file_magic Magic = identify_magic(MBRef.getBuffer()); - - if (Magic == file_magic::bitcode) - return None; - if (Magic != file_magic::archive) - return Path.str(); - if (!needsRebuilding(MBRef)) - return Path.str(); - - std::unique_ptr<Archive> File = - CHECK(Archive::create(MBRef), - MBRef.getBufferIdentifier() + ": failed to parse archive"); - - std::vector<NewArchiveMember> New; - for (MemoryBufferRef Member : getArchiveMembers(File.get())) - if (identify_magic(Member.getBuffer()) != file_magic::bitcode) - New.emplace_back(Member); - - if (New.empty()) - return None; - - log("Creating a temporary archive for " + Path + " to remove bitcode files"); - - SmallString<128> S; - if (std::error_code EC = sys::fs::createTemporaryFile( - "lld-" + sys::path::stem(Path), ".lib", S)) - fatal("cannot create a temporary file: " + EC.message()); - std::string Temp = S.str(); - TemporaryFiles.push_back(Temp); - - Error E = - llvm::writeArchive(Temp, New, /*WriteSymtab=*/true, Archive::Kind::K_GNU, - /*Deterministics=*/true, - /*Thin=*/false); - handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) { - error("failed to create a new archive " + S.str() + ": " + EI.message()); - }); - return Temp; -} - -// Create response file contents and invoke the MSVC linker. -void LinkerDriver::invokeMSVC(opt::InputArgList &Args) { - std::string Rsp = "/nologo\n"; - std::vector<std::string> Temps; - - // Write out archive members that we used in symbol resolution and pass these - // to MSVC before any archives, so that MSVC uses the same objects to satisfy - // references. - for (ObjFile *Obj : ObjFile::Instances) { - if (Obj->ParentName.empty()) - continue; - SmallString<128> S; - int Fd; - if (auto EC = sys::fs::createTemporaryFile( - "lld-" + sys::path::filename(Obj->ParentName), ".obj", Fd, S)) - fatal("cannot create a temporary file: " + EC.message()); - raw_fd_ostream OS(Fd, /*shouldClose*/ true); - OS << Obj->MB.getBuffer(); - Temps.push_back(S.str()); - Rsp += quote(S) + "\n"; - } - - for (auto *Arg : Args) { - switch (Arg->getOption().getID()) { - case OPT_linkrepro: - case OPT_lldmap: - case OPT_lldmap_file: - case OPT_lldsavetemps: - case OPT_msvclto: - // LLD-specific options are stripped. - break; - case OPT_opt: - if (!StringRef(Arg->getValue()).startswith("lld")) - Rsp += toString(*Arg) + " "; - break; - case OPT_INPUT: { - if (Optional<StringRef> Path = doFindFile(Arg->getValue())) { - if (Optional<std::string> S = filterBitcodeFiles(*Path, Temps)) - Rsp += quote(*S) + "\n"; - continue; - } - Rsp += quote(Arg->getValue()) + "\n"; - break; - } - default: - Rsp += toString(*Arg) + "\n"; - } - } - - std::vector<StringRef> ObjFiles = Symtab->compileBitcodeFiles(); - runMSVCLinker(Rsp, ObjFiles); - - for (StringRef Path : Temps) - sys::fs::remove(Path); -} - void LinkerDriver::enqueueTask(std::function<void()> Task) { TaskQueue.push_back(std::move(Task)); } @@ -859,6 +807,97 @@ static void parseOrderFile(StringRef Arg) { } } +static void markAddrsig(Symbol *S) { + if (auto *D = dyn_cast_or_null<Defined>(S)) + if (Chunk *C = D->getChunk()) + C->KeepUnique = true; +} + +static void findKeepUniqueSections() { + // Exported symbols could be address-significant in other executables or DSOs, + // so we conservatively mark them as address-significant. + for (Export &R : Config->Exports) + markAddrsig(R.Sym); + + // Visit the address-significance table in each object file and mark each + // referenced symbol as address-significant. + for (ObjFile *Obj : ObjFile::Instances) { + ArrayRef<Symbol *> Syms = Obj->getSymbols(); + if (Obj->AddrsigSec) { + ArrayRef<uint8_t> Contents; + Obj->getCOFFObj()->getSectionContents(Obj->AddrsigSec, Contents); + const uint8_t *Cur = Contents.begin(); + while (Cur != Contents.end()) { + unsigned Size; + const char *Err; + uint64_t SymIndex = decodeULEB128(Cur, &Size, Contents.end(), &Err); + if (Err) + fatal(toString(Obj) + ": could not decode addrsig section: " + Err); + if (SymIndex >= Syms.size()) + fatal(toString(Obj) + ": invalid symbol index in addrsig section"); + markAddrsig(Syms[SymIndex]); + Cur += Size; + } + } else { + // If an object file does not have an address-significance table, + // conservatively mark all of its symbols as address-significant. + for (Symbol *S : Syms) + markAddrsig(S); + } + } +} + +// link.exe replaces each %foo% in AltPath with the contents of environment +// variable foo, and adds the two magic env vars _PDB (expands to the basename +// of pdb's output path) and _EXT (expands to the extension of the output +// binary). +// lld only supports %_PDB% and %_EXT% and warns on references to all other env +// vars. +static void parsePDBAltPath(StringRef AltPath) { + SmallString<128> Buf; + StringRef PDBBasename = + sys::path::filename(Config->PDBPath, sys::path::Style::windows); + StringRef BinaryExtension = + sys::path::extension(Config->OutputFile, sys::path::Style::windows); + if (!BinaryExtension.empty()) + BinaryExtension = BinaryExtension.substr(1); // %_EXT% does not include '.'. + + // Invariant: + // +--------- Cursor ('a...' might be the empty string). + // | +----- FirstMark + // | | +- SecondMark + // v v v + // a...%...%... + size_t Cursor = 0; + while (Cursor < AltPath.size()) { + size_t FirstMark, SecondMark; + if ((FirstMark = AltPath.find('%', Cursor)) == StringRef::npos || + (SecondMark = AltPath.find('%', FirstMark + 1)) == StringRef::npos) { + // Didn't find another full fragment, treat rest of string as literal. + Buf.append(AltPath.substr(Cursor)); + break; + } + + // Found a full fragment. Append text in front of first %, and interpret + // text between first and second % as variable name. + Buf.append(AltPath.substr(Cursor, FirstMark - Cursor)); + StringRef Var = AltPath.substr(FirstMark, SecondMark - FirstMark + 1); + if (Var.equals_lower("%_pdb%")) + Buf.append(PDBBasename); + else if (Var.equals_lower("%_ext%")) + Buf.append(BinaryExtension); + else { + warn("only %_PDB% and %_EXT% supported in /pdbaltpath:, keeping " + + Var + " as literal"); + Buf.append(Var); + } + + Cursor = SecondMark + 1; + } + + Config->PDBAltPath = Buf; +} + void LinkerDriver::link(ArrayRef<const char *> ArgsArr) { // If the first command line argument is "/lib", link.exe acts like lib.exe. // We call our own implementation of lib.exe that understands bitcode files. @@ -947,11 +986,17 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) { // Handle /ignore for (auto *Arg : Args.filtered(OPT_ignore)) { - if (StringRef(Arg->getValue()) == "4037") - Config->WarnMissingOrderSymbol = false; - else if (StringRef(Arg->getValue()) == "4217") - Config->WarnLocallyDefinedImported = false; - // Other warning numbers are ignored. + SmallVector<StringRef, 8> Vec; + StringRef(Arg->getValue()).split(Vec, ','); + for (StringRef S : Vec) { + if (S == "4037") + Config->WarnMissingOrderSymbol = false; + else if (S == "4099") + Config->WarnDebugInfoUnusable = false; + else if (S == "4217") + Config->WarnLocallyDefinedImported = false; + // Other warning numbers are ignored. + } } // Handle /out @@ -965,20 +1010,26 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) { // Handle /force or /force:unresolved if (Args.hasArg(OPT_force, OPT_force_unresolved)) - Config->Force = true; + Config->ForceUnresolved = true; + + // Handle /force or /force:multiple + if (Args.hasArg(OPT_force, OPT_force_multiple)) + Config->ForceMultiple = true; // Handle /debug - if (Args.hasArg(OPT_debug, OPT_debug_dwarf, OPT_debug_ghash)) { + DebugKind Debug = parseDebugKind(Args); + if (Debug == DebugKind::Full || Debug == DebugKind::Dwarf || + Debug == DebugKind::GHash) { Config->Debug = true; Config->Incremental = true; - if (auto *Arg = Args.getLastArg(OPT_debugtype)) - Config->DebugTypes = parseDebugType(Arg->getValue()); - else - Config->DebugTypes = getDefaultDebugType(Args); } + // Handle /debugtype + Config->DebugTypes = parseDebugTypes(Args); + // Handle /pdb - bool ShouldCreatePDB = Args.hasArg(OPT_debug, OPT_debug_ghash); + bool ShouldCreatePDB = + (Debug == DebugKind::Full || Debug == DebugKind::GHash); if (ShouldCreatePDB) { if (auto *Arg = Args.getLastArg(OPT_pdb)) Config->PDBPath = Arg->getValue(); @@ -1099,7 +1150,7 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) { Config->Implib = Arg->getValue(); // Handle /opt. - bool DoGC = !Args.hasArg(OPT_debug) || Args.hasArg(OPT_profile); + bool DoGC = Debug == DebugKind::None || Args.hasArg(OPT_profile); unsigned ICFLevel = Args.hasArg(OPT_profile) ? 0 : 1; // 0: off, 1: limited, 2: on unsigned TailMerge = 1; @@ -1184,6 +1235,12 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) { parseMerge(".xdata=.rdata"); parseMerge(".bss=.data"); + if (Config->MinGW) { + parseMerge(".ctors=.rdata"); + parseMerge(".dtors=.rdata"); + parseMerge(".CRT=.rdata"); + } + // Handle /section for (auto *Arg : Args.filtered(OPT_section)) parseSection(Arg->getValue()); @@ -1237,9 +1294,9 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) { Config->NxCompat = Args.hasFlag(OPT_nxcompat, OPT_nxcompat_no, true); Config->TerminalServerAware = !Config->DLL && Args.hasFlag(OPT_tsaware, OPT_tsaware_no, true); - Config->DebugDwarf = Args.hasArg(OPT_debug_dwarf); - Config->DebugGHashes = Args.hasArg(OPT_debug_ghash); - Config->DebugSymtab = Args.hasArg(OPT_debug_symtab); + Config->DebugDwarf = Debug == DebugKind::Dwarf; + Config->DebugGHashes = Debug == DebugKind::GHash; + Config->DebugSymtab = Debug == DebugKind::Symtab; Config->MapFile = getMapFile(Args); @@ -1269,10 +1326,14 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) { return; std::set<sys::fs::UniqueID> WholeArchives; - for (auto *Arg : Args.filtered(OPT_wholearchive_file)) - if (Optional<StringRef> Path = doFindFile(Arg->getValue())) + AutoExporter Exporter; + for (auto *Arg : Args.filtered(OPT_wholearchive_file)) { + if (Optional<StringRef> Path = doFindFile(Arg->getValue())) { if (Optional<sys::fs::UniqueID> ID = getUniqueID(*Path)) WholeArchives.insert(*ID); + Exporter.addWholeArchive(*Path); + } + } // A predicate returning true if a given path is an argument for // /wholearchive:, or /wholearchive is enabled globally. @@ -1303,12 +1364,16 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) { // Read all input files given via the command line. run(); + if (errorCount()) + return; + // We should have inferred a machine type by now from the input files, but if // not we assume x64. if (Config->Machine == IMAGE_FILE_MACHINE_UNKNOWN) { warn("/machine is not specified. x64 is assumed"); Config->Machine = AMD64; } + Config->Wordsize = Config->is64() ? 8 : 4; // Input files can be Windows resource files (.res files). We use // WindowsResource to convert resource files to a regular COFF file, @@ -1335,25 +1400,6 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) { error("/dynamicbase:no is not compatible with " + machineToStr(Config->Machine)); - // Handle /entry and /dll - if (auto *Arg = Args.getLastArg(OPT_entry)) { - Config->Entry = addUndefined(mangle(Arg->getValue())); - } else if (!Config->Entry && !Config->NoEntry) { - if (Args.hasArg(OPT_dll)) { - StringRef S = (Config->Machine == I386) ? "__DllMainCRTStartup@12" - : "_DllMainCRTStartup"; - Config->Entry = addUndefined(S); - } else { - // Windows specific -- If entry point name is not given, we need to - // infer that from user-defined entry name. - StringRef S = findDefaultEntry(); - if (S.empty()) - fatal("entry point must be defined"); - Config->Entry = addUndefined(S); - log("Entry name inferred: " + S); - } - } - // Handle /export for (auto *Arg : Args.filtered(OPT_export)) { Export E = parseExport(Arg->getValue()); @@ -1379,6 +1425,34 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) { return; } + // Windows specific -- if no /subsystem is given, we need to infer + // that from entry point name. Must happen before /entry handling, + // and after the early return when just writing an import library. + if (Config->Subsystem == IMAGE_SUBSYSTEM_UNKNOWN) { + Config->Subsystem = inferSubsystem(); + if (Config->Subsystem == IMAGE_SUBSYSTEM_UNKNOWN) + fatal("subsystem must be defined"); + } + + // Handle /entry and /dll + if (auto *Arg = Args.getLastArg(OPT_entry)) { + Config->Entry = addUndefined(mangle(Arg->getValue())); + } else if (!Config->Entry && !Config->NoEntry) { + if (Args.hasArg(OPT_dll)) { + StringRef S = (Config->Machine == I386) ? "__DllMainCRTStartup@12" + : "_DllMainCRTStartup"; + Config->Entry = addUndefined(S); + } else { + // Windows specific -- If entry point name is not given, we need to + // infer that from user-defined entry name. + StringRef S = findDefaultEntry(); + if (S.empty()) + fatal("entry point must be defined"); + Config->Entry = addUndefined(S); + log("Entry name inferred: " + S); + } + } + // Handle /delayload for (auto *Arg : Args.filtered(OPT_delayload)) { Config->DelayLoads.insert(StringRef(Arg->getValue()).lower()); @@ -1412,6 +1486,9 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) { // tools won't work correctly if these assumptions are not held. sys::fs::make_absolute(Config->PDBAltPath); sys::path::remove_dots(Config->PDBAltPath); + } else { + // Don't do this earlier, so that Config->OutputFile is ready. + parsePDBAltPath(Config->PDBAltPath); } } @@ -1435,6 +1512,13 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) { // Needed for MSVC 2017 15.5 CRT. Symtab->addAbsolute(mangle("__enclave_config"), 0); + if (Config->MinGW) { + Symtab->addAbsolute(mangle("__RUNTIME_PSEUDO_RELOC_LIST__"), 0); + Symtab->addAbsolute(mangle("__RUNTIME_PSEUDO_RELOC_LIST_END__"), 0); + Symtab->addAbsolute(mangle("__CTOR_LIST__"), 0); + Symtab->addAbsolute(mangle("__DTOR_LIST__"), 0); + } + // This code may add new undefined symbols to the link, which may enqueue more // symbol resolution tasks, so we need to continue executing tasks until we // converge. @@ -1474,31 +1558,34 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) { if (errorCount()) return; - // If /msvclto is given, we use the MSVC linker to link LTO output files. - // This is useful because MSVC link.exe can generate complete PDBs. - if (Args.hasArg(OPT_msvclto)) { - invokeMSVC(Args); - return; - } - // Do LTO by compiling bitcode input files to a set of native COFF files then // link those files. Symtab->addCombinedLTOObjects(); run(); + if (Config->MinGW) { + // Load any further object files that might be needed for doing automatic + // imports. + // + // For cases with no automatically imported symbols, this iterates once + // over the symbol table and doesn't do anything. + // + // For the normal case with a few automatically imported symbols, this + // should only need to be run once, since each new object file imported + // is an import library and wouldn't add any new undefined references, + // but there's nothing stopping the __imp_ symbols from coming from a + // normal object file as well (although that won't be used for the + // actual autoimport later on). If this pass adds new undefined references, + // we won't iterate further to resolve them. + Symtab->loadMinGWAutomaticImports(); + run(); + } + // Make sure we have resolved all symbols. Symtab->reportRemainingUndefines(); if (errorCount()) return; - // Windows specific -- if no /subsystem is given, we need to infer - // that from entry point name. - if (Config->Subsystem == IMAGE_SUBSYSTEM_UNKNOWN) { - Config->Subsystem = inferSubsystem(); - if (Config->Subsystem == IMAGE_SUBSYSTEM_UNKNOWN) - fatal("subsystem must be defined"); - } - // Handle /safeseh. if (Args.hasFlag(OPT_safeseh, OPT_safeseh_no, false)) { for (ObjFile *File : ObjFile::Instances) @@ -1512,7 +1599,7 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) { // are chosen to be exported. if (Config->DLL && ((Config->MinGW && Config->Exports.empty()) || Args.hasArg(OPT_export_all_symbols))) { - AutoExporter Exporter; + Exporter.initSymbolExcludes(); Symtab->forEachSymbol([=](Symbol *S) { auto *Def = dyn_cast<Defined>(S); @@ -1551,11 +1638,11 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) { continue; } + // If the symbol isn't common, it must have been replaced with a regular + // symbol, which will carry its own alignment. auto *DC = dyn_cast<DefinedCommon>(Sym); - if (!DC) { - warn("/aligncomm symbol " + Name + " of wrong kind"); + if (!DC) continue; - } CommonChunk *C = DC->getChunk(); C->Alignment = std::max(C->Alignment, Alignment); @@ -1576,8 +1663,10 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) { markLive(Symtab->getChunks()); // Identify identical COMDAT sections to merge them. - if (Config->DoICF) + if (Config->DoICF) { + findKeepUniqueSections(); doICF(Symtab->getChunks()); + } // Write the result. writeResult(); diff --git a/COFF/Driver.h b/COFF/Driver.h index 627e991a9028..e779721ab75d 100644 --- a/COFF/Driver.h +++ b/COFF/Driver.h @@ -89,6 +89,7 @@ private: Optional<StringRef> findLib(StringRef Filename); StringRef doFindFile(StringRef Filename); StringRef doFindLib(StringRef Filename); + StringRef doFindLibMinGW(StringRef Filename); // Parses LIB environment which contains a list of search paths. void addLibSearchPaths(); @@ -103,7 +104,6 @@ private: std::set<std::string> VisitedLibs; Symbol *addUndefined(StringRef Sym); - StringRef mangle(StringRef Sym); // Windows specific -- "main" is not the only main function in Windows. // You can choose one from these four -- {w,}{WinMain,main}. @@ -115,8 +115,6 @@ private: StringRef findDefaultEntry(); WindowsSubsystem inferSubsystem(); - void invokeMSVC(llvm::opt::InputArgList &Args); - void addBuffer(std::unique_ptr<MemoryBuffer> MB, bool WholeArchive); void addArchiveBuffer(MemoryBufferRef MBRef, StringRef SymName, StringRef ParentName); diff --git a/COFF/DriverUtils.cpp b/COFF/DriverUtils.cpp index c12e791f9507..3a11895497a4 100644 --- a/COFF/DriverUtils.cpp +++ b/COFF/DriverUtils.cpp @@ -713,26 +713,6 @@ MemoryBufferRef convertResToCOFF(ArrayRef<MemoryBufferRef> MBs) { return MBRef; } -// Run MSVC link.exe for given in-memory object files. -// Command line options are copied from those given to LLD. -// This is for the /msvclto option. -void runMSVCLinker(std::string Rsp, ArrayRef<StringRef> Objects) { - // Write the in-memory object files to disk. - std::vector<TemporaryFile> Temps; - for (StringRef S : Objects) { - Temps.emplace_back("lto", "obj", S); - Rsp += quote(Temps.back().Path) + "\n"; - } - - log("link.exe " + Rsp); - - // Run MSVC link.exe. - Temps.emplace_back("lto", "rsp", Rsp); - Executor E("link.exe"); - E.add(Twine("@" + Temps.back().Path)); - E.run(); -} - // Create OptTable // Create prefix string literals used in Options.td @@ -883,7 +863,9 @@ std::vector<const char *> ArgParser::tokenize(StringRef S) { } void printHelp(const char *Argv0) { - COFFOptTable().PrintHelp(outs(), Argv0, "LLVM Linker", false); + COFFOptTable().PrintHelp(outs(), + (std::string(Argv0) + " [options] file...").c_str(), + "LLVM Linker", false); } } // namespace coff diff --git a/COFF/ICF.cpp b/COFF/ICF.cpp index 7feb3c4e0b0c..34ea360fa925 100644 --- a/COFF/ICF.cpp +++ b/COFF/ICF.cpp @@ -22,6 +22,7 @@ #include "Chunks.h" #include "Symbols.h" #include "lld/Common/ErrorHandler.h" +#include "lld/Common/Threads.h" #include "lld/Common/Timer.h" #include "llvm/ADT/Hashing.h" #include "llvm/Support/Debug.h" @@ -80,7 +81,7 @@ private: bool ICF::isEligible(SectionChunk *C) { // Non-comdat chunks, dead chunks, and writable chunks are not elegible. bool Writable = C->getOutputCharacteristics() & llvm::COFF::IMAGE_SCN_MEM_WRITE; - if (!C->isCOMDAT() || !C->isLive() || Writable) + if (!C->isCOMDAT() || !C->Live || Writable) return false; // Code sections are eligible. @@ -93,7 +94,11 @@ bool ICF::isEligible(SectionChunk *C) { return true; // So are vtables. - return C->Sym && C->Sym->getName().startswith("??_7"); + if (C->Sym && C->Sym->getName().startswith("??_7")) + return true; + + // Anything else not in an address-significance table is eligible. + return !C->KeepUnique; } // Split an equivalence class into smaller classes. @@ -222,10 +227,10 @@ void ICF::forEachClass(std::function<void(size_t, size_t)> Fn) { size_t Boundaries[NumShards + 1]; Boundaries[0] = 0; Boundaries[NumShards] = Chunks.size(); - for_each_n(parallel::par, size_t(1), NumShards, [&](size_t I) { + parallelForEachN(1, NumShards, [&](size_t I) { Boundaries[I] = findBoundary((I - 1) * Step, Chunks.size()); }); - for_each_n(parallel::par, size_t(1), NumShards + 1, [&](size_t I) { + parallelForEachN(1, NumShards + 1, [&](size_t I) { if (Boundaries[I - 1] < Boundaries[I]) { forEachClassRange(Boundaries[I - 1], Boundaries[I], Fn); } @@ -257,9 +262,19 @@ void ICF::run(ArrayRef<Chunk *> Vec) { SC->Class[0] = NextId++; // Initially, we use hash values to partition sections. - for_each(parallel::par, Chunks.begin(), Chunks.end(), [&](SectionChunk *SC) { + parallelForEach(Chunks, [&](SectionChunk *SC) { + SC->Class[1] = xxHash64(SC->getContents()); + }); + + // Combine the hashes of the sections referenced by each section into its + // hash. + parallelForEach(Chunks, [&](SectionChunk *SC) { + uint32_t Hash = SC->Class[1]; + for (Symbol *B : SC->symbols()) + if (auto *Sym = dyn_cast_or_null<DefinedRegular>(B)) + Hash ^= Sym->getChunk()->Class[1]; // Set MSB to 1 to avoid collisions with non-hash classs. - SC->Class[0] = xxHash64(SC->getContents()) | (1 << 31); + SC->Class[0] = Hash | (1U << 31); }); // From now on, sections in Chunks are ordered so that sections in diff --git a/COFF/InputFiles.cpp b/COFF/InputFiles.cpp index 2b3e65fae04b..236c90ef0388 100644 --- a/COFF/InputFiles.cpp +++ b/COFF/InputFiles.cpp @@ -54,8 +54,16 @@ std::vector<BitcodeFile *> BitcodeFile::Instances; static void checkAndSetWeakAlias(SymbolTable *Symtab, InputFile *F, Symbol *Source, Symbol *Target) { if (auto *U = dyn_cast<Undefined>(Source)) { - if (U->WeakAlias && U->WeakAlias != Target) + if (U->WeakAlias && U->WeakAlias != Target) { + // Weak aliases as produced by GCC are named in the form + // .weak.<weaksymbol>.<othersymbol>, where <othersymbol> is the name + // of another symbol emitted near the weak symbol. + // Just use the definition from the first object file that defined + // this weak symbol. + if (Config->MinGW) + return; Symtab->reportDuplicate(Source, F); + } U->WeakAlias = Target; } } @@ -147,9 +155,10 @@ SectionChunk *ObjFile::readSection(uint32_t SectionNumber, const coff_aux_section_definition *Def, StringRef LeaderName) { const coff_section *Sec; - StringRef Name; if (auto EC = COFFObj->getSection(SectionNumber, Sec)) fatal("getSection failed: #" + Twine(SectionNumber) + ": " + EC.message()); + + StringRef Name; if (auto EC = COFFObj->getSectionName(Sec, Name)) fatal("getSectionName failed: #" + Twine(SectionNumber) + ": " + EC.message()); @@ -161,6 +170,11 @@ SectionChunk *ObjFile::readSection(uint32_t SectionNumber, return nullptr; } + if (Name == ".llvm_addrsig") { + AddrsigSec = Sec; + return nullptr; + } + // Object files may have DWARF debug info or MS CodeView debug info // (or both). // @@ -168,8 +182,8 @@ SectionChunk *ObjFile::readSection(uint32_t SectionNumber, // of the linker; they are just a data section containing relocations. // We can just link them to complete debug info. // - // CodeView needs a linker support. We need to interpret and debug - // info, and then write it to a separate .pdb file. + // CodeView needs linker support. We need to interpret debug info, + // and then write it to a separate .pdb file. // Ignore DWARF debug info unless /debug is given. if (!Config->Debug && Name.startswith(".debug_")) @@ -205,7 +219,13 @@ SectionChunk *ObjFile::readSection(uint32_t SectionNumber, void ObjFile::readAssociativeDefinition( COFFSymbolRef Sym, const coff_aux_section_definition *Def) { - SectionChunk *Parent = SparseChunks[Def->getNumber(Sym.isBigObj())]; + readAssociativeDefinition(Sym, Def, Def->getNumber(Sym.isBigObj())); +} + +void ObjFile::readAssociativeDefinition(COFFSymbolRef Sym, + const coff_aux_section_definition *Def, + uint32_t ParentSection) { + SectionChunk *Parent = SparseChunks[ParentSection]; // If the parent is pending, it probably means that its section definition // appears after us in the symbol table. Leave the associated section as @@ -225,6 +245,35 @@ void ObjFile::readAssociativeDefinition( } } +void ObjFile::recordPrevailingSymbolForMingw( + COFFSymbolRef Sym, DenseMap<StringRef, uint32_t> &PrevailingSectionMap) { + // For comdat symbols in executable sections, where this is the copy + // of the section chunk we actually include instead of discarding it, + // add the symbol to a map to allow using it for implicitly + // associating .[px]data$<func> sections to it. + int32_t SectionNumber = Sym.getSectionNumber(); + SectionChunk *SC = SparseChunks[SectionNumber]; + if (SC && SC->getOutputCharacteristics() & IMAGE_SCN_MEM_EXECUTE) { + StringRef Name; + COFFObj->getSymbolName(Sym, Name); + PrevailingSectionMap[Name] = SectionNumber; + } +} + +void ObjFile::maybeAssociateSEHForMingw( + COFFSymbolRef Sym, const coff_aux_section_definition *Def, + const DenseMap<StringRef, uint32_t> &PrevailingSectionMap) { + StringRef Name; + COFFObj->getSymbolName(Sym, Name); + if (Name.consume_front(".pdata$") || Name.consume_front(".xdata$")) { + // For MinGW, treat .[px]data$<func> as implicitly associative to + // the symbol <func>. + auto ParentSym = PrevailingSectionMap.find(Name); + if (ParentSym != PrevailingSectionMap.end()) + readAssociativeDefinition(Sym, Def, ParentSym->second); + } +} + Symbol *ObjFile::createRegular(COFFSymbolRef Sym) { SectionChunk *SC = SparseChunks[Sym.getSectionNumber()]; if (Sym.isExternal()) { @@ -232,10 +281,17 @@ Symbol *ObjFile::createRegular(COFFSymbolRef Sym) { COFFObj->getSymbolName(Sym, Name); if (SC) return Symtab->addRegular(this, Name, Sym.getGeneric(), SC); + // For MinGW symbols named .weak.* that point to a discarded section, + // don't create an Undefined symbol. If nothing ever refers to the symbol, + // everything should be fine. If something actually refers to the symbol + // (e.g. the undefined weak alias), linking will fail due to undefined + // references at the end. + if (Config->MinGW && Name.startswith(".weak.")) + return nullptr; return Symtab->addUndefined(Name, this, false); } if (SC) - return make<DefinedRegular>(this, /*Name*/ "", false, + return make<DefinedRegular>(this, /*Name*/ "", /*IsCOMDAT*/ false, /*IsExternal*/ false, Sym.getGeneric(), SC); return nullptr; } @@ -248,19 +304,24 @@ void ObjFile::initializeSymbols() { std::vector<uint32_t> PendingIndexes; PendingIndexes.reserve(NumSymbols); + DenseMap<StringRef, uint32_t> PrevailingSectionMap; std::vector<const coff_aux_section_definition *> ComdatDefs( COFFObj->getNumberOfSections() + 1); for (uint32_t I = 0; I < NumSymbols; ++I) { COFFSymbolRef COFFSym = check(COFFObj->getSymbol(I)); + bool PrevailingComdat; if (COFFSym.isUndefined()) { Symbols[I] = createUndefined(COFFSym); } else if (COFFSym.isWeakExternal()) { Symbols[I] = createUndefined(COFFSym); uint32_t TagIndex = COFFSym.getAux<coff_aux_weak_external>()->TagIndex; WeakAliases.emplace_back(Symbols[I], TagIndex); - } else if (Optional<Symbol *> OptSym = createDefined(COFFSym, ComdatDefs)) { + } else if (Optional<Symbol *> OptSym = + createDefined(COFFSym, ComdatDefs, PrevailingComdat)) { Symbols[I] = *OptSym; + if (Config->MinGW && PrevailingComdat) + recordPrevailingSymbolForMingw(COFFSym, PrevailingSectionMap); } else { // createDefined() returns None if a symbol belongs to a section that // was pending at the point when the symbol was read. This can happen in @@ -278,9 +339,12 @@ void ObjFile::initializeSymbols() { for (uint32_t I : PendingIndexes) { COFFSymbolRef Sym = check(COFFObj->getSymbol(I)); - if (auto *Def = Sym.getSectionDefinition()) + if (const coff_aux_section_definition *Def = Sym.getSectionDefinition()) { if (Def->Selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE) readAssociativeDefinition(Sym, Def); + else if (Config->MinGW) + maybeAssociateSEHForMingw(Sym, Def, PrevailingSectionMap); + } if (SparseChunks[Sym.getSectionNumber()] == PendingComdat) { StringRef Name; COFFObj->getSymbolName(Sym, Name); @@ -306,7 +370,9 @@ Symbol *ObjFile::createUndefined(COFFSymbolRef Sym) { Optional<Symbol *> ObjFile::createDefined( COFFSymbolRef Sym, - std::vector<const coff_aux_section_definition *> &ComdatDefs) { + std::vector<const coff_aux_section_definition *> &ComdatDefs, + bool &Prevailing) { + Prevailing = false; auto GetName = [&]() { StringRef S; COFFObj->getSymbolName(Sym, S); @@ -352,12 +418,11 @@ Optional<Symbol *> ObjFile::createDefined( if (const coff_aux_section_definition *Def = ComdatDefs[SectionNumber]) { ComdatDefs[SectionNumber] = nullptr; Symbol *Leader; - bool Prevailing; if (Sym.isExternal()) { std::tie(Leader, Prevailing) = Symtab->addComdat(this, GetName(), Sym.getGeneric()); } else { - Leader = make<DefinedRegular>(this, /*Name*/ "", false, + Leader = make<DefinedRegular>(this, /*Name*/ "", /*IsCOMDAT*/ false, /*IsExternal*/ false, Sym.getGeneric()); Prevailing = true; } @@ -377,7 +442,7 @@ Optional<Symbol *> ObjFile::createDefined( // leader symbol by setting the section's ComdatDefs pointer if we encounter a // non-associative comdat. if (SparseChunks[SectionNumber] == PendingComdat) { - if (auto *Def = Sym.getSectionDefinition()) { + if (const coff_aux_section_definition *Def = Sym.getSectionDefinition()) { if (Def->Selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE) readAssociativeDefinition(Sym, Def); else @@ -385,8 +450,10 @@ Optional<Symbol *> ObjFile::createDefined( } } + // readAssociativeDefinition() writes to SparseChunks, so need to check again. if (SparseChunks[SectionNumber] == PendingComdat) return None; + return createRegular(Sym); } @@ -437,6 +504,10 @@ void ImportFile::parse() { ExternalName = ExtName; ImpSym = Symtab->addImportData(ImpName, this); + // If this was a duplicate, we logged an error but may continue; + // in this case, ImpSym is nullptr. + if (!ImpSym) + return; if (Hdr->getType() == llvm::COFF::IMPORT_CONST) static_cast<void>(Symtab->addImportData(Name, this)); diff --git a/COFF/InputFiles.h b/COFF/InputFiles.h index 4ee4b363886f..ec802f2d0300 100644 --- a/COFF/InputFiles.h +++ b/COFF/InputFiles.h @@ -13,7 +13,9 @@ #include "Config.h" #include "lld/Common/LLVM.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/DebugInfo/CodeView/TypeRecord.h" #include "llvm/LTO/LTO.h" #include "llvm/Object/Archive.h" #include "llvm/Object/COFF.h" @@ -121,9 +123,12 @@ public: return Symbols[SymbolIndex]; } - // Returns the underying COFF file. + // Returns the underlying COFF file. COFFObjectFile *getCOFFObj() { return COFFObj.get(); } + // Whether the object was already merged into the final PDB or not + bool wasProcessedForPDB() const { return !!ModuleDBI; } + static std::vector<ObjFile *> Instances; // Flags in the absolute @feat.00 symbol if it is present. These usually @@ -144,6 +149,13 @@ public: // if we are not producing a PDB. llvm::pdb::DbiModuleDescriptorBuilder *ModuleDBI = nullptr; + const coff_section *AddrsigSec = nullptr; + + // When using Microsoft precompiled headers, this is the PCH's key. + // The same key is used by both the precompiled object, and objects using the + // precompiled object. Any difference indicates out-of-date objects. + llvm::Optional<uint32_t> PCHSignature; + private: void initializeChunks(); void initializeSymbols(); @@ -157,10 +169,24 @@ private: COFFSymbolRef COFFSym, const llvm::object::coff_aux_section_definition *Def); + void readAssociativeDefinition( + COFFSymbolRef COFFSym, + const llvm::object::coff_aux_section_definition *Def, + uint32_t ParentSection); + + void recordPrevailingSymbolForMingw( + COFFSymbolRef COFFSym, + llvm::DenseMap<StringRef, uint32_t> &PrevailingSectionMap); + + void maybeAssociateSEHForMingw( + COFFSymbolRef Sym, const llvm::object::coff_aux_section_definition *Def, + const llvm::DenseMap<StringRef, uint32_t> &PrevailingSectionMap); + llvm::Optional<Symbol *> createDefined(COFFSymbolRef Sym, std::vector<const llvm::object::coff_aux_section_definition *> - &ComdatDefs); + &ComdatDefs, + bool &PrevailingComdat); Symbol *createRegular(COFFSymbolRef Sym); Symbol *createUndefined(COFFSymbolRef Sym); diff --git a/COFF/LTO.cpp b/COFF/LTO.cpp index 93f7ba3f9e4c..92d9ff0937c0 100644 --- a/COFF/LTO.cpp +++ b/COFF/LTO.cpp @@ -60,6 +60,9 @@ static std::unique_ptr<lto::LTO> createLTO() { C.DisableVerify = true; C.DiagHandler = diagnosticHandler; C.OptLevel = Config->LTOO; + C.CPU = GetCPUStr(); + C.MAttrs = GetMAttrs(); + if (Config->SaveTemps) checkError(C.addSaveTemps(std::string(Config->OutputFile) + ".", /*UseInputModulePath*/ true)); diff --git a/COFF/MapFile.cpp b/COFF/MapFile.cpp index 6ca1b6647bd7..fd4894250223 100644 --- a/COFF/MapFile.cpp +++ b/COFF/MapFile.cpp @@ -110,7 +110,7 @@ void coff::writeMapFile(ArrayRef<OutputSection *> OutputSections) { writeHeader(OS, Sec->getRVA(), Sec->getVirtualSize(), /*Align=*/PageSize); OS << Sec->Name << '\n'; - for (Chunk *C : Sec->getChunks()) { + for (Chunk *C : Sec->Chunks) { auto *SC = dyn_cast<SectionChunk>(C); if (!SC) continue; diff --git a/COFF/MarkLive.cpp b/COFF/MarkLive.cpp index 57ae450a9138..18b1c9c2529f 100644 --- a/COFF/MarkLive.cpp +++ b/COFF/MarkLive.cpp @@ -32,13 +32,13 @@ void markLive(ArrayRef<Chunk *> Chunks) { // COMDAT section chunks are dead by default. Add non-COMDAT chunks. for (Chunk *C : Chunks) if (auto *SC = dyn_cast<SectionChunk>(C)) - if (SC->isLive()) + if (SC->Live) Worklist.push_back(SC); auto Enqueue = [&](SectionChunk *C) { - if (C->isLive()) + if (C->Live) return; - C->markLive(); + C->Live = true; Worklist.push_back(C); }; @@ -57,7 +57,7 @@ void markLive(ArrayRef<Chunk *> Chunks) { while (!Worklist.empty()) { SectionChunk *SC = Worklist.pop_back_val(); - assert(SC->isLive() && "We mark as live when pushing onto the worklist!"); + assert(SC->Live && "We mark as live when pushing onto the worklist!"); // Mark all symbols listed in the relocation table for this section. for (Symbol *B : SC->symbols()) diff --git a/COFF/MinGW.cpp b/COFF/MinGW.cpp index 2ca00587331f..b2c8c4eadca4 100644 --- a/COFF/MinGW.cpp +++ b/COFF/MinGW.cpp @@ -19,7 +19,23 @@ using namespace lld::coff; using namespace llvm; using namespace llvm::COFF; -AutoExporter::AutoExporter() { +void AutoExporter::initSymbolExcludes() { + ExcludeSymbolPrefixes = { + // Import symbols + "__imp_", + "__IMPORT_DESCRIPTOR_", + // Extra import symbols from GNU import libraries + "__nm_", + // C++ symbols + "__rtti_", + "__builtin_", + // Artifical symbols such as .refptr + ".", + }; + ExcludeSymbolSuffixes = { + "_iname", + "_NULL_THUNK_DATA", + }; if (Config->Machine == I386) { ExcludeSymbols = { "__NULL_IMPORT_DESCRIPTOR", @@ -36,9 +52,10 @@ AutoExporter::AutoExporter() { "_DllEntryPoint@12", "_DllMainCRTStartup@12", }; + ExcludeSymbolPrefixes.insert("__head_"); } else { ExcludeSymbols = { - "_NULL_IMPORT_DESCRIPTOR", + "__NULL_IMPORT_DESCRIPTOR", "_pei386_runtime_relocator", "do_pseudo_reloc", "impure_ptr", @@ -52,8 +69,11 @@ AutoExporter::AutoExporter() { "DllEntryPoint", "DllMainCRTStartup", }; + ExcludeSymbolPrefixes.insert("_head_"); } +} +AutoExporter::AutoExporter() { ExcludeLibs = { "libgcc", "libgcc_s", @@ -64,6 +84,7 @@ AutoExporter::AutoExporter() { "libsupc++", "libobjc", "libgcj", + "libclang_rt.builtins", "libclang_rt.builtins-aarch64", "libclang_rt.builtins-arm", "libclang_rt.builtins-i386", @@ -90,6 +111,13 @@ AutoExporter::AutoExporter() { }; } +void AutoExporter::addWholeArchive(StringRef Path) { + StringRef LibName = sys::path::filename(Path); + // Drop the file extension, to match the processing below. + LibName = LibName.substr(0, LibName.rfind('.')); + ExcludeLibs.erase(LibName); +} + bool AutoExporter::shouldExport(Defined *Sym) const { if (!Sym || !Sym->isLive() || !Sym->getChunk()) return false; @@ -101,10 +129,12 @@ bool AutoExporter::shouldExport(Defined *Sym) const { if (ExcludeSymbols.count(Sym->getName())) return false; - // Don't export anything that looks like an import symbol (which also can be - // a manually defined data symbol with such a name). - if (Sym->getName().startswith("__imp_")) - return false; + for (StringRef Prefix : ExcludeSymbolPrefixes.keys()) + if (Sym->getName().startswith(Prefix)) + return false; + for (StringRef Suffix : ExcludeSymbolSuffixes.keys()) + if (Sym->getName().endswith(Suffix)) + return false; // If a corresponding __imp_ symbol exists and is defined, don't export it. if (Symtab->find(("__imp_" + Sym->getName()).str())) diff --git a/COFF/MinGW.h b/COFF/MinGW.h index fe6cc5588ebc..f9c5e3e5c2cc 100644 --- a/COFF/MinGW.h +++ b/COFF/MinGW.h @@ -23,7 +23,13 @@ class AutoExporter { public: AutoExporter(); + void initSymbolExcludes(); + + void addWholeArchive(StringRef Path); + llvm::StringSet<> ExcludeSymbols; + llvm::StringSet<> ExcludeSymbolPrefixes; + llvm::StringSet<> ExcludeSymbolSuffixes; llvm::StringSet<> ExcludeLibs; llvm::StringSet<> ExcludeObjects; diff --git a/COFF/Options.td b/COFF/Options.td index 871bad8bd655..acf1bc5c8b1d 100644 --- a/COFF/Options.td +++ b/COFF/Options.td @@ -66,13 +66,18 @@ def wholearchive_file : P<"wholearchive", "Include all object files from this ar def disallowlib : Joined<["/", "-", "-?"], "disallowlib:">, Alias<nodefaultlib>; -def manifest : F<"manifest">; -def manifest_colon : P<"manifest", "Create manifest file">; +def manifest : F<"manifest">, HelpText<"Create .manifest file">; +def manifest_colon : P< + "manifest", + "NO disables manifest output; EMBED[,ID=#] embeds manifest as resource in the image">; def manifestuac : P<"manifestuac", "User access control">; -def manifestfile : P<"manifestfile", "Manifest file path">; -def manifestdependency : P<"manifestdependency", - "Attributes for <dependency> in manifest file">; -def manifestinput : P<"manifestinput", "Specify manifest file">; +def manifestfile : P<"manifestfile", "Manifest output path, with /manifest">; +def manifestdependency : P< + "manifestdependency", + "Attributes for <dependency> element in manifest file; implies /manifest">; +def manifestinput : P< + "manifestinput", + "Additional manifest inputs; only valid with /manifest:embed">; // We cannot use multiclass P because class name "incl" is different // from its command line option name. We do this because "include" is @@ -85,22 +90,28 @@ def deffile : Joined<["/", "-"], "def:">, HelpText<"Use module-definition file">; def debug : F<"debug">, HelpText<"Embed a symbol table in the image">; -def debug_full : F<"debug:full">, Alias<debug>; +def debug_opt : P<"debug", "Embed a symbol table in the image with option">; def debugtype : P<"debugtype", "Debug Info Options">; def dll : F<"dll">, HelpText<"Create a DLL">; def driver : P<"driver", "Generate a Windows NT Kernel Mode Driver">; -def nodefaultlib_all : F<"nodefaultlib">; -def noentry : F<"noentry">; +def nodefaultlib_all : F<"nodefaultlib">, + HelpText<"Remove all default libraries">; +def noentry : F<"noentry">, + HelpText<"Don't add reference to DllMainCRTStartup; only valid with /dll">; def profile : F<"profile">; -def repro : F<"Brepro">, HelpText<"Use a hash of the executable as the PE header timestamp">; +def repro : F<"Brepro">, + HelpText<"Use a hash of the executable as the PE header timestamp">; def swaprun_cd : F<"swaprun:cd">; def swaprun_net : F<"swaprun:net">; def verbose : F<"verbose">; def wholearchive_flag : F<"wholearchive">; def force : F<"force">, + HelpText<"Allow undefined and multiply defined symbols when creating executables">; +def force_unresolved : F<"force:unresolved">, HelpText<"Allow undefined symbols when creating executables">; -def force_unresolved : F<"force:unresolved">; +def force_multiple : F<"force:multiple">, + HelpText<"Allow multiply defined symbols when creating executables">; defm WX : B<"WX", "Treat warnings as errors", "Don't treat warnings as errors">; defm allowbind : B<"allowbind", "Enable DLL binding (default)", @@ -139,13 +150,9 @@ def help : F<"help">; def help_q : Flag<["/?", "-?"], "">, Alias<help>; // LLD extensions -def debug_ghash : F<"debug:ghash">; -def debug_dwarf : F<"debug:dwarf">; -def debug_symtab : F<"debug:symtab">; def export_all_symbols : F<"export-all-symbols">; def kill_at : F<"kill-at">; def lldmingw : F<"lldmingw">; -def msvclto : F<"msvclto">; def output_def : Joined<["/", "-"], "output-def:">; def pdb_source_path : P<"pdbsourcepath", "Base path used to make relative source file path absolute in PDB">; diff --git a/COFF/PDB.cpp b/COFF/PDB.cpp index 766bf3f6b456..7862b6ce4cc5 100644 --- a/COFF/PDB.cpp +++ b/COFF/PDB.cpp @@ -16,12 +16,14 @@ #include "Writer.h" #include "lld/Common/ErrorHandler.h" #include "lld/Common/Timer.h" +#include "llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h" #include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h" #include "llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h" #include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h" #include "llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h" #include "llvm/DebugInfo/CodeView/RecordName.h" #include "llvm/DebugInfo/CodeView/SymbolDeserializer.h" +#include "llvm/DebugInfo/CodeView/SymbolRecordHelpers.h" #include "llvm/DebugInfo/CodeView/SymbolSerializer.h" #include "llvm/DebugInfo/CodeView/TypeDeserializer.h" #include "llvm/DebugInfo/CodeView/TypeDumpVisitor.h" @@ -48,8 +50,10 @@ #include "llvm/Object/CVDebugRecord.h" #include "llvm/Support/BinaryByteStream.h" #include "llvm/Support/Endian.h" +#include "llvm/Support/Errc.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/JamCRC.h" +#include "llvm/Support/Parallel.h" #include "llvm/Support/Path.h" #include "llvm/Support/ScopedPrinter.h" #include <memory> @@ -79,9 +83,14 @@ struct CVIndexMap { SmallVector<TypeIndex, 0> TPIMap; SmallVector<TypeIndex, 0> IPIMap; bool IsTypeServerMap = false; + bool IsPrecompiledTypeMap = false; }; +class DebugSHandler; + class PDBLinker { + friend DebugSHandler; + public: PDBLinker(SymbolTable *Symtab) : Alloc(), Symtab(Symtab), Builder(Alloc), TypeTable(Alloc), @@ -93,7 +102,7 @@ public: } /// Emit the basic PDB structure: initial streams, headers, etc. - void initialize(const llvm::codeview::DebugInfo &BuildId); + void initialize(llvm::codeview::DebugInfo *BuildId); /// Add natvis files specified on the command line. void addNatvisFiles(); @@ -101,8 +110,10 @@ public: /// Link CodeView from each object file in the symbol table into the PDB. void addObjectsToPDB(); - /// Link CodeView from a single object file into the PDB. - void addObjFile(ObjFile *File); + /// Link CodeView from a single object file into the target (output) PDB. + /// When a precompiled headers object is linked, its TPI map might be provided + /// externally. + void addObjFile(ObjFile *File, CVIndexMap *ExternIndexMap = nullptr); /// Produce a mapping from the type and item indices used in the object /// file to those in the destination PDB. @@ -115,18 +126,60 @@ public: /// If the object does not use a type server PDB (compiled with /Z7), we merge /// all the type and item records from the .debug$S stream and fill in the /// caller-provided ObjectIndexMap. - Expected<const CVIndexMap&> mergeDebugT(ObjFile *File, - CVIndexMap &ObjectIndexMap); + Expected<const CVIndexMap &> mergeDebugT(ObjFile *File, + CVIndexMap *ObjectIndexMap); + + /// Reads and makes available a PDB. + Expected<const CVIndexMap &> maybeMergeTypeServerPDB(ObjFile *File, + const CVType &FirstType); + + /// Merges a precompiled headers TPI map into the current TPI map. The + /// precompiled headers object will also be loaded and remapped in the + /// process. + Expected<const CVIndexMap &> + mergeInPrecompHeaderObj(ObjFile *File, const CVType &FirstType, + CVIndexMap *ObjectIndexMap); + + /// Reads and makes available a precompiled headers object. + /// + /// This is a requirement for objects compiled with cl.exe /Yu. In that + /// case, the referenced object (which was compiled with /Yc) has to be loaded + /// first. This is mainly because the current object's TPI stream has external + /// references to the precompiled headers object. + /// + /// If the precompiled headers object was already loaded, this function will + /// simply return its (remapped) TPI map. + Expected<const CVIndexMap &> aquirePrecompObj(ObjFile *File, + PrecompRecord Precomp); + + /// Adds a precompiled headers object signature -> TPI mapping. + std::pair<CVIndexMap &, bool /*already there*/> + registerPrecompiledHeaders(uint32_t Signature); - Expected<const CVIndexMap&> maybeMergeTypeServerPDB(ObjFile *File, - TypeServer2Record &TS); + void mergeSymbolRecords(ObjFile *File, const CVIndexMap &IndexMap, + std::vector<ulittle32_t *> &StringTableRefs, + BinaryStreamRef SymData); /// Add the section map and section contributions to the PDB. void addSections(ArrayRef<OutputSection *> OutputSections, ArrayRef<uint8_t> SectionTable); - /// Write the PDB to disk. - void commit(); + /// Get the type table or the global type table if /DEBUG:GHASH is enabled. + TypeCollection &getTypeTable() { + if (Config->DebugGHashes) + return GlobalTypeTable; + return TypeTable; + } + + /// Get the ID table or the global ID table if /DEBUG:GHASH is enabled. + TypeCollection &getIDTable() { + if (Config->DebugGHashes) + return GlobalIDTable; + return IDTable; + } + + /// Write the PDB to disk and store the Guid generated for it in *Guid. + void commit(codeview::GUID *Guid); private: BumpPtrAllocator Alloc; @@ -161,14 +214,95 @@ private: std::vector<pdb::SecMapEntry> SectionMap; /// Type index mappings of type server PDBs that we've loaded so far. - std::map<GUID, CVIndexMap> TypeServerIndexMappings; + std::map<codeview::GUID, CVIndexMap> TypeServerIndexMappings; + + /// Type index mappings of precompiled objects type map that we've loaded so + /// far. + std::map<uint32_t, CVIndexMap> PrecompTypeIndexMappings; /// List of TypeServer PDBs which cannot be loaded. /// Cached to prevent repeated load attempts. - std::set<GUID> MissingTypeServerPDBs; + std::map<codeview::GUID, std::string> MissingTypeServerPDBs; +}; + +class DebugSHandler { + PDBLinker &Linker; + + /// The object file whose .debug$S sections we're processing. + ObjFile &File; + + /// The result of merging type indices. + const CVIndexMap &IndexMap; + + /// The DEBUG_S_STRINGTABLE subsection. These strings are referred to by + /// index from other records in the .debug$S section. All of these strings + /// need to be added to the global PDB string table, and all references to + /// these strings need to have their indices re-written to refer to the + /// global PDB string table. + DebugStringTableSubsectionRef CVStrTab; + + /// The DEBUG_S_FILECHKSMS subsection. As above, these are referred to + /// by other records in the .debug$S section and need to be merged into the + /// PDB. + DebugChecksumsSubsectionRef Checksums; + + /// The DEBUG_S_FRAMEDATA subsection(s). There can be more than one of + /// these and they need not appear in any specific order. However, they + /// contain string table references which need to be re-written, so we + /// collect them all here and re-write them after all subsections have been + /// discovered and processed. + std::vector<DebugFrameDataSubsectionRef> NewFpoFrames; + + /// Pointers to raw memory that we determine have string table references + /// that need to be re-written. We first process all .debug$S subsections + /// to ensure that we can handle subsections written in any order, building + /// up this list as we go. At the end, we use the string table (which must + /// have been discovered by now else it is an error) to re-write these + /// references. + std::vector<ulittle32_t *> StringTableReferences; + +public: + DebugSHandler(PDBLinker &Linker, ObjFile &File, const CVIndexMap &IndexMap) + : Linker(Linker), File(File), IndexMap(IndexMap) {} + + void handleDebugS(lld::coff::SectionChunk &DebugS); + void finish(); }; } +// Visual Studio's debugger requires absolute paths in various places in the +// PDB to work without additional configuration: +// https://docs.microsoft.com/en-us/visualstudio/debugger/debug-source-files-common-properties-solution-property-pages-dialog-box +static void pdbMakeAbsolute(SmallVectorImpl<char> &FileName) { + // The default behavior is to produce paths that are valid within the context + // of the machine that you perform the link on. If the linker is running on + // a POSIX system, we will output absolute POSIX paths. If the linker is + // running on a Windows system, we will output absolute Windows paths. If the + // user desires any other kind of behavior, they should explicitly pass + // /pdbsourcepath, in which case we will treat the exact string the user + // passed in as the gospel and not normalize, canonicalize it. + if (sys::path::is_absolute(FileName, sys::path::Style::windows) || + sys::path::is_absolute(FileName, sys::path::Style::posix)) + return; + + // It's not absolute in any path syntax. Relative paths necessarily refer to + // the local file system, so we can make it native without ending up with a + // nonsensical path. + sys::path::native(FileName); + if (Config->PDBSourcePath.empty()) { + sys::fs::make_absolute(FileName); + return; + } + // Only apply native and dot removal to the relative file path. We want to + // leave the path the user specified untouched since we assume they specified + // it for a reason. + sys::path::remove_dots(FileName, /*remove_dot_dots=*/true); + + SmallString<128> AbsoluteFileName = Config->PDBSourcePath; + sys::path::append(AbsoluteFileName, FileName); + FileName = std::move(AbsoluteFileName); +} + static SectionChunk *findByName(ArrayRef<SectionChunk *> Sections, StringRef Name) { for (SectionChunk *C : Sections) @@ -242,27 +376,79 @@ static void addTypeInfo(pdb::TpiStreamBuilder &TpiBuilder, }); } -static Optional<TypeServer2Record> -maybeReadTypeServerRecord(CVTypeArray &Types) { - auto I = Types.begin(); - if (I == Types.end()) - return None; - const CVType &Type = *I; - if (Type.kind() != LF_TYPESERVER2) - return None; - TypeServer2Record TS; - if (auto EC = TypeDeserializer::deserializeAs(const_cast<CVType &>(Type), TS)) - fatal("error reading type server record: " + toString(std::move(EC))); - return std::move(TS); +// OBJs usually start their symbol stream with a S_OBJNAME record. This record +// also contains the signature/key of the current PCH session. The signature +// must be same for all objects which depend on the precompiled object. +// Recompiling the precompiled headers will generate a new PCH key and thus +// invalidate all the dependent objects. +static uint32_t extractPCHSignature(ObjFile *File) { + auto DbgIt = find_if(File->getDebugChunks(), [](SectionChunk *C) { + return C->getSectionName() == ".debug$S"; + }); + if (!DbgIt) + return 0; + + ArrayRef<uint8_t> Contents = + consumeDebugMagic((*DbgIt)->getContents(), ".debug$S"); + DebugSubsectionArray Subsections; + BinaryStreamReader Reader(Contents, support::little); + ExitOnErr(Reader.readArray(Subsections, Contents.size())); + + for (const DebugSubsectionRecord &SS : Subsections) { + if (SS.kind() != DebugSubsectionKind::Symbols) + continue; + + // If it's there, the S_OBJNAME record shall come first in the stream. + Expected<CVSymbol> Sym = readSymbolFromStream(SS.getRecordData(), 0); + if (!Sym) { + consumeError(Sym.takeError()); + continue; + } + if (auto ObjName = SymbolDeserializer::deserializeAs<ObjNameSym>(Sym.get())) + return ObjName->Signature; + } + return 0; } -Expected<const CVIndexMap&> PDBLinker::mergeDebugT(ObjFile *File, - CVIndexMap &ObjectIndexMap) { +Expected<const CVIndexMap &> +PDBLinker::mergeDebugT(ObjFile *File, CVIndexMap *ObjectIndexMap) { ScopedTimer T(TypeMergingTimer); + bool IsPrecompiledHeader = false; + ArrayRef<uint8_t> Data = getDebugSection(File, ".debug$T"); + if (Data.empty()) { + // Try again, Microsoft precompiled headers use .debug$P instead of + // .debug$T + Data = getDebugSection(File, ".debug$P"); + IsPrecompiledHeader = true; + } if (Data.empty()) - return ObjectIndexMap; + return *ObjectIndexMap; // no debug info + + // Precompiled headers objects need to save the index map for further + // reference by other objects which use the precompiled headers. + if (IsPrecompiledHeader) { + uint32_t PCHSignature = extractPCHSignature(File); + if (PCHSignature == 0) + fatal("No signature found for the precompiled headers OBJ (" + + File->getName() + ")"); + + // When a precompiled headers object comes first on the command-line, we + // update the mapping here. Otherwise, if an object referencing the + // precompiled headers object comes first, the mapping is created in + // aquirePrecompObj(), thus we would skip this block. + if (!ObjectIndexMap->IsPrecompiledTypeMap) { + auto R = registerPrecompiledHeaders(PCHSignature); + if (R.second) + fatal( + "A precompiled headers OBJ with the same signature was already " + "provided! (" + + File->getName() + ")"); + + ObjectIndexMap = &R.first; + } + } BinaryByteStream Stream(Data, support::little); CVTypeArray Types; @@ -270,13 +456,32 @@ Expected<const CVIndexMap&> PDBLinker::mergeDebugT(ObjFile *File, if (auto EC = Reader.readArray(Types, Reader.getLength())) fatal("Reader::readArray failed: " + toString(std::move(EC))); - // Look through type servers. If we've already seen this type server, don't - // merge any type information. - if (Optional<TypeServer2Record> TS = maybeReadTypeServerRecord(Types)) - return maybeMergeTypeServerPDB(File, *TS); + auto FirstType = Types.begin(); + if (FirstType == Types.end()) + return *ObjectIndexMap; + + if (FirstType->kind() == LF_TYPESERVER2) { + // Look through type servers. If we've already seen this type server, + // don't merge any type information. + return maybeMergeTypeServerPDB(File, *FirstType); + } else if (FirstType->kind() == LF_PRECOMP) { + // This object was compiled with /Yu, so process the corresponding + // precompiled headers object (/Yc) first. Some type indices in the current + // object are referencing data in the precompiled headers object, so we need + // both to be loaded. + auto E = mergeInPrecompHeaderObj(File, *FirstType, ObjectIndexMap); + if (!E) + return E.takeError(); + + // Drop LF_PRECOMP record from the input stream, as it needs to be replaced + // with the precompiled headers object type stream. + // Note that we can't just call Types.drop_front(), as we explicitly want to + // rebase the stream. + Types.setUnderlyingStream( + Types.getUnderlyingStream().drop_front(FirstType->RecordData.size())); + } - // This is a /Z7 object. Fill in the temporary, caller-provided - // ObjectIndexMap. + // Fill in the temporary, caller-provided ObjectIndexMap. if (Config->DebugGHashes) { ArrayRef<GloballyHashedType> Hashes; std::vector<GloballyHashedType> OwnedHashes; @@ -288,20 +493,28 @@ Expected<const CVIndexMap&> PDBLinker::mergeDebugT(ObjFile *File, } if (auto Err = mergeTypeAndIdRecords(GlobalIDTable, GlobalTypeTable, - ObjectIndexMap.TPIMap, Types, Hashes)) + ObjectIndexMap->TPIMap, Types, Hashes, + File->PCHSignature)) fatal("codeview::mergeTypeAndIdRecords failed: " + toString(std::move(Err))); } else { - if (auto Err = mergeTypeAndIdRecords(IDTable, TypeTable, - ObjectIndexMap.TPIMap, Types)) + if (auto Err = + mergeTypeAndIdRecords(IDTable, TypeTable, ObjectIndexMap->TPIMap, + Types, File->PCHSignature)) fatal("codeview::mergeTypeAndIdRecords failed: " + toString(std::move(Err))); } - return ObjectIndexMap; + return *ObjectIndexMap; } static Expected<std::unique_ptr<pdb::NativeSession>> -tryToLoadPDB(const GUID &GuidFromObj, StringRef TSPath) { +tryToLoadPDB(const codeview::GUID &GuidFromObj, StringRef TSPath) { + // Ensure the file exists before anything else. We want to return ENOENT, + // "file not found", even if the path points to a removable device (in which + // case the return message would be EAGAIN, "resource unavailable try again") + if (!llvm::sys::fs::exists(TSPath)) + return errorCodeToError(std::error_code(ENOENT, std::generic_category())); + ErrorOr<std::unique_ptr<MemoryBuffer>> MBOrErr = MemoryBuffer::getFile( TSPath, /*FileSize=*/-1, /*RequiresNullTerminator=*/false); if (!MBOrErr) @@ -326,21 +539,27 @@ tryToLoadPDB(const GUID &GuidFromObj, StringRef TSPath) { // PDB file doesn't mean it matches. For it to match the InfoStream's GUID // must match the GUID specified in the TypeServer2 record. if (ExpectedInfo->getGuid() != GuidFromObj) - return make_error<pdb::GenericError>( - pdb::generic_error_code::type_server_not_found, TSPath); + return make_error<pdb::PDBError>(pdb::pdb_error_code::signature_out_of_date); return std::move(NS); } -Expected<const CVIndexMap&> PDBLinker::maybeMergeTypeServerPDB(ObjFile *File, - TypeServer2Record &TS) { - const GUID& TSId = TS.getGuid(); +Expected<const CVIndexMap &> +PDBLinker::maybeMergeTypeServerPDB(ObjFile *File, const CVType &FirstType) { + TypeServer2Record TS; + if (auto EC = + TypeDeserializer::deserializeAs(const_cast<CVType &>(FirstType), TS)) + fatal("error reading record: " + toString(std::move(EC))); + + const codeview::GUID &TSId = TS.getGuid(); StringRef TSPath = TS.getName(); // First, check if the PDB has previously failed to load. - if (MissingTypeServerPDBs.count(TSId)) - return make_error<pdb::GenericError>( - pdb::generic_error_code::type_server_not_found, TSPath); + auto PrevErr = MissingTypeServerPDBs.find(TSId); + if (PrevErr != MissingTypeServerPDBs.end()) + return createFileError( + TSPath, + make_error<StringError>(PrevErr->second, inconvertibleErrorCode())); // Second, check if we already loaded a PDB with this GUID. Return the type // index mapping if we have it. @@ -355,20 +574,39 @@ Expected<const CVIndexMap&> PDBLinker::maybeMergeTypeServerPDB(ObjFile *File, // Check for a PDB at: // 1. The given file path // 2. Next to the object file or archive file - auto ExpectedSession = tryToLoadPDB(TSId, TSPath); - if (!ExpectedSession) { - consumeError(ExpectedSession.takeError()); - StringRef LocalPath = - !File->ParentName.empty() ? File->ParentName : File->getName(); - SmallString<128> Path = sys::path::parent_path(LocalPath); - sys::path::append( - Path, sys::path::filename(TSPath, sys::path::Style::windows)); - ExpectedSession = tryToLoadPDB(TSId, Path); - } + auto ExpectedSession = handleExpected( + tryToLoadPDB(TSId, TSPath), + [&]() { + StringRef LocalPath = + !File->ParentName.empty() ? File->ParentName : File->getName(); + SmallString<128> Path = sys::path::parent_path(LocalPath); + // Currently, type server PDBs are only created by cl, which only runs + // on Windows, so we can assume type server paths are Windows style. + sys::path::append( + Path, sys::path::filename(TSPath, sys::path::Style::windows)); + return tryToLoadPDB(TSId, Path); + }, + [&](std::unique_ptr<ECError> EC) -> Error { + auto SysErr = EC->convertToErrorCode(); + // Only re-try loading if the previous error was "No such file or + // directory" + if (SysErr.category() == std::generic_category() && + SysErr.value() == ENOENT) + return Error::success(); + return Error(std::move(EC)); + }); + if (auto E = ExpectedSession.takeError()) { TypeServerIndexMappings.erase(TSId); - MissingTypeServerPDBs.emplace(TSId); - return std::move(E); + + // Flatten the error to a string, for later display, if the error occurs + // again on the same PDB. + std::string ErrMsg; + raw_string_ostream S(ErrMsg); + S << E; + MissingTypeServerPDBs.emplace(TSId, S.str()); + + return createFileError(TSPath, std::move(E)); } pdb::NativeSession *Session = ExpectedSession->get(); @@ -394,9 +632,10 @@ Expected<const CVIndexMap&> PDBLinker::maybeMergeTypeServerPDB(ObjFile *File, auto IpiHashes = GloballyHashedType::hashIds(ExpectedIpi->typeArray(), TpiHashes); + Optional<uint32_t> EndPrecomp; // Merge TPI first, because the IPI stream will reference type indices. if (auto Err = mergeTypeRecords(GlobalTypeTable, IndexMap.TPIMap, - ExpectedTpi->typeArray(), TpiHashes)) + ExpectedTpi->typeArray(), TpiHashes, EndPrecomp)) fatal("codeview::mergeTypeRecords failed: " + toString(std::move(Err))); // Merge IPI. @@ -419,6 +658,103 @@ Expected<const CVIndexMap&> PDBLinker::maybeMergeTypeServerPDB(ObjFile *File, return IndexMap; } +Expected<const CVIndexMap &> +PDBLinker::mergeInPrecompHeaderObj(ObjFile *File, const CVType &FirstType, + CVIndexMap *ObjectIndexMap) { + PrecompRecord Precomp; + if (auto EC = TypeDeserializer::deserializeAs(const_cast<CVType &>(FirstType), + Precomp)) + fatal("error reading record: " + toString(std::move(EC))); + + auto E = aquirePrecompObj(File, Precomp); + if (!E) + return E.takeError(); + + const CVIndexMap &PrecompIndexMap = *E; + assert(PrecompIndexMap.IsPrecompiledTypeMap); + + if (PrecompIndexMap.TPIMap.empty()) + return PrecompIndexMap; + + assert(Precomp.getStartTypeIndex() == TypeIndex::FirstNonSimpleIndex); + assert(Precomp.getTypesCount() <= PrecompIndexMap.TPIMap.size()); + // Use the previously remapped index map from the precompiled headers. + ObjectIndexMap->TPIMap.append(PrecompIndexMap.TPIMap.begin(), + PrecompIndexMap.TPIMap.begin() + + Precomp.getTypesCount()); + return *ObjectIndexMap; +} + +static bool equals_path(StringRef path1, StringRef path2) { +#if defined(_WIN32) + return path1.equals_lower(path2); +#else + return path1.equals(path2); +#endif +} + +// Find by name an OBJ provided on the command line +static ObjFile *findObjByName(StringRef FileNameOnly) { + SmallString<128> CurrentPath; + + for (ObjFile *F : ObjFile::Instances) { + StringRef CurrentFileName = sys::path::filename(F->getName()); + + // Compare based solely on the file name (link.exe behavior) + if (equals_path(CurrentFileName, FileNameOnly)) + return F; + } + return nullptr; +} + +std::pair<CVIndexMap &, bool /*already there*/> +PDBLinker::registerPrecompiledHeaders(uint32_t Signature) { + auto Insertion = PrecompTypeIndexMappings.insert({Signature, CVIndexMap()}); + CVIndexMap &IndexMap = Insertion.first->second; + if (!Insertion.second) + return {IndexMap, true}; + // Mark this map as a precompiled types map. + IndexMap.IsPrecompiledTypeMap = true; + return {IndexMap, false}; +} + +Expected<const CVIndexMap &> +PDBLinker::aquirePrecompObj(ObjFile *File, PrecompRecord Precomp) { + // First, check if we already loaded the precompiled headers object with this + // signature. Return the type index mapping if we've already seen it. + auto R = registerPrecompiledHeaders(Precomp.getSignature()); + if (R.second) + return R.first; + + CVIndexMap &IndexMap = R.first; + + // Cross-compile warning: given that Clang doesn't generate LF_PRECOMP + // records, we assume the OBJ comes from a Windows build of cl.exe. Thusly, + // the paths embedded in the OBJs are in the Windows format. + SmallString<128> PrecompFileName = sys::path::filename( + Precomp.getPrecompFilePath(), sys::path::Style::windows); + + // link.exe requires that a precompiled headers object must always be provided + // on the command-line, even if that's not necessary. + auto PrecompFile = findObjByName(PrecompFileName); + if (!PrecompFile) + return createFileError( + PrecompFileName.str(), + make_error<pdb::PDBError>(pdb::pdb_error_code::external_cmdline_ref)); + + addObjFile(PrecompFile, &IndexMap); + + if (!PrecompFile->PCHSignature) + fatal(PrecompFile->getName() + " is not a precompiled headers object"); + + if (Precomp.getSignature() != PrecompFile->PCHSignature.getValueOr(0)) + return createFileError( + Precomp.getPrecompFilePath().str(), + make_error<pdb::PDBError>(pdb::pdb_error_code::signature_out_of_date)); + + return IndexMap; +} + static bool remapTypeIndex(TypeIndex &TI, ArrayRef<TypeIndex> TypeIndexMap) { if (TI.isSimple()) return true; @@ -429,9 +765,11 @@ static bool remapTypeIndex(TypeIndex &TI, ArrayRef<TypeIndex> TypeIndexMap) { } static void remapTypesInSymbolRecord(ObjFile *File, SymbolKind SymKind, - MutableArrayRef<uint8_t> Contents, + MutableArrayRef<uint8_t> RecordBytes, const CVIndexMap &IndexMap, ArrayRef<TiReference> TypeRefs) { + MutableArrayRef<uint8_t> Contents = + RecordBytes.drop_front(sizeof(RecordPrefix)); for (const TiReference &Ref : TypeRefs) { unsigned ByteSize = Ref.Count * sizeof(TypeIndex); if (Contents.size() < Ref.Offset + ByteSize) @@ -477,7 +815,7 @@ recordStringTableReferences(SymbolKind Kind, MutableArrayRef<uint8_t> Contents, switch (Kind) { case SymbolKind::S_FILESTATIC: // FileStaticSym::ModFileOffset - recordStringTableReferenceAtOffset(Contents, 4, StrTableRefs); + recordStringTableReferenceAtOffset(Contents, 8, StrTableRefs); break; case SymbolKind::S_DEFRANGE: case SymbolKind::S_DEFRANGE_SUBFIELD: @@ -542,58 +880,26 @@ static void translateIdSymbols(MutableArrayRef<uint8_t> &RecordData, /// Copy the symbol record. In a PDB, symbol records must be 4 byte aligned. /// The object file may not be aligned. -static MutableArrayRef<uint8_t> copySymbolForPdb(const CVSymbol &Sym, - BumpPtrAllocator &Alloc) { +static MutableArrayRef<uint8_t> +copyAndAlignSymbol(const CVSymbol &Sym, MutableArrayRef<uint8_t> &AlignedMem) { size_t Size = alignTo(Sym.length(), alignOf(CodeViewContainer::Pdb)); assert(Size >= 4 && "record too short"); assert(Size <= MaxRecordLength && "record too long"); - void *Mem = Alloc.Allocate(Size, 4); + assert(AlignedMem.size() >= Size && "didn't preallocate enough"); // Copy the symbol record and zero out any padding bytes. - MutableArrayRef<uint8_t> NewData(reinterpret_cast<uint8_t *>(Mem), Size); + MutableArrayRef<uint8_t> NewData = AlignedMem.take_front(Size); + AlignedMem = AlignedMem.drop_front(Size); memcpy(NewData.data(), Sym.data().data(), Sym.length()); memset(NewData.data() + Sym.length(), 0, Size - Sym.length()); // Update the record prefix length. It should point to the beginning of the // next record. - auto *Prefix = reinterpret_cast<RecordPrefix *>(Mem); + auto *Prefix = reinterpret_cast<RecordPrefix *>(NewData.data()); Prefix->RecordLen = Size - 2; return NewData; } -/// Return true if this symbol opens a scope. This implies that the symbol has -/// "parent" and "end" fields, which contain the offset of the S_END or -/// S_INLINESITE_END record. -static bool symbolOpensScope(SymbolKind Kind) { - switch (Kind) { - case SymbolKind::S_GPROC32: - case SymbolKind::S_LPROC32: - case SymbolKind::S_LPROC32_ID: - case SymbolKind::S_GPROC32_ID: - case SymbolKind::S_BLOCK32: - case SymbolKind::S_SEPCODE: - case SymbolKind::S_THUNK32: - case SymbolKind::S_INLINESITE: - case SymbolKind::S_INLINESITE2: - return true; - default: - break; - } - return false; -} - -static bool symbolEndsScope(SymbolKind Kind) { - switch (Kind) { - case SymbolKind::S_END: - case SymbolKind::S_PROC_ID_END: - case SymbolKind::S_INLINESITE_END: - return true; - default: - break; - } - return false; -} - struct ScopeRecord { ulittle32_t PtrParent; ulittle32_t PtrEnd; @@ -625,11 +931,10 @@ static void scopeStackClose(SmallVectorImpl<SymbolScope> &Stack, S.OpeningRecord->PtrEnd = CurOffset; } -static bool symbolGoesInModuleStream(const CVSymbol &Sym) { +static bool symbolGoesInModuleStream(const CVSymbol &Sym, bool IsGlobalScope) { switch (Sym.kind()) { case SymbolKind::S_GDATA32: case SymbolKind::S_CONSTANT: - case SymbolKind::S_UDT: // We really should not be seeing S_PROCREF and S_LPROCREF in the first place // since they are synthesized by the linker in response to S_GPROC32 and // S_LPROC32, but if we do see them, don't put them in the module stream I @@ -637,6 +942,9 @@ static bool symbolGoesInModuleStream(const CVSymbol &Sym) { case SymbolKind::S_PROCREF: case SymbolKind::S_LPROCREF: return false; + // S_UDT records go in the module stream if it is not a global S_UDT. + case SymbolKind::S_UDT: + return !IsGlobalScope; // S_GDATA32 does not go in the module stream, but S_LDATA32 does. case SymbolKind::S_LDATA32: default: @@ -644,7 +952,7 @@ static bool symbolGoesInModuleStream(const CVSymbol &Sym) { } } -static bool symbolGoesInGlobalsStream(const CVSymbol &Sym) { +static bool symbolGoesInGlobalsStream(const CVSymbol &Sym, bool IsGlobalScope) { switch (Sym.kind()) { case SymbolKind::S_CONSTANT: case SymbolKind::S_GDATA32: @@ -658,20 +966,16 @@ static bool symbolGoesInGlobalsStream(const CVSymbol &Sym) { case SymbolKind::S_PROCREF: case SymbolKind::S_LPROCREF: return true; - // FIXME: For now, we drop all S_UDT symbols (i.e. they don't go in the - // globals stream or the modules stream). These have special handling which - // needs more investigation before we can get right, but by putting them all - // into the globals stream WinDbg fails to display local variables of class - // types saying that it cannot find the type Foo *. So as a stopgap just to - // keep things working, we drop them. + // S_UDT records go in the globals stream if it is a global S_UDT. case SymbolKind::S_UDT: + return IsGlobalScope; default: return false; } } -static void addGlobalSymbol(pdb::GSIStreamBuilder &Builder, ObjFile &File, - const CVSymbol &Sym) { +static void addGlobalSymbol(pdb::GSIStreamBuilder &Builder, uint16_t ModIndex, + unsigned SymOffset, const CVSymbol &Sym) { switch (Sym.kind()) { case SymbolKind::S_CONSTANT: case SymbolKind::S_UDT: @@ -687,12 +991,12 @@ static void addGlobalSymbol(pdb::GSIStreamBuilder &Builder, ObjFile &File, if (Sym.kind() == SymbolKind::S_LPROC32) K = SymbolRecordKind::LocalProcRef; ProcRefSym PS(K); - PS.Module = static_cast<uint16_t>(File.ModuleDBI->getModuleIndex()); + PS.Module = ModIndex; // For some reason, MSVC seems to add one to this value. ++PS.Module; PS.Name = getSymbolName(Sym); PS.SumName = 0; - PS.SymOffset = File.ModuleDBI->getNextSymbolOffset(); + PS.SymOffset = SymOffset; Builder.addGlobalSymbol(PS); break; } @@ -701,20 +1005,62 @@ static void addGlobalSymbol(pdb::GSIStreamBuilder &Builder, ObjFile &File, } } -static void mergeSymbolRecords(BumpPtrAllocator &Alloc, ObjFile *File, - pdb::GSIStreamBuilder &GsiBuilder, - const CVIndexMap &IndexMap, - TypeCollection &IDTable, - std::vector<ulittle32_t *> &StringTableRefs, - BinaryStreamRef SymData) { - // FIXME: Improve error recovery by warning and skipping records when - // possible. +void PDBLinker::mergeSymbolRecords(ObjFile *File, const CVIndexMap &IndexMap, + std::vector<ulittle32_t *> &StringTableRefs, + BinaryStreamRef SymData) { ArrayRef<uint8_t> SymsBuffer; cantFail(SymData.readBytes(0, SymData.getLength(), SymsBuffer)); SmallVector<SymbolScope, 4> Scopes; + // Iterate every symbol to check if any need to be realigned, and if so, how + // much space we need to allocate for them. + bool NeedsRealignment = false; + unsigned TotalRealignedSize = 0; auto EC = forEachCodeViewRecord<CVSymbol>( - SymsBuffer, [&](const CVSymbol &Sym) -> llvm::Error { + SymsBuffer, [&](CVSymbol Sym) -> llvm::Error { + unsigned RealignedSize = + alignTo(Sym.length(), alignOf(CodeViewContainer::Pdb)); + NeedsRealignment |= RealignedSize != Sym.length(); + TotalRealignedSize += RealignedSize; + return Error::success(); + }); + + // If any of the symbol record lengths was corrupt, ignore them all, warn + // about it, and move on. + if (EC) { + warn("corrupt symbol records in " + File->getName()); + consumeError(std::move(EC)); + return; + } + + // If any symbol needed realignment, allocate enough contiguous memory for + // them all. Typically symbol subsections are small enough that this will not + // cause fragmentation. + MutableArrayRef<uint8_t> AlignedSymbolMem; + if (NeedsRealignment) { + void *AlignedData = + Alloc.Allocate(TotalRealignedSize, alignOf(CodeViewContainer::Pdb)); + AlignedSymbolMem = makeMutableArrayRef( + reinterpret_cast<uint8_t *>(AlignedData), TotalRealignedSize); + } + + // Iterate again, this time doing the real work. + unsigned CurSymOffset = File->ModuleDBI->getNextSymbolOffset(); + ArrayRef<uint8_t> BulkSymbols; + cantFail(forEachCodeViewRecord<CVSymbol>( + SymsBuffer, [&](CVSymbol Sym) -> llvm::Error { + // Align the record if required. + MutableArrayRef<uint8_t> RecordBytes; + if (NeedsRealignment) { + RecordBytes = copyAndAlignSymbol(Sym, AlignedSymbolMem); + Sym = CVSymbol(Sym.kind(), RecordBytes); + } else { + // Otherwise, we can actually mutate the symbol directly, since we + // copied it to apply relocations. + RecordBytes = makeMutableArrayRef( + const_cast<uint8_t *>(Sym.data().data()), Sym.length()); + } + // Discover type index references in the record. Skip it if we don't // know where they are. SmallVector<TiReference, 32> TypeRefs; @@ -724,57 +1070,62 @@ static void mergeSymbolRecords(BumpPtrAllocator &Alloc, ObjFile *File, return Error::success(); } - // Copy the symbol record so we can mutate it. - MutableArrayRef<uint8_t> NewData = copySymbolForPdb(Sym, Alloc); - // Re-map all the type index references. - MutableArrayRef<uint8_t> Contents = - NewData.drop_front(sizeof(RecordPrefix)); - remapTypesInSymbolRecord(File, Sym.kind(), Contents, IndexMap, + remapTypesInSymbolRecord(File, Sym.kind(), RecordBytes, IndexMap, TypeRefs); // An object file may have S_xxx_ID symbols, but these get converted to // "real" symbols in a PDB. - translateIdSymbols(NewData, IDTable); + translateIdSymbols(RecordBytes, getIDTable()); + Sym = CVSymbol(symbolKind(RecordBytes), RecordBytes); // If this record refers to an offset in the object file's string table, // add that item to the global PDB string table and re-write the index. - recordStringTableReferences(Sym.kind(), Contents, StringTableRefs); - - SymbolKind NewKind = symbolKind(NewData); + recordStringTableReferences(Sym.kind(), RecordBytes, StringTableRefs); // Fill in "Parent" and "End" fields by maintaining a stack of scopes. - CVSymbol NewSym(NewKind, NewData); - if (symbolOpensScope(NewKind)) - scopeStackOpen(Scopes, File->ModuleDBI->getNextSymbolOffset(), - NewSym); - else if (symbolEndsScope(NewKind)) - scopeStackClose(Scopes, File->ModuleDBI->getNextSymbolOffset(), File); + if (symbolOpensScope(Sym.kind())) + scopeStackOpen(Scopes, CurSymOffset, Sym); + else if (symbolEndsScope(Sym.kind())) + scopeStackClose(Scopes, CurSymOffset, File); // Add the symbol to the globals stream if necessary. Do this before // adding the symbol to the module since we may need to get the next // symbol offset, and writing to the module's symbol stream will update // that offset. - if (symbolGoesInGlobalsStream(NewSym)) - addGlobalSymbol(GsiBuilder, *File, NewSym); - - // Add the symbol to the module. - if (symbolGoesInModuleStream(NewSym)) - File->ModuleDBI->addSymbol(NewSym); + if (symbolGoesInGlobalsStream(Sym, Scopes.empty())) + addGlobalSymbol(Builder.getGsiBuilder(), + File->ModuleDBI->getModuleIndex(), CurSymOffset, Sym); + + if (symbolGoesInModuleStream(Sym, Scopes.empty())) { + // Add symbols to the module in bulk. If this symbol is contiguous + // with the previous run of symbols to add, combine the ranges. If + // not, close the previous range of symbols and start a new one. + if (Sym.data().data() == BulkSymbols.end()) { + BulkSymbols = makeArrayRef(BulkSymbols.data(), + BulkSymbols.size() + Sym.length()); + } else { + File->ModuleDBI->addSymbolsInBulk(BulkSymbols); + BulkSymbols = RecordBytes; + } + CurSymOffset += Sym.length(); + } return Error::success(); - }); - cantFail(std::move(EC)); + })); + + // Add any remaining symbols we've accumulated. + File->ModuleDBI->addSymbolsInBulk(BulkSymbols); } -// Allocate memory for a .debug$S section and relocate it. +// Allocate memory for a .debug$S / .debug$F section and relocate it. static ArrayRef<uint8_t> relocateDebugChunk(BumpPtrAllocator &Alloc, - SectionChunk *DebugChunk) { - uint8_t *Buffer = Alloc.Allocate<uint8_t>(DebugChunk->getSize()); - assert(DebugChunk->OutputSectionOff == 0 && + SectionChunk &DebugChunk) { + uint8_t *Buffer = Alloc.Allocate<uint8_t>(DebugChunk.getSize()); + assert(DebugChunk.OutputSectionOff == 0 && "debug sections should not be in output sections"); - DebugChunk->writeTo(Buffer); - return consumeDebugMagic(makeArrayRef(Buffer, DebugChunk->getSize()), - ".debug$S"); + DebugChunk.readRelocTargets(); + DebugChunk.writeTo(Buffer); + return makeArrayRef(Buffer, DebugChunk.getSize()); } static pdb::SectionContrib createSectionContrib(const Chunk *C, uint32_t Modi) { @@ -803,25 +1154,137 @@ static pdb::SectionContrib createSectionContrib(const Chunk *C, uint32_t Modi) { return SC; } -void PDBLinker::addObjFile(ObjFile *File) { +static uint32_t +translateStringTableIndex(uint32_t ObjIndex, + const DebugStringTableSubsectionRef &ObjStrTable, + DebugStringTableSubsection &PdbStrTable) { + auto ExpectedString = ObjStrTable.getString(ObjIndex); + if (!ExpectedString) { + warn("Invalid string table reference"); + consumeError(ExpectedString.takeError()); + return 0; + } + + return PdbStrTable.insert(*ExpectedString); +} + +void DebugSHandler::handleDebugS(lld::coff::SectionChunk &DebugS) { + DebugSubsectionArray Subsections; + + ArrayRef<uint8_t> RelocatedDebugContents = consumeDebugMagic( + relocateDebugChunk(Linker.Alloc, DebugS), DebugS.getSectionName()); + + BinaryStreamReader Reader(RelocatedDebugContents, support::little); + ExitOnErr(Reader.readArray(Subsections, RelocatedDebugContents.size())); + + for (const DebugSubsectionRecord &SS : Subsections) { + switch (SS.kind()) { + case DebugSubsectionKind::StringTable: { + assert(!CVStrTab.valid() && + "Encountered multiple string table subsections!"); + ExitOnErr(CVStrTab.initialize(SS.getRecordData())); + break; + } + case DebugSubsectionKind::FileChecksums: + assert(!Checksums.valid() && + "Encountered multiple checksum subsections!"); + ExitOnErr(Checksums.initialize(SS.getRecordData())); + break; + case DebugSubsectionKind::Lines: + // We can add the relocated line table directly to the PDB without + // modification because the file checksum offsets will stay the same. + File.ModuleDBI->addDebugSubsection(SS); + break; + case DebugSubsectionKind::FrameData: { + // We need to re-write string table indices here, so save off all + // frame data subsections until we've processed the entire list of + // subsections so that we can be sure we have the string table. + DebugFrameDataSubsectionRef FDS; + ExitOnErr(FDS.initialize(SS.getRecordData())); + NewFpoFrames.push_back(std::move(FDS)); + break; + } + case DebugSubsectionKind::Symbols: { + Linker.mergeSymbolRecords(&File, IndexMap, StringTableReferences, + SS.getRecordData()); + break; + } + default: + // FIXME: Process the rest of the subsections. + break; + } + } +} + +void DebugSHandler::finish() { + pdb::DbiStreamBuilder &DbiBuilder = Linker.Builder.getDbiBuilder(); + + // We should have seen all debug subsections across the entire object file now + // which means that if a StringTable subsection and Checksums subsection were + // present, now is the time to handle them. + if (!CVStrTab.valid()) { + if (Checksums.valid()) + fatal(".debug$S sections with a checksums subsection must also contain a " + "string table subsection"); + + if (!StringTableReferences.empty()) + warn("No StringTable subsection was encountered, but there are string " + "table references"); + return; + } + + // Rewrite string table indices in the Fpo Data and symbol records to refer to + // the global PDB string table instead of the object file string table. + for (DebugFrameDataSubsectionRef &FDS : NewFpoFrames) { + const ulittle32_t *Reloc = FDS.getRelocPtr(); + for (codeview::FrameData FD : FDS) { + FD.RvaStart += *Reloc; + FD.FrameFunc = + translateStringTableIndex(FD.FrameFunc, CVStrTab, Linker.PDBStrTab); + DbiBuilder.addNewFpoData(FD); + } + } + + for (ulittle32_t *Ref : StringTableReferences) + *Ref = translateStringTableIndex(*Ref, CVStrTab, Linker.PDBStrTab); + + // Make a new file checksum table that refers to offsets in the PDB-wide + // string table. Generally the string table subsection appears after the + // checksum table, so we have to do this after looping over all the + // subsections. + auto NewChecksums = make_unique<DebugChecksumsSubsection>(Linker.PDBStrTab); + for (FileChecksumEntry &FC : Checksums) { + SmallString<128> FileName = + ExitOnErr(CVStrTab.getString(FC.FileNameOffset)); + pdbMakeAbsolute(FileName); + ExitOnErr(Linker.Builder.getDbiBuilder().addModuleSourceFile( + *File.ModuleDBI, FileName)); + NewChecksums->addChecksum(FileName, FC.Kind, FC.Checksum); + } + File.ModuleDBI->addDebugSubsection(std::move(NewChecksums)); +} + +void PDBLinker::addObjFile(ObjFile *File, CVIndexMap *ExternIndexMap) { + if (File->wasProcessedForPDB()) + return; // Add a module descriptor for every object file. We need to put an absolute // path to the object into the PDB. If this is a plain object, we make its // path absolute. If it's an object in an archive, we make the archive path // absolute. bool InArchive = !File->ParentName.empty(); SmallString<128> Path = InArchive ? File->ParentName : File->getName(); - sys::fs::make_absolute(Path); - sys::path::native(Path, sys::path::Style::windows); + pdbMakeAbsolute(Path); StringRef Name = InArchive ? File->getName() : StringRef(Path); - File->ModuleDBI = &ExitOnErr(Builder.getDbiBuilder().addModuleInfo(Name)); + pdb::DbiStreamBuilder &DbiBuilder = Builder.getDbiBuilder(); + File->ModuleDBI = &ExitOnErr(DbiBuilder.addModuleInfo(Name)); File->ModuleDBI->setObjFileName(Path); auto Chunks = File->getChunks(); uint32_t Modi = File->ModuleDBI->getModuleIndex(); for (Chunk *C : Chunks) { auto *SecChunk = dyn_cast<SectionChunk>(C); - if (!SecChunk || !SecChunk->isLive()) + if (!SecChunk || !SecChunk->Live) continue; pdb::SectionContrib SC = createSectionContrib(SecChunk, Modi); File->ModuleDBI->setFirstSectionContrib(SC); @@ -833,119 +1296,54 @@ void PDBLinker::addObjFile(ObjFile *File) { // the PDB first, so that we can get the map from object file type and item // indices to PDB type and item indices. CVIndexMap ObjectIndexMap; - auto IndexMapResult = mergeDebugT(File, ObjectIndexMap); + auto IndexMapResult = + mergeDebugT(File, ExternIndexMap ? ExternIndexMap : &ObjectIndexMap); // If the .debug$T sections fail to merge, assume there is no debug info. if (!IndexMapResult) { - warn("Type server PDB for " + Name + " is invalid, ignoring debug info. " + - toString(IndexMapResult.takeError())); + if (!Config->WarnDebugInfoUnusable) { + consumeError(IndexMapResult.takeError()); + return; + } + StringRef FileName = sys::path::filename(Path); + warn("Cannot use debug info for '" + FileName + "' [LNK4099]\n" + + ">>> failed to load reference " + + StringRef(toString(IndexMapResult.takeError()))); return; } - const CVIndexMap &IndexMap = *IndexMapResult; - ScopedTimer T(SymbolMergingTimer); - // Now do all live .debug$S sections. - DebugStringTableSubsectionRef CVStrTab; - DebugChecksumsSubsectionRef Checksums; - std::vector<ulittle32_t *> StringTableReferences; + DebugSHandler DSH(*this, *File, *IndexMapResult); + // Now do all live .debug$S and .debug$F sections. for (SectionChunk *DebugChunk : File->getDebugChunks()) { - if (!DebugChunk->isLive() || DebugChunk->getSectionName() != ".debug$S") + if (!DebugChunk->Live || DebugChunk->getSize() == 0) continue; - ArrayRef<uint8_t> RelocatedDebugContents = - relocateDebugChunk(Alloc, DebugChunk); - if (RelocatedDebugContents.empty()) + if (DebugChunk->getSectionName() == ".debug$S") { + DSH.handleDebugS(*DebugChunk); continue; - - DebugSubsectionArray Subsections; - BinaryStreamReader Reader(RelocatedDebugContents, support::little); - ExitOnErr(Reader.readArray(Subsections, RelocatedDebugContents.size())); - - for (const DebugSubsectionRecord &SS : Subsections) { - switch (SS.kind()) { - case DebugSubsectionKind::StringTable: { - assert(!CVStrTab.valid() && - "Encountered multiple string table subsections!"); - ExitOnErr(CVStrTab.initialize(SS.getRecordData())); - break; - } - case DebugSubsectionKind::FileChecksums: - assert(!Checksums.valid() && - "Encountered multiple checksum subsections!"); - ExitOnErr(Checksums.initialize(SS.getRecordData())); - break; - case DebugSubsectionKind::Lines: - // We can add the relocated line table directly to the PDB without - // modification because the file checksum offsets will stay the same. - File->ModuleDBI->addDebugSubsection(SS); - break; - case DebugSubsectionKind::Symbols: - if (Config->DebugGHashes) { - mergeSymbolRecords(Alloc, File, Builder.getGsiBuilder(), IndexMap, - GlobalIDTable, StringTableReferences, - SS.getRecordData()); - } else { - mergeSymbolRecords(Alloc, File, Builder.getGsiBuilder(), IndexMap, - IDTable, StringTableReferences, - SS.getRecordData()); - } - break; - default: - // FIXME: Process the rest of the subsections. - break; - } } - } - // We should have seen all debug subsections across the entire object file now - // which means that if a StringTable subsection and Checksums subsection were - // present, now is the time to handle them. - if (!CVStrTab.valid()) { - if (Checksums.valid()) - fatal(".debug$S sections with a checksums subsection must also contain a " - "string table subsection"); + if (DebugChunk->getSectionName() == ".debug$F") { + ArrayRef<uint8_t> RelocatedDebugContents = + relocateDebugChunk(Alloc, *DebugChunk); - if (!StringTableReferences.empty()) - warn("No StringTable subsection was encountered, but there are string " - "table references"); - return; - } + FixedStreamArray<object::FpoData> FpoRecords; + BinaryStreamReader Reader(RelocatedDebugContents, support::little); + uint32_t Count = RelocatedDebugContents.size() / sizeof(object::FpoData); + ExitOnErr(Reader.readArray(FpoRecords, Count)); - // Rewrite each string table reference based on the value that the string - // assumes in the final PDB. - for (ulittle32_t *Ref : StringTableReferences) { - auto ExpectedString = CVStrTab.getString(*Ref); - if (!ExpectedString) { - warn("Invalid string table reference"); - consumeError(ExpectedString.takeError()); + // These are already relocated and don't refer to the string table, so we + // can just copy it. + for (const object::FpoData &FD : FpoRecords) + DbiBuilder.addOldFpoData(FD); continue; } - - *Ref = PDBStrTab.insert(*ExpectedString); } - // Make a new file checksum table that refers to offsets in the PDB-wide - // string table. Generally the string table subsection appears after the - // checksum table, so we have to do this after looping over all the - // subsections. - auto NewChecksums = make_unique<DebugChecksumsSubsection>(PDBStrTab); - for (FileChecksumEntry &FC : Checksums) { - SmallString<128> FileName = ExitOnErr(CVStrTab.getString(FC.FileNameOffset)); - if (!sys::path::is_absolute(FileName) && - !Config->PDBSourcePath.empty()) { - SmallString<128> AbsoluteFileName = Config->PDBSourcePath; - sys::path::append(AbsoluteFileName, FileName); - sys::path::native(AbsoluteFileName); - sys::path::remove_dots(AbsoluteFileName, /*remove_dot_dots=*/true); - FileName = std::move(AbsoluteFileName); - } - ExitOnErr(Builder.getDbiBuilder().addModuleSourceFile(*File->ModuleDBI, - FileName)); - NewChecksums->addChecksum(FileName, FC.Kind, FC.Checksum); - } - File->ModuleDBI->addDebugSubsection(std::move(NewChecksums)); + // Do any post-processing now that all .debug$S sections have been processed. + DSH.finish(); } static PublicSym32 createPublic(Defined *Def) { @@ -977,13 +1375,8 @@ void PDBLinker::addObjectsToPDB() { // Construct TPI and IPI stream contents. ScopedTimer T2(TpiStreamLayoutTimer); - if (Config->DebugGHashes) { - addTypeInfo(Builder.getTpiBuilder(), GlobalTypeTable); - addTypeInfo(Builder.getIpiBuilder(), GlobalIDTable); - } else { - addTypeInfo(Builder.getTpiBuilder(), TypeTable); - addTypeInfo(Builder.getIpiBuilder(), IDTable); - } + addTypeInfo(Builder.getTpiBuilder(), getTypeTable()); + addTypeInfo(Builder.getIpiBuilder(), getIDTable()); T2.stop(); ScopedTimer T3(GlobalsLayoutTimer); @@ -999,10 +1392,10 @@ void PDBLinker::addObjectsToPDB() { if (!Publics.empty()) { // Sort the public symbols and add them to the stream. - std::sort(Publics.begin(), Publics.end(), - [](const PublicSym32 &L, const PublicSym32 &R) { - return L.Name < R.Name; - }); + sort(parallel::par, Publics.begin(), Publics.end(), + [](const PublicSym32 &L, const PublicSym32 &R) { + return L.Name < R.Name; + }); for (const PublicSym32 &Pub : Publics) GsiBuilder.addPublicSymbol(Pub); } @@ -1037,6 +1430,32 @@ static codeview::CPUType toCodeViewMachine(COFF::MachineTypes Machine) { } } +// Mimic MSVC which surrounds arguments containing whitespace with quotes. +// Double double-quotes are handled, so that the resulting string can be +// executed again on the cmd-line. +static std::string quote(ArrayRef<StringRef> Args) { + std::string R; + R.reserve(256); + for (StringRef A : Args) { + if (!R.empty()) + R.push_back(' '); + bool HasWS = A.find(' ') != StringRef::npos; + bool HasQ = A.find('"') != StringRef::npos; + if (HasWS || HasQ) + R.push_back('"'); + if (HasQ) { + SmallVector<StringRef, 4> S; + A.split(S, '"'); + R.append(join(S, "\"\"")); + } else { + R.append(A); + } + if (HasWS || HasQ) + R.push_back('"'); + } + return R; +} + static void addCommonLinkerModuleSymbols(StringRef Path, pdb::DbiModuleDescriptorBuilder &Mod, BumpPtrAllocator &Allocator) { @@ -1072,14 +1491,17 @@ static void addCommonLinkerModuleSymbols(StringRef Path, CS.setLanguage(SourceLanguage::Link); ArrayRef<StringRef> Args = makeArrayRef(Config->Argv).drop_front(); - std::string ArgStr = llvm::join(Args, " "); + std::string ArgStr = quote(Args); EBS.Fields.push_back("cwd"); SmallString<64> cwd; - sys::fs::current_path(cwd); + if (Config->PDBSourcePath.empty()) + sys::fs::current_path(cwd); + else + cwd = Config->PDBSourcePath; EBS.Fields.push_back(cwd); EBS.Fields.push_back("exe"); SmallString<64> exe = Config->Argv[0]; - llvm::sys::fs::make_absolute(exe); + pdbMakeAbsolute(exe); EBS.Fields.push_back(exe); EBS.Fields.push_back("pdb"); EBS.Fields.push_back(Path); @@ -1111,7 +1533,7 @@ static void addLinkerModuleSectionSymbol(pdb::DbiModuleDescriptorBuilder &Mod, void coff::createPDB(SymbolTable *Symtab, ArrayRef<OutputSection *> OutputSections, ArrayRef<uint8_t> SectionTable, - const llvm::codeview::DebugInfo &BuildId) { + llvm::codeview::DebugInfo *BuildId) { ScopedTimer T1(TotalPdbLinkTimer); PDBLinker PDB(Symtab); @@ -1121,12 +1543,19 @@ void coff::createPDB(SymbolTable *Symtab, PDB.addNatvisFiles(); ScopedTimer T2(DiskCommitTimer); - PDB.commit(); + codeview::GUID Guid; + PDB.commit(&Guid); + memcpy(&BuildId->PDB70.Signature, &Guid, 16); } -void PDBLinker::initialize(const llvm::codeview::DebugInfo &BuildId) { +void PDBLinker::initialize(llvm::codeview::DebugInfo *BuildId) { ExitOnErr(Builder.initialize(4096)); // 4096 is blocksize + BuildId->Signature.CVSignature = OMF::Signature::PDB70; + // Signature is set to a hash of the PDB contents when the PDB is done. + memset(BuildId->PDB70.Signature, 0, 16); + BuildId->PDB70.Age = 1; + // Create streams in MSF for predefined streams, namely // PDB, TPI, DBI and IPI. for (int I = 0; I < (int)pdb::kSpecialStreamCount; ++I) @@ -1134,15 +1563,12 @@ void PDBLinker::initialize(const llvm::codeview::DebugInfo &BuildId) { // Add an Info stream. auto &InfoBuilder = Builder.getInfoBuilder(); - GUID uuid; - memcpy(&uuid, &BuildId.PDB70.Signature, sizeof(uuid)); - InfoBuilder.setAge(BuildId.PDB70.Age); - InfoBuilder.setGuid(uuid); InfoBuilder.setVersion(pdb::PdbRaw_ImplVer::PdbImplVC70); + InfoBuilder.setHashPDBContentsToGUID(true); // Add an empty DBI stream. pdb::DbiStreamBuilder &DbiBuilder = Builder.getDbiBuilder(); - DbiBuilder.setAge(BuildId.PDB70.Age); + DbiBuilder.setAge(BuildId->PDB70.Age); DbiBuilder.setVersionHeader(pdb::PdbDbiV70); DbiBuilder.setMachineType(Config->Machine); // Technically we are not link.exe 14.11, but there are known cases where @@ -1157,8 +1583,7 @@ void PDBLinker::addSections(ArrayRef<OutputSection *> OutputSections, // It's not entirely clear what this is, but the * Linker * module uses it. pdb::DbiStreamBuilder &DbiBuilder = Builder.getDbiBuilder(); NativePath = Config->PDBPath; - sys::fs::make_absolute(NativePath); - sys::path::native(NativePath, sys::path::Style::windows); + pdbMakeAbsolute(NativePath); uint32_t PdbFilePathNI = DbiBuilder.addECName(NativePath); auto &LinkerModule = ExitOnErr(DbiBuilder.addModuleInfo("* Linker *")); LinkerModule.setPdbFilePathNI(PdbFilePathNI); @@ -1167,7 +1592,7 @@ void PDBLinker::addSections(ArrayRef<OutputSection *> OutputSections, // Add section contributions. They must be ordered by ascending RVA. for (OutputSection *OS : OutputSections) { addLinkerModuleSectionSymbol(LinkerModule, *OS, Alloc); - for (Chunk *C : OS->getChunks()) { + for (Chunk *C : OS->Chunks) { pdb::SectionContrib SC = createSectionContrib(C, LinkerModule.getModuleIndex()); Builder.getDbiBuilder().addSectionContrib(SC); @@ -1186,9 +1611,9 @@ void PDBLinker::addSections(ArrayRef<OutputSection *> OutputSections, DbiBuilder.addDbgStream(pdb::DbgHeaderType::SectionHdr, SectionTable)); } -void PDBLinker::commit() { +void PDBLinker::commit(codeview::GUID *Guid) { // Write to a file. - ExitOnErr(Builder.commit(Config->PDBPath)); + ExitOnErr(Builder.commit(Config->PDBPath, Guid)); } static Expected<StringRef> @@ -1315,20 +1740,26 @@ std::pair<StringRef, uint32_t> coff::getFileLine(const SectionChunk *C, if (!findLineTable(C, Addr, CVStrTab, Checksums, Lines, OffsetInLinetable)) return {"", 0}; - uint32_t NameIndex; - uint32_t LineNumber; + Optional<uint32_t> NameIndex; + Optional<uint32_t> LineNumber; for (LineColumnEntry &Entry : Lines) { for (const LineNumberEntry &LN : Entry.LineNumbers) { + LineInfo LI(LN.Flags); if (LN.Offset > OffsetInLinetable) { + if (!NameIndex) { + NameIndex = Entry.NameIndex; + LineNumber = LI.getStartLine(); + } StringRef Filename = - ExitOnErr(getFileName(CVStrTab, Checksums, NameIndex)); - return {Filename, LineNumber}; + ExitOnErr(getFileName(CVStrTab, Checksums, *NameIndex)); + return {Filename, *LineNumber}; } - LineInfo LI(LN.Flags); NameIndex = Entry.NameIndex; LineNumber = LI.getStartLine(); } } - StringRef Filename = ExitOnErr(getFileName(CVStrTab, Checksums, NameIndex)); - return {Filename, LineNumber}; + if (!NameIndex) + return {"", 0}; + StringRef Filename = ExitOnErr(getFileName(CVStrTab, Checksums, *NameIndex)); + return {Filename, *LineNumber}; } diff --git a/COFF/PDB.h b/COFF/PDB.h index a98d129a633b..ea7a9996f415 100644 --- a/COFF/PDB.h +++ b/COFF/PDB.h @@ -28,7 +28,7 @@ class SymbolTable; void createPDB(SymbolTable *Symtab, llvm::ArrayRef<OutputSection *> OutputSections, llvm::ArrayRef<uint8_t> SectionTable, - const llvm::codeview::DebugInfo &BuildId); + llvm::codeview::DebugInfo *BuildId); std::pair<llvm::StringRef, uint32_t> getFileLine(const SectionChunk *C, uint32_t Addr); diff --git a/COFF/SymbolTable.cpp b/COFF/SymbolTable.cpp index b286d865caaf..1a9e0455dc1d 100644 --- a/COFF/SymbolTable.cpp +++ b/COFF/SymbolTable.cpp @@ -60,16 +60,16 @@ void SymbolTable::addFile(InputFile *File) { } static void errorOrWarn(const Twine &S) { - if (Config->Force) + if (Config->ForceUnresolved) warn(S); else error(S); } -// Returns the name of the symbol in SC whose value is <= Addr that is closest -// to Addr. This is generally the name of the global variable or function whose -// definition contains Addr. -static StringRef getSymbolName(SectionChunk *SC, uint32_t Addr) { +// Returns the symbol in SC whose value is <= Addr that is closest to Addr. +// This is generally the global variable or function whose definition contains +// Addr. +static Symbol *getSymbol(SectionChunk *SC, uint32_t Addr) { DefinedRegular *Candidate = nullptr; for (Symbol *S : SC->File->getSymbols()) { @@ -81,14 +81,12 @@ static StringRef getSymbolName(SectionChunk *SC, uint32_t Addr) { Candidate = D; } - if (!Candidate) - return ""; - return Candidate->getName(); + return Candidate; } -static std::string getSymbolLocations(ObjFile *File, uint32_t SymIndex) { +std::string getSymbolLocations(ObjFile *File, uint32_t SymIndex) { struct Location { - StringRef SymName; + Symbol *Sym; std::pair<StringRef, uint32_t> FileLine; }; std::vector<Location> Locations; @@ -102,14 +100,14 @@ static std::string getSymbolLocations(ObjFile *File, uint32_t SymIndex) { continue; std::pair<StringRef, uint32_t> FileLine = getFileLine(SC, R.VirtualAddress); - StringRef SymName = getSymbolName(SC, R.VirtualAddress); - if (!FileLine.first.empty() || !SymName.empty()) - Locations.push_back({SymName, FileLine}); + Symbol *Sym = getSymbol(SC, R.VirtualAddress); + if (!FileLine.first.empty() || Sym) + Locations.push_back({Sym, FileLine}); } } if (Locations.empty()) - return "\n>>> referenced by " + toString(File) + "\n"; + return "\n>>> referenced by " + toString(File); std::string Out; llvm::raw_string_ostream OS(Out); @@ -119,13 +117,87 @@ static std::string getSymbolLocations(ObjFile *File, uint32_t SymIndex) { OS << Loc.FileLine.first << ":" << Loc.FileLine.second << "\n>>> "; OS << toString(File); - if (!Loc.SymName.empty()) - OS << ":(" << Loc.SymName << ')'; + if (Loc.Sym) + OS << ":(" << toString(*Loc.Sym) << ')'; } - OS << '\n'; return OS.str(); } +void SymbolTable::loadMinGWAutomaticImports() { + for (auto &I : SymMap) { + Symbol *Sym = I.second; + auto *Undef = dyn_cast<Undefined>(Sym); + if (!Undef) + continue; + if (!Sym->IsUsedInRegularObj) + continue; + + StringRef Name = Undef->getName(); + + if (Name.startswith("__imp_")) + continue; + // If we have an undefined symbol, but we have a Lazy representing a + // symbol we could load from file, make sure to load that. + Lazy *L = dyn_cast_or_null<Lazy>(find(("__imp_" + Name).str())); + if (!L || L->PendingArchiveLoad) + continue; + + log("Loading lazy " + L->getName() + " from " + L->File->getName() + + " for automatic import"); + L->PendingArchiveLoad = true; + L->File->addMember(&L->Sym); + } +} + +bool SymbolTable::handleMinGWAutomaticImport(Symbol *Sym, StringRef Name) { + if (Name.startswith("__imp_")) + return false; + Defined *Imp = dyn_cast_or_null<Defined>(find(("__imp_" + Name).str())); + if (!Imp) + return false; + + // Replace the reference directly to a variable with a reference + // to the import address table instead. This obviously isn't right, + // but we mark the symbol as IsRuntimePseudoReloc, and a later pass + // will add runtime pseudo relocations for every relocation against + // this Symbol. The runtime pseudo relocation framework expects the + // reference itself to point at the IAT entry. + size_t ImpSize = 0; + if (isa<DefinedImportData>(Imp)) { + log("Automatically importing " + Name + " from " + + cast<DefinedImportData>(Imp)->getDLLName()); + ImpSize = sizeof(DefinedImportData); + } else if (isa<DefinedRegular>(Imp)) { + log("Automatically importing " + Name + " from " + + toString(cast<DefinedRegular>(Imp)->File)); + ImpSize = sizeof(DefinedRegular); + } else { + warn("unable to automatically import " + Name + " from " + Imp->getName() + + " from " + toString(cast<DefinedRegular>(Imp)->File) + + "; unexpected symbol type"); + return false; + } + Sym->replaceKeepingName(Imp, ImpSize); + Sym->IsRuntimePseudoReloc = true; + + // There may exist symbols named .refptr.<name> which only consist + // of a single pointer to <name>. If it turns out <name> is + // automatically imported, we don't need to keep the .refptr.<name> + // pointer at all, but redirect all accesses to it to the IAT entry + // for __imp_<name> instead, and drop the whole .refptr.<name> chunk. + DefinedRegular *Refptr = + dyn_cast_or_null<DefinedRegular>(find((".refptr." + Name).str())); + if (Refptr && Refptr->getChunk()->getSize() == Config->Wordsize) { + SectionChunk *SC = dyn_cast_or_null<SectionChunk>(Refptr->getChunk()); + if (SC && SC->Relocs.size() == 1 && *SC->symbols().begin() == Sym) { + log("Replacing .refptr." + Name + " with " + Imp->getName()); + Refptr->getChunk()->Live = false; + Refptr->replaceKeepingName(Imp, ImpSize); + } + } + return true; +} + void SymbolTable::reportRemainingUndefines() { SmallPtrSet<Symbol *, 8> Undefs; DenseMap<Symbol *, Symbol *> LocalImports; @@ -169,9 +241,17 @@ void SymbolTable::reportRemainingUndefines() { } } + // We don't want to report missing Microsoft precompiled headers symbols. + // A proper message will be emitted instead in PDBLinker::aquirePrecompObj + if (Name.contains("_PchSym_")) + continue; + + if (Config->MinGW && handleMinGWAutomaticImport(Sym, Name)) + continue; + // Remaining undefined symbols are not fatal if /force is specified. // They are replaced with dummy defined symbols. - if (Config->Force) + if (Config->ForceUnresolved) replaceSymbol<DefinedAbsolute>(Sym, Name, 0); Undefs.insert(Sym); } @@ -181,10 +261,10 @@ void SymbolTable::reportRemainingUndefines() { for (Symbol *B : Config->GCRoot) { if (Undefs.count(B)) - errorOrWarn("<root>: undefined symbol: " + B->getName()); + errorOrWarn("<root>: undefined symbol: " + toString(*B)); if (Config->WarnLocallyDefinedImported) if (Symbol *Imp = LocalImports.lookup(B)) - warn("<root>: locally defined symbol imported: " + Imp->getName() + + warn("<root>: locally defined symbol imported: " + toString(*Imp) + " (defined in " + toString(Imp->getFile()) + ") [LNK4217]"); } @@ -195,34 +275,41 @@ void SymbolTable::reportRemainingUndefines() { if (!Sym) continue; if (Undefs.count(Sym)) - errorOrWarn("undefined symbol: " + Sym->getName() + + errorOrWarn("undefined symbol: " + toString(*Sym) + getSymbolLocations(File, SymIndex)); if (Config->WarnLocallyDefinedImported) if (Symbol *Imp = LocalImports.lookup(Sym)) - warn(toString(File) + ": locally defined symbol imported: " + - Imp->getName() + " (defined in " + toString(Imp->getFile()) + - ") [LNK4217]"); + warn(toString(File) + + ": locally defined symbol imported: " + toString(*Imp) + + " (defined in " + toString(Imp->getFile()) + ") [LNK4217]"); } } } std::pair<Symbol *, bool> SymbolTable::insert(StringRef Name) { + bool Inserted = false; Symbol *&Sym = SymMap[CachedHashStringRef(Name)]; - if (Sym) - return {Sym, false}; - Sym = reinterpret_cast<Symbol *>(make<SymbolUnion>()); - Sym->IsUsedInRegularObj = false; - Sym->PendingArchiveLoad = false; - return {Sym, true}; + if (!Sym) { + Sym = reinterpret_cast<Symbol *>(make<SymbolUnion>()); + Sym->IsUsedInRegularObj = false; + Sym->PendingArchiveLoad = false; + Inserted = true; + } + return {Sym, Inserted}; +} + +std::pair<Symbol *, bool> SymbolTable::insert(StringRef Name, InputFile *File) { + std::pair<Symbol *, bool> Result = insert(Name); + if (!File || !isa<BitcodeFile>(File)) + Result.first->IsUsedInRegularObj = true; + return Result; } Symbol *SymbolTable::addUndefined(StringRef Name, InputFile *F, bool IsWeakAlias) { Symbol *S; bool WasInserted; - std::tie(S, WasInserted) = insert(Name); - if (!F || !isa<BitcodeFile>(F)) - S->IsUsedInRegularObj = true; + std::tie(S, WasInserted) = insert(Name, F); if (WasInserted || (isa<Lazy>(S) && IsWeakAlias)) { replaceSymbol<Undefined>(S, Name); return S; @@ -253,14 +340,20 @@ void SymbolTable::addLazy(ArchiveFile *F, const Archive::Symbol Sym) { } void SymbolTable::reportDuplicate(Symbol *Existing, InputFile *NewFile) { - error("duplicate symbol: " + toString(*Existing) + " in " + - toString(Existing->getFile()) + " and in " + toString(NewFile)); + std::string Msg = "duplicate symbol: " + toString(*Existing) + " in " + + toString(Existing->getFile()) + " and in " + + toString(NewFile); + + if (Config->ForceMultiple) + warn(Msg); + else + error(Msg); } Symbol *SymbolTable::addAbsolute(StringRef N, COFFSymbolRef Sym) { Symbol *S; bool WasInserted; - std::tie(S, WasInserted) = insert(N); + std::tie(S, WasInserted) = insert(N, nullptr); S->IsUsedInRegularObj = true; if (WasInserted || isa<Undefined>(S) || isa<Lazy>(S)) replaceSymbol<DefinedAbsolute>(S, N, Sym); @@ -272,7 +365,7 @@ Symbol *SymbolTable::addAbsolute(StringRef N, COFFSymbolRef Sym) { Symbol *SymbolTable::addAbsolute(StringRef N, uint64_t VA) { Symbol *S; bool WasInserted; - std::tie(S, WasInserted) = insert(N); + std::tie(S, WasInserted) = insert(N, nullptr); S->IsUsedInRegularObj = true; if (WasInserted || isa<Undefined>(S) || isa<Lazy>(S)) replaceSymbol<DefinedAbsolute>(S, N, VA); @@ -284,7 +377,7 @@ Symbol *SymbolTable::addAbsolute(StringRef N, uint64_t VA) { Symbol *SymbolTable::addSynthetic(StringRef N, Chunk *C) { Symbol *S; bool WasInserted; - std::tie(S, WasInserted) = insert(N); + std::tie(S, WasInserted) = insert(N, nullptr); S->IsUsedInRegularObj = true; if (WasInserted || isa<Undefined>(S) || isa<Lazy>(S)) replaceSymbol<DefinedSynthetic>(S, N, C); @@ -298,9 +391,7 @@ Symbol *SymbolTable::addRegular(InputFile *F, StringRef N, SectionChunk *C) { Symbol *S; bool WasInserted; - std::tie(S, WasInserted) = insert(N); - if (!isa<BitcodeFile>(F)) - S->IsUsedInRegularObj = true; + std::tie(S, WasInserted) = insert(N, F); if (WasInserted || !isa<DefinedRegular>(S)) replaceSymbol<DefinedRegular>(S, F, N, /*IsCOMDAT*/ false, /*IsExternal*/ true, Sym, C); @@ -314,9 +405,7 @@ SymbolTable::addComdat(InputFile *F, StringRef N, const coff_symbol_generic *Sym) { Symbol *S; bool WasInserted; - std::tie(S, WasInserted) = insert(N); - if (!isa<BitcodeFile>(F)) - S->IsUsedInRegularObj = true; + std::tie(S, WasInserted) = insert(N, F); if (WasInserted || !isa<DefinedRegular>(S)) { replaceSymbol<DefinedRegular>(S, F, N, /*IsCOMDAT*/ true, /*IsExternal*/ true, Sym, nullptr); @@ -331,9 +420,7 @@ Symbol *SymbolTable::addCommon(InputFile *F, StringRef N, uint64_t Size, const coff_symbol_generic *Sym, CommonChunk *C) { Symbol *S; bool WasInserted; - std::tie(S, WasInserted) = insert(N); - if (!isa<BitcodeFile>(F)) - S->IsUsedInRegularObj = true; + std::tie(S, WasInserted) = insert(N, F); if (WasInserted || !isa<DefinedCOFF>(S)) replaceSymbol<DefinedCommon>(S, F, N, Size, Sym, C); else if (auto *DC = dyn_cast<DefinedCommon>(S)) @@ -345,7 +432,7 @@ Symbol *SymbolTable::addCommon(InputFile *F, StringRef N, uint64_t Size, Symbol *SymbolTable::addImportData(StringRef N, ImportFile *F) { Symbol *S; bool WasInserted; - std::tie(S, WasInserted) = insert(N); + std::tie(S, WasInserted) = insert(N, nullptr); S->IsUsedInRegularObj = true; if (WasInserted || isa<Undefined>(S) || isa<Lazy>(S)) { replaceSymbol<DefinedImportData>(S, N, F); @@ -360,7 +447,7 @@ Symbol *SymbolTable::addImportThunk(StringRef Name, DefinedImportData *ID, uint16_t Machine) { Symbol *S; bool WasInserted; - std::tie(S, WasInserted) = insert(Name); + std::tie(S, WasInserted) = insert(Name, nullptr); S->IsUsedInRegularObj = true; if (WasInserted || isa<Undefined>(S) || isa<Lazy>(S)) { replaceSymbol<DefinedImportThunk>(S, Name, ID, Machine); diff --git a/COFF/SymbolTable.h b/COFF/SymbolTable.h index 30cb1a5410c3..00e55dbb7a02 100644 --- a/COFF/SymbolTable.h +++ b/COFF/SymbolTable.h @@ -54,6 +54,9 @@ public: // symbols. void reportRemainingUndefines(); + void loadMinGWAutomaticImports(); + bool handleMinGWAutomaticImport(Symbol *Sym, StringRef Name); + // Returns a list of chunks of selected symbols. std::vector<Chunk *> getChunks(); @@ -108,7 +111,10 @@ public: } private: + /// Inserts symbol if not already present. std::pair<Symbol *, bool> insert(StringRef Name); + /// Same as insert(Name), but also sets IsUsedInRegularObj. + std::pair<Symbol *, bool> insert(StringRef Name, InputFile *F); StringRef findByPrefix(StringRef Prefix); llvm::DenseMap<llvm::CachedHashStringRef, Symbol *> SymMap; @@ -117,6 +123,8 @@ private: extern SymbolTable *Symtab; +std::string getSymbolLocations(ObjFile *File, uint32_t SymIndex); + } // namespace coff } // namespace lld diff --git a/COFF/Symbols.cpp b/COFF/Symbols.cpp index 7c8b7d5e8fc5..ccaf86417f10 100644 --- a/COFF/Symbols.cpp +++ b/COFF/Symbols.cpp @@ -54,7 +54,7 @@ InputFile *Symbol::getFile() { bool Symbol::isLive() const { if (auto *R = dyn_cast<DefinedRegular>(this)) - return R->getChunk()->isLive(); + return R->getChunk()->Live; if (auto *Imp = dyn_cast<DefinedImportData>(this)) return Imp->File->Live; if (auto *Imp = dyn_cast<DefinedImportThunk>(this)) @@ -63,6 +63,13 @@ bool Symbol::isLive() const { return true; } +// MinGW specific. +void Symbol::replaceKeepingName(Symbol *Other, size_t Size) { + StringRef OrigName = Name; + memcpy(this, Other, Size); + Name = OrigName; +} + COFFSymbolRef DefinedCOFF::getCOFFSymbol() { size_t SymSize = cast<ObjFile>(File)->getCOFFObj()->getSymbolTableEntrySize(); if (SymSize == sizeof(coff_symbol16)) diff --git a/COFF/Symbols.h b/COFF/Symbols.h index 783965adbd9a..4a8693e22e3c 100644 --- a/COFF/Symbols.h +++ b/COFF/Symbols.h @@ -39,9 +39,9 @@ class Symbol { public: enum Kind { // The order of these is significant. We start with the regular defined - // symbols as those are the most prevelant and the zero tag is the cheapest + // symbols as those are the most prevalent and the zero tag is the cheapest // to set. Among the defined kinds, the lower the kind is preferred over - // the higher kind when testing wether one symbol should take precedence + // the higher kind when testing whether one symbol should take precedence // over another. DefinedRegularKind = 0, DefinedCommonKind, @@ -66,6 +66,8 @@ public: // Returns the symbol name. StringRef getName(); + void replaceKeepingName(Symbol *Other, size_t Size); + // Returns the file from which this symbol was created. InputFile *getFile(); @@ -78,7 +80,7 @@ protected: explicit Symbol(Kind K, StringRef N = "") : SymbolKind(K), IsExternal(true), IsCOMDAT(false), WrittenToSymtab(false), PendingArchiveLoad(false), IsGCRoot(false), - Name(N) {} + IsRuntimePseudoReloc(false), Name(N) {} const unsigned SymbolKind : 8; unsigned IsExternal : 1; @@ -102,6 +104,8 @@ public: /// True if we've already added this symbol to the list of GC roots. unsigned IsGCRoot : 1; + unsigned IsRuntimePseudoReloc : 1; + protected: StringRef Name; }; @@ -331,8 +335,8 @@ private: Chunk *Data; }; -// If you have a symbol "__imp_foo" in your object file, a symbol name -// "foo" becomes automatically available as a pointer to "__imp_foo". +// If you have a symbol "foo" in your object file, a symbol name +// "__imp_foo" becomes automatically available as a pointer to "foo". // This class is for such automatically-created symbols. // Yes, this is an odd feature. We didn't intend to implement that. // This is here just for compatibility with MSVC. diff --git a/COFF/Writer.cpp b/COFF/Writer.cpp index d17405ec26ab..258796ea6057 100644 --- a/COFF/Writer.cpp +++ b/COFF/Writer.cpp @@ -77,36 +77,38 @@ static const int SectorSize = 512; static const int DOSStubSize = sizeof(dos_header) + sizeof(DOSProgram); static_assert(DOSStubSize % 8 == 0, "DOSStub size must be multiple of 8"); -static const int NumberfOfDataDirectory = 16; +static const int NumberOfDataDirectory = 16; namespace { class DebugDirectoryChunk : public Chunk { public: - DebugDirectoryChunk(const std::vector<Chunk *> &R) : Records(R) {} + DebugDirectoryChunk(const std::vector<Chunk *> &R, bool WriteRepro) + : Records(R), WriteRepro(WriteRepro) {} size_t getSize() const override { - return Records.size() * sizeof(debug_directory); + return (Records.size() + int(WriteRepro)) * sizeof(debug_directory); } void writeTo(uint8_t *B) const override { auto *D = reinterpret_cast<debug_directory *>(B + OutputSectionOff); for (const Chunk *Record : Records) { - D->Characteristics = 0; - D->TimeDateStamp = 0; - D->MajorVersion = 0; - D->MinorVersion = 0; - D->Type = COFF::IMAGE_DEBUG_TYPE_CODEVIEW; - D->SizeOfData = Record->getSize(); - D->AddressOfRawData = Record->getRVA(); OutputSection *OS = Record->getOutputSection(); uint64_t Offs = OS->getFileOff() + (Record->getRVA() - OS->getRVA()); - D->PointerToRawData = Offs; - - TimeDateStamps.push_back(&D->TimeDateStamp); + fillEntry(D, COFF::IMAGE_DEBUG_TYPE_CODEVIEW, Record->getSize(), + Record->getRVA(), Offs); ++D; } + + if (WriteRepro) { + // FIXME: The COFF spec allows either a 0-sized entry to just say + // "the timestamp field is really a hash", or a 4-byte size field + // followed by that many bytes containing a longer hash (with the + // lowest 4 bytes usually being the timestamp in little-endian order). + // Consider storing the full 8 bytes computed by xxHash64 here. + fillEntry(D, COFF::IMAGE_DEBUG_TYPE_REPRO, 0, 0, 0); + } } void setTimeDateStamp(uint32_t TimeDateStamp) { @@ -115,8 +117,23 @@ public: } private: + void fillEntry(debug_directory *D, COFF::DebugType DebugType, size_t Size, + uint64_t RVA, uint64_t Offs) const { + D->Characteristics = 0; + D->TimeDateStamp = 0; + D->MajorVersion = 0; + D->MinorVersion = 0; + D->Type = DebugType; + D->SizeOfData = Size; + D->AddressOfRawData = RVA; + D->PointerToRawData = Offs; + + TimeDateStamps.push_back(&D->TimeDateStamp); + } + mutable std::vector<support::ulittle32_t *> TimeDateStamps; const std::vector<Chunk *> &Records; + bool WriteRepro; }; class CVDebugRecordChunk : public Chunk { @@ -150,14 +167,22 @@ private: void createSections(); void createMiscChunks(); void createImportTables(); + void appendImportThunks(); + void locateImportTables( + std::map<std::pair<StringRef, uint32_t>, std::vector<Chunk *>> &Map); void createExportTable(); void mergeSections(); + void readRelocTargets(); + void removeUnusedSections(); void assignAddresses(); + void finalizeAddresses(); void removeEmptySections(); void createSymbolAndStringTable(); void openFile(StringRef OutputPath); template <typename PEHeaderTy> void writeHeader(); void createSEHTable(); + void createRuntimePseudoRelocs(); + void insertCtorDtorSymbols(); void createGuardCFTables(); void markSymbolsForRVATable(ObjFile *File, ArrayRef<SectionChunk *> SymIdxChunks, @@ -168,6 +193,7 @@ private: void writeSections(); void writeBuildId(); void sortExceptionTable(); + void sortCRTSectionChunks(std::vector<Chunk *> &Chunks); llvm::Optional<coff_symbol16> createSymbol(Defined *D); size_t addEntryToStringTable(StringRef Str); @@ -184,6 +210,10 @@ private: std::vector<char> Strtab; std::vector<llvm::object::coff_symbol16> OutputSymtab; IdataContents Idata; + Chunk *ImportTableStart = nullptr; + uint64_t ImportTableSize = 0; + Chunk *IATStart = nullptr; + uint64_t IATSize = 0; DelayLoadContents DelayIdata; EdataContents Edata; bool SetNoSEHCharacteristic = false; @@ -191,7 +221,6 @@ private: DebugDirectoryChunk *DebugDirectory = nullptr; std::vector<Chunk *> DebugRecords; CVDebugRecordChunk *BuildId = nullptr; - Optional<codeview::DebugInfo> PreviousBuildId; ArrayRef<uint8_t> SectionTable; uint64_t FileSize; @@ -209,6 +238,8 @@ private: OutputSection *DidatSec; OutputSection *RsrcSec; OutputSection *RelocSec; + OutputSection *CtorsSec; + OutputSection *DtorsSec; // The first and last .pdata sections in the output file. // @@ -237,6 +268,11 @@ void OutputSection::addChunk(Chunk *C) { C->setOutputSection(this); } +void OutputSection::insertChunkAtStart(Chunk *C) { + Chunks.insert(Chunks.begin(), C); + C->setOutputSection(this); +} + void OutputSection::setPermissions(uint32_t C) { Header.Characteristics &= ~PermMask; Header.Characteristics |= C; @@ -267,77 +303,206 @@ void OutputSection::writeHeaderTo(uint8_t *Buf) { } // namespace coff } // namespace lld -// PDBs are matched against executables using a build id which consists of three -// components: -// 1. A 16-bit GUID -// 2. An age -// 3. A time stamp. +// Check whether the target address S is in range from a relocation +// of type RelType at address P. +static bool isInRange(uint16_t RelType, uint64_t S, uint64_t P, int Margin) { + assert(Config->Machine == ARMNT); + int64_t Diff = AbsoluteDifference(S, P + 4) + Margin; + switch (RelType) { + case IMAGE_REL_ARM_BRANCH20T: + return isInt<21>(Diff); + case IMAGE_REL_ARM_BRANCH24T: + case IMAGE_REL_ARM_BLX23T: + return isInt<25>(Diff); + default: + return true; + } +} + +// Return the last thunk for the given target if it is in range, +// or create a new one. +static std::pair<Defined *, bool> +getThunk(DenseMap<uint64_t, Defined *> &LastThunks, Defined *Target, uint64_t P, + uint16_t Type, int Margin) { + Defined *&LastThunk = LastThunks[Target->getRVA()]; + if (LastThunk && isInRange(Type, LastThunk->getRVA(), P, Margin)) + return {LastThunk, false}; + RangeExtensionThunk *C = make<RangeExtensionThunk>(Target); + Defined *D = make<DefinedSynthetic>("", C); + LastThunk = D; + return {D, true}; +} + +// This checks all relocations, and for any relocation which isn't in range +// it adds a thunk after the section chunk that contains the relocation. +// If the latest thunk for the specific target is in range, that is used +// instead of creating a new thunk. All range checks are done with the +// specified margin, to make sure that relocations that originally are in +// range, but only barely, also get thunks - in case other added thunks makes +// the target go out of range. // -// Debuggers and symbol servers match executables against debug info by checking -// each of these components of the EXE/DLL against the corresponding value in -// the PDB and failing a match if any of the components differ. In the case of -// symbol servers, symbols are cached in a folder that is a function of the -// GUID. As a result, in order to avoid symbol cache pollution where every -// incremental build copies a new PDB to the symbol cache, we must try to re-use -// the existing GUID if one exists, but bump the age. This way the match will -// fail, so the symbol cache knows to use the new PDB, but the GUID matches, so -// it overwrites the existing item in the symbol cache rather than making a new -// one. -static Optional<codeview::DebugInfo> loadExistingBuildId(StringRef Path) { - // We don't need to incrementally update a previous build id if we're not - // writing codeview debug info. - if (!Config->Debug) - return None; +// After adding thunks, we verify that all relocations are in range (with +// no extra margin requirements). If this failed, we restart (throwing away +// the previously created thunks) and retry with a wider margin. +static bool createThunks(std::vector<Chunk *> &Chunks, int Margin) { + bool AddressesChanged = false; + DenseMap<uint64_t, Defined *> LastThunks; + size_t ThunksSize = 0; + // Recheck Chunks.size() each iteration, since we can insert more + // elements into it. + for (size_t I = 0; I != Chunks.size(); ++I) { + SectionChunk *SC = dyn_cast_or_null<SectionChunk>(Chunks[I]); + if (!SC) + continue; + size_t ThunkInsertionSpot = I + 1; + + // Try to get a good enough estimate of where new thunks will be placed. + // Offset this by the size of the new thunks added so far, to make the + // estimate slightly better. + size_t ThunkInsertionRVA = SC->getRVA() + SC->getSize() + ThunksSize; + for (size_t J = 0, E = SC->Relocs.size(); J < E; ++J) { + const coff_relocation &Rel = SC->Relocs[J]; + Symbol *&RelocTarget = SC->RelocTargets[J]; + + // The estimate of the source address P should be pretty accurate, + // but we don't know whether the target Symbol address should be + // offset by ThunkSize or not (or by some of ThunksSize but not all of + // it), giving us some uncertainty once we have added one thunk. + uint64_t P = SC->getRVA() + Rel.VirtualAddress + ThunksSize; + + Defined *Sym = dyn_cast_or_null<Defined>(RelocTarget); + if (!Sym) + continue; - auto ExpectedBinary = llvm::object::createBinary(Path); - if (!ExpectedBinary) { - consumeError(ExpectedBinary.takeError()); - return None; + uint64_t S = Sym->getRVA(); + + if (isInRange(Rel.Type, S, P, Margin)) + continue; + + // If the target isn't in range, hook it up to an existing or new + // thunk. + Defined *Thunk; + bool WasNew; + std::tie(Thunk, WasNew) = getThunk(LastThunks, Sym, P, Rel.Type, Margin); + if (WasNew) { + Chunk *ThunkChunk = Thunk->getChunk(); + ThunkChunk->setRVA( + ThunkInsertionRVA); // Estimate of where it will be located. + Chunks.insert(Chunks.begin() + ThunkInsertionSpot, ThunkChunk); + ThunkInsertionSpot++; + ThunksSize += ThunkChunk->getSize(); + ThunkInsertionRVA += ThunkChunk->getSize(); + AddressesChanged = true; + } + RelocTarget = Thunk; + } } + return AddressesChanged; +} - auto Binary = std::move(*ExpectedBinary); - if (!Binary.getBinary()->isCOFF()) - return None; +// Verify that all relocations are in range, with no extra margin requirements. +static bool verifyRanges(const std::vector<Chunk *> Chunks) { + for (Chunk *C : Chunks) { + SectionChunk *SC = dyn_cast_or_null<SectionChunk>(C); + if (!SC) + continue; - std::error_code EC; - COFFObjectFile File(Binary.getBinary()->getMemoryBufferRef(), EC); - if (EC) - return None; + for (size_t J = 0, E = SC->Relocs.size(); J < E; ++J) { + const coff_relocation &Rel = SC->Relocs[J]; + Symbol *RelocTarget = SC->RelocTargets[J]; - // If the machine of the binary we're outputting doesn't match the machine - // of the existing binary, don't try to re-use the build id. - if (File.is64() != Config->is64() || File.getMachine() != Config->Machine) - return None; + Defined *Sym = dyn_cast_or_null<Defined>(RelocTarget); + if (!Sym) + continue; - for (const auto &DebugDir : File.debug_directories()) { - if (DebugDir.Type != IMAGE_DEBUG_TYPE_CODEVIEW) - continue; + uint64_t P = SC->getRVA() + Rel.VirtualAddress; + uint64_t S = Sym->getRVA(); - const codeview::DebugInfo *ExistingDI = nullptr; - StringRef PDBFileName; - if (auto EC = File.getDebugPDBInfo(ExistingDI, PDBFileName)) { - (void)EC; - return None; + if (!isInRange(Rel.Type, S, P, 0)) + return false; } - // We only support writing PDBs in v70 format. So if this is not a build - // id that we recognize / support, ignore it. - if (ExistingDI->Signature.CVSignature != OMF::Signature::PDB70) - return None; - return *ExistingDI; } - return None; + return true; +} + +// Assign addresses and add thunks if necessary. +void Writer::finalizeAddresses() { + assignAddresses(); + if (Config->Machine != ARMNT) + return; + + size_t OrigNumChunks = 0; + for (OutputSection *Sec : OutputSections) { + Sec->OrigChunks = Sec->Chunks; + OrigNumChunks += Sec->Chunks.size(); + } + + int Pass = 0; + int Margin = 1024 * 100; + while (true) { + // First check whether we need thunks at all, or if the previous pass of + // adding them turned out ok. + bool RangesOk = true; + size_t NumChunks = 0; + for (OutputSection *Sec : OutputSections) { + if (!verifyRanges(Sec->Chunks)) { + RangesOk = false; + break; + } + NumChunks += Sec->Chunks.size(); + } + if (RangesOk) { + if (Pass > 0) + log("Added " + Twine(NumChunks - OrigNumChunks) + " thunks with " + + "margin " + Twine(Margin) + " in " + Twine(Pass) + " passes"); + return; + } + + if (Pass >= 10) + fatal("adding thunks hasn't converged after " + Twine(Pass) + " passes"); + + if (Pass > 0) { + // If the previous pass didn't work out, reset everything back to the + // original conditions before retrying with a wider margin. This should + // ideally never happen under real circumstances. + for (OutputSection *Sec : OutputSections) { + Sec->Chunks = Sec->OrigChunks; + for (Chunk *C : Sec->Chunks) + C->resetRelocTargets(); + } + Margin *= 2; + } + + // Try adding thunks everywhere where it is needed, with a margin + // to avoid things going out of range due to the added thunks. + bool AddressesChanged = false; + for (OutputSection *Sec : OutputSections) + AddressesChanged |= createThunks(Sec->Chunks, Margin); + // If the verification above thought we needed thunks, we should have + // added some. + assert(AddressesChanged); + + // Recalculate the layout for the whole image (and verify the ranges at + // the start of the next round). + assignAddresses(); + + Pass++; + } } // The main function of the writer. void Writer::run() { ScopedTimer T1(CodeLayoutTimer); + createImportTables(); createSections(); createMiscChunks(); - createImportTables(); + appendImportThunks(); createExportTable(); mergeSections(); - assignAddresses(); + readRelocTargets(); + removeUnusedSections(); + finalizeAddresses(); removeEmptySections(); setSectionPermissions(); createSymbolAndStringTable(); @@ -346,9 +511,6 @@ void Writer::run() { fatal("image size (" + Twine(FileSize) + ") " + "exceeds maximum allowable size (" + Twine(UINT32_MAX) + ")"); - // We must do this before opening the output file, as it depends on being able - // to read the contents of the existing output file. - PreviousBuildId = loadExistingBuildId(Config->OutputFile); openFile(Config->OutputFile); if (Config->is64()) { writeHeader<pe32plus_header>(); @@ -357,14 +519,14 @@ void Writer::run() { } writeSections(); sortExceptionTable(); - writeBuildId(); T1.stop(); if (!Config->PDBPath.empty() && Config->Debug) { assert(BuildId); - createPDB(Symtab, OutputSections, SectionTable, *BuildId->BuildId); + createPDB(Symtab, OutputSections, SectionTable, BuildId->BuildId); } + writeBuildId(); writeMapFile(OutputSections); @@ -396,6 +558,110 @@ static void sortBySectionOrder(std::vector<Chunk *> &Chunks) { }); } +// Sort concrete section chunks from GNU import libraries. +// +// GNU binutils doesn't use short import files, but instead produces import +// libraries that consist of object files, with section chunks for the .idata$* +// sections. These are linked just as regular static libraries. Each import +// library consists of one header object, one object file for every imported +// symbol, and one trailer object. In order for the .idata tables/lists to +// be formed correctly, the section chunks within each .idata$* section need +// to be grouped by library, and sorted alphabetically within each library +// (which makes sure the header comes first and the trailer last). +static bool fixGnuImportChunks( + std::map<std::pair<StringRef, uint32_t>, std::vector<Chunk *>> &Map) { + uint32_t RDATA = IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ; + + // Make sure all .idata$* section chunks are mapped as RDATA in order to + // be sorted into the same sections as our own synthesized .idata chunks. + for (auto &Pair : Map) { + StringRef SectionName = Pair.first.first; + uint32_t OutChars = Pair.first.second; + if (!SectionName.startswith(".idata")) + continue; + if (OutChars == RDATA) + continue; + std::vector<Chunk *> &SrcVect = Pair.second; + std::vector<Chunk *> &DestVect = Map[{SectionName, RDATA}]; + DestVect.insert(DestVect.end(), SrcVect.begin(), SrcVect.end()); + SrcVect.clear(); + } + + bool HasIdata = false; + // Sort all .idata$* chunks, grouping chunks from the same library, + // with alphabetical ordering of the object fils within a library. + for (auto &Pair : Map) { + StringRef SectionName = Pair.first.first; + if (!SectionName.startswith(".idata")) + continue; + + std::vector<Chunk *> &Chunks = Pair.second; + if (!Chunks.empty()) + HasIdata = true; + std::stable_sort(Chunks.begin(), Chunks.end(), [&](Chunk *S, Chunk *T) { + SectionChunk *SC1 = dyn_cast_or_null<SectionChunk>(S); + SectionChunk *SC2 = dyn_cast_or_null<SectionChunk>(T); + if (!SC1 || !SC2) { + // if SC1, order them ascending. If SC2 or both null, + // S is not less than T. + return SC1 != nullptr; + } + // Make a string with "libraryname/objectfile" for sorting, achieving + // both grouping by library and sorting of objects within a library, + // at once. + std::string Key1 = + (SC1->File->ParentName + "/" + SC1->File->getName()).str(); + std::string Key2 = + (SC2->File->ParentName + "/" + SC2->File->getName()).str(); + return Key1 < Key2; + }); + } + return HasIdata; +} + +// Add generated idata chunks, for imported symbols and DLLs, and a +// terminator in .idata$2. +static void addSyntheticIdata( + IdataContents &Idata, + std::map<std::pair<StringRef, uint32_t>, std::vector<Chunk *>> &Map) { + uint32_t RDATA = IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ; + Idata.create(); + + // Add the .idata content in the right section groups, to allow + // chunks from other linked in object files to be grouped together. + // See Microsoft PE/COFF spec 5.4 for details. + auto Add = [&](StringRef N, std::vector<Chunk *> &V) { + std::vector<Chunk *> &DestVect = Map[{N, RDATA}]; + DestVect.insert(DestVect.end(), V.begin(), V.end()); + }; + + // The loader assumes a specific order of data. + // Add each type in the correct order. + Add(".idata$2", Idata.Dirs); + Add(".idata$4", Idata.Lookups); + Add(".idata$5", Idata.Addresses); + Add(".idata$6", Idata.Hints); + Add(".idata$7", Idata.DLLNames); +} + +// Locate the first Chunk and size of the import directory list and the +// IAT. +void Writer::locateImportTables( + std::map<std::pair<StringRef, uint32_t>, std::vector<Chunk *>> &Map) { + uint32_t RDATA = IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ; + std::vector<Chunk *> &ImportTables = Map[{".idata$2", RDATA}]; + if (!ImportTables.empty()) + ImportTableStart = ImportTables.front(); + for (Chunk *C : ImportTables) + ImportTableSize += C->getSize(); + + std::vector<Chunk *> &IAT = Map[{".idata$5", RDATA}]; + if (!IAT.empty()) + IATStart = IAT.front(); + for (Chunk *C : IAT) + IATSize += C->getSize(); +} + // Create output section objects and add them to OutputSections. void Writer::createSections() { // First, create the builtin sections. @@ -429,12 +695,14 @@ void Writer::createSections() { DidatSec = CreateSection(".didat", DATA | R); RsrcSec = CreateSection(".rsrc", DATA | R); RelocSec = CreateSection(".reloc", DATA | DISCARDABLE | R); + CtorsSec = CreateSection(".ctors", DATA | R | W); + DtorsSec = CreateSection(".dtors", DATA | R | W); // Then bin chunks by name and output characteristics. std::map<std::pair<StringRef, uint32_t>, std::vector<Chunk *>> Map; for (Chunk *C : Symtab->getChunks()) { auto *SC = dyn_cast<SectionChunk>(C); - if (SC && !SC->isLive()) { + if (SC && !SC->Live) { if (Config->Verbose) SC->printDiscardedMessage(); continue; @@ -442,26 +710,43 @@ void Writer::createSections() { Map[{C->getSectionName(), C->getOutputCharacteristics()}].push_back(C); } + // Even in non MinGW cases, we might need to link against GNU import + // libraries. + bool HasIdata = fixGnuImportChunks(Map); + if (!Idata.empty()) + HasIdata = true; + + if (HasIdata) + addSyntheticIdata(Idata, Map); + // Process an /order option. if (!Config->Order.empty()) for (auto &Pair : Map) sortBySectionOrder(Pair.second); + if (HasIdata) + locateImportTables(Map); + // Then create an OutputSection for each section. // '$' and all following characters in input section names are // discarded when determining output section. So, .text$foo // contributes to .text, for example. See PE/COFF spec 3.2. - for (auto Pair : Map) { + for (auto &Pair : Map) { StringRef Name = getOutputSectionName(Pair.first.first); uint32_t OutChars = Pair.first.second; - // In link.exe, there is a special case for the I386 target where .CRT - // sections are treated as if they have output characteristics DATA | R if - // their characteristics are DATA | R | W. This implements the same special - // case for all architectures. - if (Name == ".CRT") + if (Name == ".CRT") { + // In link.exe, there is a special case for the I386 target where .CRT + // sections are treated as if they have output characteristics DATA | R if + // their characteristics are DATA | R | W. This implements the same + // special case for all architectures. OutChars = DATA | R; + log("Processing section " + Pair.first.first + " -> " + Name); + + sortCRTSectionChunks(Pair.second); + } + OutputSection *Sec = CreateSection(Name, OutChars); std::vector<Chunk *> &Chunks = Pair.second; for (Chunk *C : Chunks) @@ -499,20 +784,20 @@ void Writer::createMiscChunks() { } // Create Debug Information Chunks - if (Config->Debug) { - DebugDirectory = make<DebugDirectoryChunk>(DebugRecords); - - OutputSection *DebugInfoSec = Config->MinGW ? BuildidSec : RdataSec; + OutputSection *DebugInfoSec = Config->MinGW ? BuildidSec : RdataSec; + if (Config->Debug || Config->Repro) { + DebugDirectory = make<DebugDirectoryChunk>(DebugRecords, Config->Repro); + DebugInfoSec->addChunk(DebugDirectory); + } + if (Config->Debug) { // Make a CVDebugRecordChunk even when /DEBUG:CV is not specified. We // output a PDB no matter what, and this chunk provides the only means of // allowing a debugger to match a PDB and an executable. So we need it even // if we're ultimately not going to write CodeView data to the PDB. - auto *CVChunk = make<CVDebugRecordChunk>(); - BuildId = CVChunk; - DebugRecords.push_back(CVChunk); + BuildId = make<CVDebugRecordChunk>(); + DebugRecords.push_back(BuildId); - DebugInfoSec->addChunk(DebugDirectory); for (Chunk *C : DebugRecords) DebugInfoSec->addChunk(C); } @@ -524,6 +809,12 @@ void Writer::createMiscChunks() { // Create /guard:cf tables if requested. if (Config->GuardCF != GuardCFLevel::Off) createGuardCFTables(); + + if (Config->MinGW) { + createRuntimePseudoRelocs(); + + insertCtorDtorSymbols(); + } } // Create .idata section for the DLL-imported symbol table. @@ -531,9 +822,6 @@ void Writer::createMiscChunks() { // IdataContents class abstracted away the details for us, // so we just let it create chunks and add them to the section. void Writer::createImportTables() { - if (ImportFile::Instances.empty()) - return; - // Initialize DLLOrder so that import entries are ordered in // the same order as in the command line. (That affects DLL // initialization order, and this ordering is MSVC-compatible.) @@ -545,14 +833,6 @@ void Writer::createImportTables() { if (Config->DLLOrder.count(DLL) == 0) Config->DLLOrder[DLL] = Config->DLLOrder.size(); - if (File->ThunkSym) { - if (!isa<DefinedImportThunk>(File->ThunkSym)) - fatal(toString(*File->ThunkSym) + " was replaced"); - DefinedImportThunk *Thunk = cast<DefinedImportThunk>(File->ThunkSym); - if (File->ThunkLive) - TextSec->addChunk(Thunk->getChunk()); - } - if (File->ImpSym && !isa<DefinedImportData>(File->ImpSym)) fatal(toString(*File->ImpSym) + " was replaced"); DefinedImportData *ImpSym = cast_or_null<DefinedImportData>(File->ImpSym); @@ -565,10 +845,25 @@ void Writer::createImportTables() { Idata.add(ImpSym); } } +} - if (!Idata.empty()) - for (Chunk *C : Idata.getChunks()) - IdataSec->addChunk(C); +void Writer::appendImportThunks() { + if (ImportFile::Instances.empty()) + return; + + for (ImportFile *File : ImportFile::Instances) { + if (!File->Live) + continue; + + if (!File->ThunkSym) + continue; + + if (!isa<DefinedImportThunk>(File->ThunkSym)) + fatal(toString(*File->ThunkSym) + " was replaced"); + DefinedImportThunk *Thunk = cast<DefinedImportThunk>(File->ThunkSym); + if (File->ThunkLive) + TextSec->addChunk(Thunk->getChunk()); + } if (!DelayIdata.empty()) { Defined *Helper = cast<Defined>(Config->DelayLoadHelper); @@ -589,6 +884,21 @@ void Writer::createExportTable() { EdataSec->addChunk(C); } +void Writer::removeUnusedSections() { + // Remove sections that we can be sure won't get content, to avoid + // allocating space for their section headers. + auto IsUnused = [this](OutputSection *S) { + if (S == RelocSec) + return false; // This section is populated later. + // MergeChunks have zero size at this point, as their size is finalized + // later. Only remove sections that have no Chunks at all. + return S->Chunks.empty(); + }; + OutputSections.erase( + std::remove_if(OutputSections.begin(), OutputSections.end(), IsUnused), + OutputSections.end()); +} + // The Windows loader doesn't seem to like empty sections, // so we remove them if any. void Writer::removeEmptySections() { @@ -699,9 +1009,9 @@ void Writer::createSymbolAndStringTable() { } void Writer::mergeSections() { - if (!PdataSec->getChunks().empty()) { - FirstPdata = PdataSec->getChunks().front(); - LastPdata = PdataSec->getChunks().back(); + if (!PdataSec->Chunks.empty()) { + FirstPdata = PdataSec->Chunks.front(); + LastPdata = PdataSec->Chunks.back(); } for (auto &P : Config->Merge) { @@ -729,11 +1039,18 @@ void Writer::mergeSections() { } } +// Visits all sections to initialize their relocation targets. +void Writer::readRelocTargets() { + for (OutputSection *Sec : OutputSections) + for_each(parallel::par, Sec->Chunks.begin(), Sec->Chunks.end(), + [&](Chunk *C) { C->readRelocTargets(); }); +} + // Visits all sections to assign incremental, non-overlapping RVAs and // file offsets. void Writer::assignAddresses() { SizeOfHeaders = DOSStubSize + sizeof(PEMagic) + sizeof(coff_file_header) + - sizeof(data_directory) * NumberfOfDataDirectory + + sizeof(data_directory) * NumberOfDataDirectory + sizeof(coff_section) * OutputSections.size(); SizeOfHeaders += Config->is64() ? sizeof(pe32plus_header) : sizeof(pe32_header); @@ -746,7 +1063,7 @@ void Writer::assignAddresses() { addBaserels(); uint64_t RawSize = 0, VirtualSize = 0; Sec->Header.VirtualAddress = RVA; - for (Chunk *C : Sec->getChunks()) { + for (Chunk *C : Sec->Chunks) { VirtualSize = alignTo(VirtualSize, C->Alignment); C->setRVA(RVA + VirtualSize); C->OutputSectionOff = VirtualSize; @@ -808,7 +1125,7 @@ template <typename PEHeaderTy> void Writer::writeHeader() { if (!Config->Relocatable) COFF->Characteristics |= IMAGE_FILE_RELOCS_STRIPPED; COFF->SizeOfOptionalHeader = - sizeof(PEHeaderTy) + sizeof(data_directory) * NumberfOfDataDirectory; + sizeof(PEHeaderTy) + sizeof(data_directory) * NumberOfDataDirectory; // Write PE header auto *PE = reinterpret_cast<PEHeaderTy *>(Buf); @@ -866,7 +1183,7 @@ template <typename PEHeaderTy> void Writer::writeHeader() { PE->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_NO_SEH; if (Config->TerminalServerAware) PE->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_TERMINAL_SERVER_AWARE; - PE->NumberOfRvaAndSize = NumberfOfDataDirectory; + PE->NumberOfRvaAndSize = NumberOfDataDirectory; if (TextSec->getVirtualSize()) { PE->BaseOfCode = TextSec->getRVA(); PE->SizeOfCode = TextSec->getRawSize(); @@ -875,16 +1192,18 @@ template <typename PEHeaderTy> void Writer::writeHeader() { // Write data directory auto *Dir = reinterpret_cast<data_directory *>(Buf); - Buf += sizeof(*Dir) * NumberfOfDataDirectory; + Buf += sizeof(*Dir) * NumberOfDataDirectory; if (!Config->Exports.empty()) { Dir[EXPORT_TABLE].RelativeVirtualAddress = Edata.getRVA(); Dir[EXPORT_TABLE].Size = Edata.getSize(); } - if (!Idata.empty()) { - Dir[IMPORT_TABLE].RelativeVirtualAddress = Idata.getDirRVA(); - Dir[IMPORT_TABLE].Size = Idata.getDirSize(); - Dir[IAT].RelativeVirtualAddress = Idata.getIATRVA(); - Dir[IAT].Size = Idata.getIATSize(); + if (ImportTableStart) { + Dir[IMPORT_TABLE].RelativeVirtualAddress = ImportTableStart->getRVA(); + Dir[IMPORT_TABLE].Size = ImportTableSize; + } + if (IATStart) { + Dir[IAT].RelativeVirtualAddress = IATStart->getRVA(); + Dir[IAT].Size = IATSize; } if (RsrcSec->getVirtualSize()) { Dir[RESOURCE_TABLE].RelativeVirtualAddress = RsrcSec->getRVA(); @@ -907,7 +1226,7 @@ template <typename PEHeaderTy> void Writer::writeHeader() { : sizeof(object::coff_tls_directory32); } } - if (Config->Debug) { + if (DebugDirectory) { Dir[DEBUG_DIRECTORY].RelativeVirtualAddress = DebugDirectory->getRVA(); Dir[DEBUG_DIRECTORY].Size = DebugDirectory->getSize(); } @@ -1002,6 +1321,25 @@ static void addSymbolToRVASet(SymbolRVASet &RVASet, Defined *S) { RVASet.insert({C, Off}); } +// Given a symbol, add it to the GFIDs table if it is a live, defined, function +// symbol in an executable section. +static void maybeAddAddressTakenFunction(SymbolRVASet &AddressTakenSyms, + Symbol *S) { + auto *D = dyn_cast_or_null<DefinedCOFF>(S); + + // Ignore undefined symbols and references to non-functions (e.g. globals and + // labels). + if (!D || + D->getCOFFSymbol().getComplexType() != COFF::IMAGE_SYM_DTYPE_FUNCTION) + return; + + // Mark the symbol as address taken if it's in an executable section. + Chunk *RefChunk = D->getChunk(); + OutputSection *OS = RefChunk ? RefChunk->getOutputSection() : nullptr; + if (OS && OS->Header.Characteristics & IMAGE_SCN_MEM_EXECUTE) + addSymbolToRVASet(AddressTakenSyms, D); +} + // Visit all relocations from all section contributions of this object file and // mark the relocation target as address-taken. static void markSymbolsWithRelocations(ObjFile *File, @@ -1010,21 +1348,17 @@ static void markSymbolsWithRelocations(ObjFile *File, // We only care about live section chunks. Common chunks and other chunks // don't generally contain relocations. SectionChunk *SC = dyn_cast<SectionChunk>(C); - if (!SC || !SC->isLive()) + if (!SC || !SC->Live) continue; - // Look for relocations in this section against symbols in executable output - // sections. - for (Symbol *Ref : SC->symbols()) { - // FIXME: Do further testing to see if the relocation type matters, - // especially for 32-bit where taking the address of something usually - // uses an absolute relocation instead of a relative one. - if (auto *D = dyn_cast_or_null<Defined>(Ref)) { - Chunk *RefChunk = D->getChunk(); - OutputSection *OS = RefChunk ? RefChunk->getOutputSection() : nullptr; - if (OS && OS->Header.Characteristics & IMAGE_SCN_MEM_EXECUTE) - addSymbolToRVASet(UsedSymbols, D); - } + for (const coff_relocation &Reloc : SC->Relocs) { + if (Config->Machine == I386 && Reloc.Type == COFF::IMAGE_REL_I386_REL32) + // Ignore relative relocations on x86. On x86_64 they can't be ignored + // since they're also used to compute absolute addresses. + continue; + + Symbol *Ref = SC->File->getSymbol(Reloc.SymbolTableIndex); + maybeAddAddressTakenFunction(UsedSymbols, Ref); } } } @@ -1051,7 +1385,11 @@ void Writer::createGuardCFTables() { // Mark the image entry as address-taken. if (Config->Entry) - addSymbolToRVASet(AddressTakenSyms, cast<Defined>(Config->Entry)); + maybeAddAddressTakenFunction(AddressTakenSyms, Config->Entry); + + // Mark exported symbols in executable sections as address-taken. + for (Export &E : Config->Exports) + maybeAddAddressTakenFunction(AddressTakenSyms, E.Sym); // Ensure sections referenced in the gfid table are 16-byte aligned. for (const ChunkAndOffset &C : AddressTakenSyms) @@ -1087,7 +1425,7 @@ void Writer::markSymbolsForRVATable(ObjFile *File, // is associated with something like a vtable and the vtable is discarded. // In this case, the associated gfids section is discarded, and we don't // mark the virtual member functions as address-taken by the vtable. - if (!C->isLive()) + if (!C->Live) continue; // Validate that the contents look like symbol table indices. @@ -1134,6 +1472,56 @@ void Writer::maybeAddRVATable(SymbolRVASet TableSymbols, StringRef TableSym, cast<DefinedAbsolute>(C)->setVA(TableChunk->getSize() / 4); } +// MinGW specific. Gather all relocations that are imported from a DLL even +// though the code didn't expect it to, produce the table that the runtime +// uses for fixing them up, and provide the synthetic symbols that the +// runtime uses for finding the table. +void Writer::createRuntimePseudoRelocs() { + std::vector<RuntimePseudoReloc> Rels; + + for (Chunk *C : Symtab->getChunks()) { + auto *SC = dyn_cast<SectionChunk>(C); + if (!SC || !SC->Live) + continue; + SC->getRuntimePseudoRelocs(Rels); + } + + if (!Rels.empty()) + log("Writing " + Twine(Rels.size()) + " runtime pseudo relocations"); + PseudoRelocTableChunk *Table = make<PseudoRelocTableChunk>(Rels); + RdataSec->addChunk(Table); + EmptyChunk *EndOfList = make<EmptyChunk>(); + RdataSec->addChunk(EndOfList); + + Symbol *HeadSym = Symtab->findUnderscore("__RUNTIME_PSEUDO_RELOC_LIST__"); + Symbol *EndSym = Symtab->findUnderscore("__RUNTIME_PSEUDO_RELOC_LIST_END__"); + replaceSymbol<DefinedSynthetic>(HeadSym, HeadSym->getName(), Table); + replaceSymbol<DefinedSynthetic>(EndSym, EndSym->getName(), EndOfList); +} + +// MinGW specific. +// The MinGW .ctors and .dtors lists have sentinels at each end; +// a (uintptr_t)-1 at the start and a (uintptr_t)0 at the end. +// There's a symbol pointing to the start sentinel pointer, __CTOR_LIST__ +// and __DTOR_LIST__ respectively. +void Writer::insertCtorDtorSymbols() { + AbsolutePointerChunk *CtorListHead = make<AbsolutePointerChunk>(-1); + AbsolutePointerChunk *CtorListEnd = make<AbsolutePointerChunk>(0); + AbsolutePointerChunk *DtorListHead = make<AbsolutePointerChunk>(-1); + AbsolutePointerChunk *DtorListEnd = make<AbsolutePointerChunk>(0); + CtorsSec->insertChunkAtStart(CtorListHead); + CtorsSec->addChunk(CtorListEnd); + DtorsSec->insertChunkAtStart(DtorListHead); + DtorsSec->addChunk(DtorListEnd); + + Symbol *CtorListSym = Symtab->findUnderscore("__CTOR_LIST__"); + Symbol *DtorListSym = Symtab->findUnderscore("__DTOR_LIST__"); + replaceSymbol<DefinedSynthetic>(CtorListSym, CtorListSym->getName(), + CtorListHead); + replaceSymbol<DefinedSynthetic>(DtorListSym, DtorListSym->getName(), + DtorListHead); +} + // Handles /section options to allow users to overwrite // section attributes. void Writer::setSectionPermissions() { @@ -1160,7 +1548,7 @@ void Writer::writeSections() { // ADD instructions). if (Sec->Header.Characteristics & IMAGE_SCN_CNT_CODE) memset(SecBuf, 0xCC, Sec->getRawSize()); - for_each(parallel::par, Sec->getChunks().begin(), Sec->getChunks().end(), + for_each(parallel::par, Sec->Chunks.begin(), Sec->Chunks.end(), [&](Chunk *C) { C->writeTo(SecBuf); }); } } @@ -1171,25 +1559,10 @@ void Writer::writeBuildId() { // timestamp as well as a Guid and Age of the PDB. // 2) In all cases, the PE COFF file header also contains a timestamp. // For reproducibility, instead of a timestamp we want to use a hash of the - // binary, however when building with debug info the hash needs to take into - // account the debug info, since it's possible to add blank lines to a file - // which causes the debug info to change but not the generated code. - // - // To handle this, we first set the Guid and Age in the debug directory (but - // only if we're doing a debug build). Then, we hash the binary (thus causing - // the hash to change if only the debug info changes, since the Age will be - // different). Finally, we write that hash into the debug directory (if - // present) as well as the COFF file header (always). + // PE contents. if (Config->Debug) { assert(BuildId && "BuildId is not set!"); - if (PreviousBuildId.hasValue()) { - *BuildId->BuildId = *PreviousBuildId; - BuildId->BuildId->PDB70.Age = BuildId->BuildId->PDB70.Age + 1; - } else { - BuildId->BuildId->Signature.CVSignature = OMF::Signature::PDB70; - BuildId->BuildId->PDB70.Age = 1; - llvm::getRandomBytes(BuildId->BuildId->PDB70.Signature, 16); - } + // BuildId->BuildId was filled in when the PDB was written. } // At this point the only fields in the COFF file which remain unset are the @@ -1201,8 +1574,25 @@ void Writer::writeBuildId() { Buffer->getBufferSize()); uint32_t Timestamp = Config->Timestamp; + uint64_t Hash = 0; + bool GenerateSyntheticBuildId = + Config->MinGW && Config->Debug && Config->PDBPath.empty(); + + if (Config->Repro || GenerateSyntheticBuildId) + Hash = xxHash64(OutputFileData); + if (Config->Repro) - Timestamp = static_cast<uint32_t>(xxHash64(OutputFileData)); + Timestamp = static_cast<uint32_t>(Hash); + + if (GenerateSyntheticBuildId) { + // For MinGW builds without a PDB file, we still generate a build id + // to allow associating a crash dump to the executable. + BuildId->BuildId->PDB70.CVSignature = OMF::Signature::PDB70; + BuildId->BuildId->PDB70.Age = 1; + memcpy(BuildId->BuildId->PDB70.Signature, &Hash, 8); + // xxhash only gives us 8 bytes, so put some fixed data in the other half. + memcpy(&BuildId->BuildId->PDB70.Signature[8], "LLD PDB.", 8); + } if (DebugDirectory) DebugDirectory->setTimeDateStamp(Timestamp); @@ -1240,6 +1630,42 @@ void Writer::sortExceptionTable() { errs() << "warning: don't know how to handle .pdata.\n"; } +// The CRT section contains, among other things, the array of function +// pointers that initialize every global variable that is not trivially +// constructed. The CRT calls them one after the other prior to invoking +// main(). +// +// As per C++ spec, 3.6.2/2.3, +// "Variables with ordered initialization defined within a single +// translation unit shall be initialized in the order of their definitions +// in the translation unit" +// +// It is therefore critical to sort the chunks containing the function +// pointers in the order that they are listed in the object file (top to +// bottom), otherwise global objects might not be initialized in the +// correct order. +void Writer::sortCRTSectionChunks(std::vector<Chunk *> &Chunks) { + auto SectionChunkOrder = [](const Chunk *A, const Chunk *B) { + auto SA = dyn_cast<SectionChunk>(A); + auto SB = dyn_cast<SectionChunk>(B); + assert(SA && SB && "Non-section chunks in CRT section!"); + + StringRef SAObj = SA->File->MB.getBufferIdentifier(); + StringRef SBObj = SB->File->MB.getBufferIdentifier(); + + return SAObj == SBObj && SA->getSectionNumber() < SB->getSectionNumber(); + }; + std::stable_sort(Chunks.begin(), Chunks.end(), SectionChunkOrder); + + if (Config->Verbose) { + for (auto &C : Chunks) { + auto SC = dyn_cast<SectionChunk>(C); + log(" " + SC->File->MB.getBufferIdentifier().str() + + ", SectionID: " + Twine(SC->getSectionNumber())); + } + } +} + OutputSection *Writer::findSection(StringRef Name) { for (OutputSection *Sec : OutputSections) if (Sec->Name == Name) @@ -1259,12 +1685,13 @@ uint32_t Writer::getSizeOfInitializedData() { void Writer::addBaserels() { if (!Config->Relocatable) return; + RelocSec->Chunks.clear(); std::vector<Baserel> V; for (OutputSection *Sec : OutputSections) { if (Sec->Header.Characteristics & IMAGE_SCN_MEM_DISCARDABLE) continue; // Collect all locations for base relocations. - for (Chunk *C : Sec->getChunks()) + for (Chunk *C : Sec->Chunks) C->getBaserels(&V); // Add the addresses to .reloc section. if (!V.empty()) diff --git a/COFF/Writer.h b/COFF/Writer.h index d37276cb6d91..727582480c91 100644 --- a/COFF/Writer.h +++ b/COFF/Writer.h @@ -34,8 +34,8 @@ public: Header.Characteristics = Chars; } void addChunk(Chunk *C); + void insertChunkAtStart(Chunk *C); void merge(OutputSection *Other); - ArrayRef<Chunk *> getChunks() { return Chunks; } void addPermissions(uint32_t C); void setPermissions(uint32_t C); uint64_t getRVA() { return Header.VirtualAddress; } @@ -62,9 +62,11 @@ public: llvm::StringRef Name; llvm::object::coff_section Header = {}; + std::vector<Chunk *> Chunks; + std::vector<Chunk *> OrigChunks; + private: uint32_t StringTableOff = 0; - std::vector<Chunk *> Chunks; }; } |