Diffstat (limited to 'ELF/SyntheticSections.cpp')
| -rw-r--r-- | ELF/SyntheticSections.cpp | 1475 |
1 file changed, 945 insertions, 530 deletions
diff --git a/ELF/SyntheticSections.cpp b/ELF/SyntheticSections.cpp index a5e291b79a4d..38859e1650bf 100644 --- a/ELF/SyntheticSections.cpp +++ b/ELF/SyntheticSections.cpp @@ -20,15 +20,16 @@ #include "InputFiles.h" #include "LinkerScript.h" #include "OutputSections.h" -#include "Strings.h" #include "SymbolTable.h" #include "Symbols.h" #include "Target.h" #include "Writer.h" #include "lld/Common/ErrorHandler.h" #include "lld/Common/Memory.h" +#include "lld/Common/Strings.h" #include "lld/Common/Threads.h" #include "lld/Common/Version.h" +#include "llvm/ADT/SetOperations.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/DebugInfo/DWARF/DWARFDebugPubTable.h" #include "llvm/Object/Decompressor.h" @@ -47,27 +48,20 @@ using namespace llvm::dwarf; using namespace llvm::ELF; using namespace llvm::object; using namespace llvm::support; -using namespace llvm::support::endian; using namespace lld; using namespace lld::elf; -constexpr size_t MergeNoTailSection::NumShards; - -static void write32(void *Buf, uint32_t Val) { - endian::write32(Buf, Val, Config->Endianness); -} +using llvm::support::endian::read32le; +using llvm::support::endian::write32le; +using llvm::support::endian::write64le; -uint64_t SyntheticSection::getVA() const { - if (OutputSection *Sec = getParent()) - return Sec->Addr + OutSecOff; - return 0; -} +constexpr size_t MergeNoTailSection::NumShards; // Returns an LLD version string. static ArrayRef<uint8_t> getVersion() { // Check LLD_VERSION first for ease of testing. - // You can get consitent output by using the environment variable. + // You can get consistent output by using the environment variable. // This is only for testing. StringRef S = getenv("LLD_VERSION"); if (S.empty()) @@ -192,8 +186,6 @@ MipsOptionsSection<ELFT> *MipsOptionsSection<ELFT>::create() { auto *Opt = reinterpret_cast<const Elf_Mips_Options *>(D.data()); if (Opt->kind == ODK_REGINFO) { - if (Config->Relocatable && Opt->getRegInfo().ri_gp_value) - error(Filename + ": unsupported non-zero ri_gp_value"); Reginfo.ri_gprmask |= Opt->getRegInfo().ri_gprmask; Sec->getFile<ELFT>()->MipsGp0 = Opt->getRegInfo().ri_gp_value; break; @@ -244,10 +236,8 @@ MipsReginfoSection<ELFT> *MipsReginfoSection<ELFT>::create() { error(toString(Sec->File) + ": invalid size of .reginfo section"); return nullptr; } - auto *R = reinterpret_cast<const Elf_Mips_RegInfo *>(Sec->Data.data()); - if (Config->Relocatable && R->ri_gp_value) - error(toString(Sec->File) + ": unsupported non-zero ri_gp_value"); + auto *R = reinterpret_cast<const Elf_Mips_RegInfo *>(Sec->Data.data()); Reginfo.ri_gprmask |= R->ri_gprmask; Sec->getFile<ELFT>()->MipsGp0 = R->ri_gp_value; }; @@ -266,8 +256,8 @@ InputSection *elf::createInterpSection() { return Sec; } -Symbol *elf::addSyntheticLocal(StringRef Name, uint8_t Type, uint64_t Value, - uint64_t Size, InputSectionBase &Section) { +Defined *elf::addSyntheticLocal(StringRef Name, uint8_t Type, uint64_t Value, + uint64_t Size, InputSectionBase &Section) { auto *S = make<Defined>(Section.File, Name, STB_LOCAL, STV_DEFAULT, Type, Value, Size, &Section); if (InX::SymTab) @@ -338,8 +328,6 @@ void BuildIdSection::computeHash( BssSection::BssSection(StringRef Name, uint64_t Size, uint32_t Alignment) : SyntheticSection(SHF_ALLOC | SHF_WRITE, SHT_NOBITS, Alignment, Name) { this->Bss = true; - if (OutputSection *Sec = getParent()) - Sec->Alignment = std::max(Sec->Alignment, Alignment); this->Size = Size; } @@ -380,15 +368,11 @@ EhFrameSection::EhFrameSection() // and where their relocations point to. 
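// (Editorial aside, not part of the patch.) For orientation: every .eh_frame
// record starts with a 4-byte length followed by a 4-byte word that is zero
// for a CIE and a back-reference to the owning CIE for an FDE. That is why
// the code below reads a 32-bit value at offset 4 and treats zero as "this
// piece is a CIE". A minimal classifier under those assumptions:
//
//   static bool isCie(ArrayRef<uint8_t> Piece) {
//     // Offset 0: record length; offset 4: CIE id (0) or CIE pointer.
//     return read32le(Piece.data() + 4) == 0; // assumes little-endian input
//   }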
template <class ELFT, class RelTy> CieRecord *EhFrameSection::addCie(EhSectionPiece &Cie, ArrayRef<RelTy> Rels) { - auto *Sec = cast<EhInputSection>(Cie.Sec); - if (read32(Cie.data().data() + 4, Config->Endianness) != 0) - fatal(toString(Sec) + ": CIE expected at beginning of .eh_frame"); - Symbol *Personality = nullptr; unsigned FirstRelI = Cie.FirstRelocation; if (FirstRelI != (unsigned)-1) Personality = - &Sec->template getFile<ELFT>()->getRelocTargetSym(Rels[FirstRelI]); + &Cie.Sec->template getFile<ELFT>()->getRelocTargetSym(Rels[FirstRelI]); // Search for an existing CIE by CIE contents/relocation target pair. CieRecord *&Rec = CieMap[{Cie.data(), Personality}]; @@ -433,14 +417,14 @@ bool EhFrameSection::isFdeLive(EhSectionPiece &Fde, ArrayRef<RelTy> Rels) { // one and associates FDEs to the CIE. template <class ELFT, class RelTy> void EhFrameSection::addSectionAux(EhInputSection *Sec, ArrayRef<RelTy> Rels) { - DenseMap<size_t, CieRecord *> OffsetToCie; + OffsetToCie.clear(); for (EhSectionPiece &Piece : Sec->Pieces) { // The empty record is the end marker. if (Piece.Size == 4) return; size_t Offset = Piece.InputOff; - uint32_t ID = read32(Piece.data().data() + 4, Config->Endianness); + uint32_t ID = read32(Piece.data().data() + 4); if (ID == 0) { OffsetToCie[Offset] = addCie<ELFT>(Piece, Rels); continue; } @@ -468,10 +452,6 @@ template <class ELFT> void EhFrameSection::addSection(InputSectionBase *C) { for (auto *DS : Sec->DependentSections) DependentSections.push_back(DS); - // .eh_frame is a sequence of CIE or FDE records. This function - // splits it into pieces so that we can call - // SplitInputSection::getSectionPiece on the section. - Sec->split<ELFT>(); if (Sec->Pieces.empty()) return; @@ -494,9 +474,7 @@ static void writeCieFde(uint8_t *Buf, ArrayRef<uint8_t> D) { } void EhFrameSection::finalizeContents() { - if (this->Size) - return; // Already finalized. - + assert(!this->Size); // Not finalized. size_t Off = 0; for (CieRecord *Rec : CieRecords) { Rec->Cie->OutputOff = Off; @@ -509,10 +487,10 @@ void EhFrameSection::finalizeContents() { } // The LSB standard does not allow a .eh_frame section with zero - // Call Frame Information records. Therefore add a CIE record length - // 0 as a terminator if this .eh_frame section is empty. - if (Off == 0) - Off = 4; + // Call Frame Information records. glibc unwind-dw2-fde.c + // classify_object_over_fdes expects a CIE record of length 0 as a + // terminator. Thus we add one unconditionally. + Off += 4; this->Size = Off; } @@ -524,25 +502,47 @@ std::vector<EhFrameSection::FdeData> EhFrameSection::getFdeData() const { uint8_t *Buf = getParent()->Loc + OutSecOff; std::vector<FdeData> Ret; + uint64_t VA = InX::EhFrameHdr->getVA(); for (CieRecord *Rec : CieRecords) { uint8_t Enc = getFdeEncoding(Rec->Cie); for (EhSectionPiece *Fde : Rec->Fdes) { - uint32_t Pc = getFdePc(Buf, Fde->OutputOff, Enc); - uint32_t FdeVA = getParent()->Addr + Fde->OutputOff; - Ret.push_back({Pc, FdeVA}); + uint64_t Pc = getFdePc(Buf, Fde->OutputOff, Enc); + uint64_t FdeVA = getParent()->Addr + Fde->OutputOff; + if (!isInt<32>(Pc - VA)) + fatal(toString(Fde->Sec) + ": PC offset is too large: 0x" + + Twine::utohexstr(Pc - VA)); + Ret.push_back({uint32_t(Pc - VA), uint32_t(FdeVA - VA)}); } } + + // Sort the FDE list by their PC and uniquify. Usually there is only + // one FDE for a PC (i.e. function), but if ICF merges two functions + // into one, there can be more than one FDE pointing to the address.
+ auto Less = [](const FdeData &A, const FdeData &B) { + return A.PcRel < B.PcRel; + }; + std::stable_sort(Ret.begin(), Ret.end(), Less); + auto Eq = [](const FdeData &A, const FdeData &B) { + return A.PcRel == B.PcRel; + }; + Ret.erase(std::unique(Ret.begin(), Ret.end(), Eq), Ret.end()); + return Ret; } static uint64_t readFdeAddr(uint8_t *Buf, int Size) { switch (Size) { case DW_EH_PE_udata2: - return read16(Buf, Config->Endianness); + return read16(Buf); + case DW_EH_PE_sdata2: + return (int16_t)read16(Buf); case DW_EH_PE_udata4: - return read32(Buf, Config->Endianness); + return read32(Buf); + case DW_EH_PE_sdata4: + return (int32_t)read32(Buf); case DW_EH_PE_udata8: - return read64(Buf, Config->Endianness); + case DW_EH_PE_sdata8: + return read64(Buf); case DW_EH_PE_absptr: return readUint(Buf); } @@ -556,7 +556,7 @@ uint64_t EhFrameSection::getFdePc(uint8_t *Buf, size_t FdeOff, // The starting address to which this FDE applies is // stored at FDE + 8 byte. size_t Off = FdeOff + 8; - uint64_t Addr = readFdeAddr(Buf + Off, Enc & 0x7); + uint64_t Addr = readFdeAddr(Buf + Off, Enc & 0xf); if ((Enc & 0x70) == DW_EH_PE_absptr) return Addr; if ((Enc & 0x70) == DW_EH_PE_pcrel) @@ -589,7 +589,15 @@ void EhFrameSection::writeTo(uint8_t *Buf) { GotSection::GotSection() : SyntheticSection(SHF_ALLOC | SHF_WRITE, SHT_PROGBITS, - Target->GotEntrySize, ".got") {} + Target->GotEntrySize, ".got") { + // PPC64 saves the ElfSym::GlobalOffsetTable .TOC. as the first entry in the + // .got. If there are no references to .TOC. in the symbol table, + // ElfSym::GlobalOffsetTable will not be defined and we won't need to save + // .TOC. in the .got. When it is defined, we increase NumEntries by the number + // of entries used to emit ElfSym::GlobalOffsetTable. + if (ElfSym::GlobalOffsetTable && !Target->GotBaseSymInGotPlt) + NumEntries += Target->GotHeaderEntriesNum; +} void GotSection::addEntry(Symbol &Sym) { Sym.GotIndex = NumEntries; @@ -623,196 +631,383 @@ uint64_t GotSection::getGlobalDynOffset(const Symbol &B) const { return B.GlobalDynIndex * Config->Wordsize; } -void GotSection::finalizeContents() { Size = NumEntries * Config->Wordsize; } +void GotSection::finalizeContents() { + Size = NumEntries * Config->Wordsize; +} bool GotSection::empty() const { // We need to emit a GOT even if it's empty if there's a relocation that is // relative to GOT(such as GOTOFFREL) or there's a symbol that points to a GOT - // (i.e. _GLOBAL_OFFSET_TABLE_). - return NumEntries == 0 && !HasGotOffRel && !ElfSym::GlobalOffsetTable; + // (i.e. _GLOBAL_OFFSET_TABLE_) that the target defines relative to the .got. + return NumEntries == 0 && !HasGotOffRel && + !(ElfSym::GlobalOffsetTable && !Target->GotBaseSymInGotPlt); } void GotSection::writeTo(uint8_t *Buf) { // Buf points to the start of this section's buffer, // whereas InputSectionBase::relocateAlloc() expects its argument // to point to the start of the output section. 
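// (Editorial note, not part of the patch.) Concretely, the rebasing below
// makes section-relative relocation offsets line up with this section's
// slice of the output buffer: for a relocation at output-section offset R,
//   uint8_t *SecStart = Buf - OutSecOff; // pretend start of the output section
//   uint8_t *Where = SecStart + R;       // == Buf + (R - OutSecOff)
// so only offsets within [OutSecOff, OutSecOff + Size) are actually written.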
+ Target->writeGotHeader(Buf); relocateAlloc(Buf - OutSecOff, Buf - OutSecOff + Size); } +static uint64_t getMipsPageAddr(uint64_t Addr) { + return (Addr + 0x8000) & ~0xffff; +} + +static uint64_t getMipsPageCount(uint64_t Size) { + return (Size + 0xfffe) / 0xffff + 1; +} + MipsGotSection::MipsGotSection() : SyntheticSection(SHF_ALLOC | SHF_WRITE | SHF_MIPS_GPREL, SHT_PROGBITS, 16, ".got") {} -void MipsGotSection::addEntry(Symbol &Sym, int64_t Addend, RelExpr Expr) { - // For "true" local symbols which can be referenced from the same module - // only compiler creates two instructions for address loading: - // - // lw $8, 0($gp) # R_MIPS_GOT16 - // addi $8, $8, 0 # R_MIPS_LO16 - // - // The first instruction loads high 16 bits of the symbol address while - // the second adds an offset. That allows to reduce number of required - // GOT entries because only one global offset table entry is necessary - // for every 64 KBytes of local data. So for local symbols we need to - // allocate number of GOT entries to hold all required "page" addresses. - // - // All global symbols (hidden and regular) considered by compiler uniformly. - // It always generates a single `lw` instruction and R_MIPS_GOT16 relocation - // to load address of the symbol. So for each such symbol we need to - // allocate dedicated GOT entry to store its address. - // - // If a symbol is preemptible we need help of dynamic linker to get its - // final address. The corresponding GOT entries are allocated in the - // "global" part of GOT. Entries for non preemptible global symbol allocated - // in the "local" part of GOT. - // - // See "Global Offset Table" in Chapter 5: - // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf +void MipsGotSection::addEntry(InputFile &File, Symbol &Sym, int64_t Addend, + RelExpr Expr) { + FileGot &G = getGot(File); if (Expr == R_MIPS_GOT_LOCAL_PAGE) { - // At this point we do not know final symbol value so to reduce number - // of allocated GOT entries do the following trick. Save all output - // sections referenced by GOT relocations. Then later in the `finalize` - // method calculate number of "pages" required to cover all saved output - // section and allocate appropriate number of GOT entries. - PageIndexMap.insert({Sym.getOutputSection(), 0}); - return; - } - if (Sym.isTls()) { - // GOT entries created for MIPS TLS relocations behave like - // almost GOT entries from other ABIs. They go to the end - // of the global offset table. - Sym.GotIndex = TlsEntries.size(); - TlsEntries.push_back(&Sym); - return; - } - auto AddEntry = [&](Symbol &S, uint64_t A, GotEntries &Items) { - if (S.isInGot() && !A) - return; - size_t NewIndex = Items.size(); - if (!EntryIndexMap.insert({{&S, A}, NewIndex}).second) - return; - Items.emplace_back(&S, A); - if (!A) - S.GotIndex = NewIndex; - }; - if (Sym.IsPreemptible) { - // Ignore addends for preemptible symbols. They got single GOT entry anyway. - AddEntry(Sym, 0, GlobalEntries); - Sym.IsInGlobalMipsGot = true; - } else if (Expr == R_MIPS_GOT_OFF32) { - AddEntry(Sym, Addend, LocalEntries32); - Sym.Is32BitMipsGot = true; - } else { - // Hold local GOT entries accessed via a 16-bit index separately. - // That allows to write them in the beginning of the GOT and keep - // their indexes as less as possible to escape relocation's overflow. 
- AddEntry(Sym, Addend, LocalEntries); - } + if (const OutputSection *OS = Sym.getOutputSection()) + G.PagesMap.insert({OS, {}}); + else + G.Local16.insert({{nullptr, getMipsPageAddr(Sym.getVA(Addend))}, 0}); + } else if (Sym.isTls()) + G.Tls.insert({&Sym, 0}); + else if (Sym.IsPreemptible && Expr == R_ABS) + G.Relocs.insert({&Sym, 0}); + else if (Sym.IsPreemptible) + G.Global.insert({&Sym, 0}); + else if (Expr == R_MIPS_GOT_OFF32) + G.Local32.insert({{&Sym, Addend}, 0}); + else + G.Local16.insert({{&Sym, Addend}, 0}); } -bool MipsGotSection::addDynTlsEntry(Symbol &Sym) { - if (Sym.GlobalDynIndex != -1U) - return false; - Sym.GlobalDynIndex = TlsEntries.size(); - // Global Dynamic TLS entries take two GOT slots. - TlsEntries.push_back(nullptr); - TlsEntries.push_back(&Sym); - return true; +void MipsGotSection::addDynTlsEntry(InputFile &File, Symbol &Sym) { + getGot(File).DynTlsSymbols.insert({&Sym, 0}); } -// Reserves TLS entries for a TLS module ID and a TLS block offset. -// In total it takes two GOT slots. -bool MipsGotSection::addTlsIndex() { - if (TlsIndexOff != uint32_t(-1)) - return false; - TlsIndexOff = TlsEntries.size() * Config->Wordsize; - TlsEntries.push_back(nullptr); - TlsEntries.push_back(nullptr); - return true; +void MipsGotSection::addTlsIndex(InputFile &File) { + getGot(File).DynTlsSymbols.insert({nullptr, 0}); } -static uint64_t getMipsPageAddr(uint64_t Addr) { - return (Addr + 0x8000) & ~0xffff; +size_t MipsGotSection::FileGot::getEntriesNum() const { + return getPageEntriesNum() + Local16.size() + Global.size() + Relocs.size() + + Tls.size() + DynTlsSymbols.size() * 2; } -static uint64_t getMipsPageCount(uint64_t Size) { - return (Size + 0xfffe) / 0xffff + 1; +size_t MipsGotSection::FileGot::getPageEntriesNum() const { + size_t Num = 0; + for (const std::pair<const OutputSection *, FileGot::PageBlock> &P : PagesMap) + Num += P.second.Count; + return Num; } -uint64_t MipsGotSection::getPageEntryOffset(const Symbol &B, - int64_t Addend) const { - const OutputSection *OutSec = B.getOutputSection(); - uint64_t SecAddr = getMipsPageAddr(OutSec->Addr); - uint64_t SymAddr = getMipsPageAddr(B.getVA(Addend)); - uint64_t Index = PageIndexMap.lookup(OutSec) + (SymAddr - SecAddr) / 0xffff; - assert(Index < PageEntriesNum); - return (HeaderEntriesNum + Index) * Config->Wordsize; +size_t MipsGotSection::FileGot::getIndexedEntriesNum() const { + size_t Count = getPageEntriesNum() + Local16.size() + Global.size(); + // If there are relocation-only entries in the GOT, TLS entries + // are allocated after them. TLS entries should be addressable + // by 16-bit index so count both reloc-only and TLS entries. + if (!Tls.empty() || !DynTlsSymbols.empty()) + Count += Relocs.size() + Tls.size() + DynTlsSymbols.size() * 2; + return Count; } -uint64_t MipsGotSection::getSymEntryOffset(const Symbol &B, - int64_t Addend) const { - // Calculate offset of the GOT entries block: TLS, global, local. - uint64_t Index = HeaderEntriesNum + PageEntriesNum; - if (B.isTls()) - Index += LocalEntries.size() + LocalEntries32.size() + GlobalEntries.size(); - else if (B.IsInGlobalMipsGot) - Index += LocalEntries.size() + LocalEntries32.size(); - else if (B.Is32BitMipsGot) - Index += LocalEntries.size(); - // Calculate offset of the GOT entry in the block. 
- if (B.isInGot()) - Index += B.GotIndex; - else { - auto It = EntryIndexMap.find({&B, Addend}); - assert(It != EntryIndexMap.end()); - Index += It->second; +MipsGotSection::FileGot &MipsGotSection::getGot(InputFile &F) { + if (!F.MipsGotIndex.hasValue()) { + Gots.emplace_back(); + Gots.back().File = &F; + F.MipsGotIndex = Gots.size() - 1; + } + return Gots[*F.MipsGotIndex]; +} + +uint64_t MipsGotSection::getPageEntryOffset(const InputFile *F, + const Symbol &Sym, + int64_t Addend) const { + const FileGot &G = Gots[*F->MipsGotIndex]; + uint64_t Index = 0; + if (const OutputSection *OutSec = Sym.getOutputSection()) { + uint64_t SecAddr = getMipsPageAddr(OutSec->Addr); + uint64_t SymAddr = getMipsPageAddr(Sym.getVA(Addend)); + Index = G.PagesMap.lookup(OutSec).FirstIndex + (SymAddr - SecAddr) / 0xffff; + } else { + Index = G.Local16.lookup({nullptr, getMipsPageAddr(Sym.getVA(Addend))}); } return Index * Config->Wordsize; } -uint64_t MipsGotSection::getTlsOffset() const { - return (getLocalEntriesNum() + GlobalEntries.size()) * Config->Wordsize; +uint64_t MipsGotSection::getSymEntryOffset(const InputFile *F, const Symbol &S, + int64_t Addend) const { + const FileGot &G = Gots[*F->MipsGotIndex]; + Symbol *Sym = const_cast<Symbol *>(&S); + if (Sym->isTls()) + return G.Tls.lookup(Sym) * Config->Wordsize; + if (Sym->IsPreemptible) + return G.Global.lookup(Sym) * Config->Wordsize; + return G.Local16.lookup({Sym, Addend}) * Config->Wordsize; } -uint64_t MipsGotSection::getGlobalDynOffset(const Symbol &B) const { - return B.GlobalDynIndex * Config->Wordsize; +uint64_t MipsGotSection::getTlsIndexOffset(const InputFile *F) const { + const FileGot &G = Gots[*F->MipsGotIndex]; + return G.DynTlsSymbols.lookup(nullptr) * Config->Wordsize; +} + +uint64_t MipsGotSection::getGlobalDynOffset(const InputFile *F, + const Symbol &S) const { + const FileGot &G = Gots[*F->MipsGotIndex]; + Symbol *Sym = const_cast<Symbol *>(&S); + return G.DynTlsSymbols.lookup(Sym) * Config->Wordsize; } const Symbol *MipsGotSection::getFirstGlobalEntry() const { - return GlobalEntries.empty() ? nullptr : GlobalEntries.front().first; + if (Gots.empty()) + return nullptr; + const FileGot &PrimGot = Gots.front(); + if (!PrimGot.Global.empty()) + return PrimGot.Global.front().first; + if (!PrimGot.Relocs.empty()) + return PrimGot.Relocs.front().first; + return nullptr; } unsigned MipsGotSection::getLocalEntriesNum() const { - return HeaderEntriesNum + PageEntriesNum + LocalEntries.size() + - LocalEntries32.size(); + if (Gots.empty()) + return HeaderEntriesNum; + return HeaderEntriesNum + Gots.front().getPageEntriesNum() + + Gots.front().Local16.size(); +} + +bool MipsGotSection::tryMergeGots(FileGot &Dst, FileGot &Src, bool IsPrimary) { + FileGot Tmp = Dst; + set_union(Tmp.PagesMap, Src.PagesMap); + set_union(Tmp.Local16, Src.Local16); + set_union(Tmp.Global, Src.Global); + set_union(Tmp.Relocs, Src.Relocs); + set_union(Tmp.Tls, Src.Tls); + set_union(Tmp.DynTlsSymbols, Src.DynTlsSymbols); + + size_t Count = IsPrimary ? 
HeaderEntriesNum : 0; + Count += Tmp.getIndexedEntriesNum(); + + if (Count * Config->Wordsize > Config->MipsGotSize) + return false; + + std::swap(Tmp, Dst); + return true; } void MipsGotSection::finalizeContents() { updateAllocSize(); } bool MipsGotSection::updateAllocSize() { - PageEntriesNum = 0; - for (std::pair<const OutputSection *, size_t> &P : PageIndexMap) { - // For each output section referenced by GOT page relocations calculate - // and save into PageIndexMap an upper bound of MIPS GOT entries required - // to store page addresses of local symbols. We assume the worst case - - // each 64kb page of the output section has at least one GOT relocation - // against it. And take in account the case when the section intersects - // page boundaries. - P.second = PageEntriesNum; - PageEntriesNum += getMipsPageCount(P.first->Size); - } - Size = (getLocalEntriesNum() + GlobalEntries.size() + TlsEntries.size()) * - Config->Wordsize; + Size = HeaderEntriesNum * Config->Wordsize; + for (const FileGot &G : Gots) + Size += G.getEntriesNum() * Config->Wordsize; return false; } +template <class ELFT> void MipsGotSection::build() { + if (Gots.empty()) + return; + + std::vector<FileGot> MergedGots(1); + + // For each GOT move non-preemptible symbols from the `Global` + // to `Local16` list. A preemptible symbol might become non-preemptible + // if, for example, it gets a related copy relocation. + for (FileGot &Got : Gots) { + for (auto &P: Got.Global) + if (!P.first->IsPreemptible) + Got.Local16.insert({{P.first, 0}, 0}); + Got.Global.remove_if([&](const std::pair<Symbol *, size_t> &P) { + return !P.first->IsPreemptible; + }); + } + + // For each GOT remove the "reloc-only" entry if there is a "global" + // entry for the same symbol. And append the local entries indexed with + // a 32-bit value to the end of the 16-bit entries. + for (FileGot &Got : Gots) { + Got.Relocs.remove_if([&](const std::pair<Symbol *, size_t> &P) { + return Got.Global.count(P.first); + }); + set_union(Got.Local16, Got.Local32); + Got.Local32.clear(); + } + + // Evaluate the number of "reloc-only" entries in the resulting GOT. + // To do that, put all unique "reloc-only" and "global" entries + // from all GOTs into the future primary GOT. + FileGot *PrimGot = &MergedGots.front(); + for (FileGot &Got : Gots) { + set_union(PrimGot->Relocs, Got.Global); + set_union(PrimGot->Relocs, Got.Relocs); + Got.Relocs.clear(); + } + + // Evaluate the number of "page" entries in each GOT. + for (FileGot &Got : Gots) { + for (std::pair<const OutputSection *, FileGot::PageBlock> &P : + Got.PagesMap) { + const OutputSection *OS = P.first; + uint64_t SecSize = 0; + for (BaseCommand *Cmd : OS->SectionCommands) { + if (auto *ISD = dyn_cast<InputSectionDescription>(Cmd)) + for (InputSection *IS : ISD->Sections) { + uint64_t Off = alignTo(SecSize, IS->Alignment); + SecSize = Off + IS->getSize(); + } + } + P.second.Count = getMipsPageCount(SecSize); + } + } + + // Merge GOTs. Try to join as many GOTs as possible but do not exceed + // the maximum GOT size. First, try to fill the primary GOT because + // the primary GOT can be accessed in the most efficient way. If that + // is not possible, try to fill the last GOT in the list, and finally + // create a new GOT if both attempts failed. + for (FileGot &SrcGot : Gots) { + InputFile *File = SrcGot.File; + if (tryMergeGots(MergedGots.front(), SrcGot, true)) { + File->MipsGotIndex = 0; + } else { + // If this is the first time we failed to merge with the primary GOT, + // MergedGots.back() will also be the primary GOT.
We must make sure not + // to try to merge again with IsPrimary=false, as otherwise, if the + // inputs are just right, we could allow the primary GOT to become 1 or 2 + // words too big due to ignoring the header size. + if (MergedGots.size() == 1 || + !tryMergeGots(MergedGots.back(), SrcGot, false)) { + MergedGots.emplace_back(); + std::swap(MergedGots.back(), SrcGot); + } + File->MipsGotIndex = MergedGots.size() - 1; + } + } + std::swap(Gots, MergedGots); + + // Reduce the number of "reloc-only" entries in the primary GOT + // by subtracting the "global" entries that exist in the primary GOT. + PrimGot = &Gots.front(); + PrimGot->Relocs.remove_if([&](const std::pair<Symbol *, size_t> &P) { + return PrimGot->Global.count(P.first); + }); + + // Calculate indexes for each GOT entry. + size_t Index = HeaderEntriesNum; + for (FileGot &Got : Gots) { + Got.StartIndex = &Got == PrimGot ? 0 : Index; + for (std::pair<const OutputSection *, FileGot::PageBlock> &P : + Got.PagesMap) { + // For each output section referenced by GOT page relocations calculate + // and save into PagesMap an upper bound of MIPS GOT entries required + // to store page addresses of local symbols. We assume the worst case - + // each 64kb page of the output section has at least one GOT relocation + // against it. And take into account the case when the section intersects + // page boundaries. + P.second.FirstIndex = Index; + Index += P.second.Count; + } + for (auto &P: Got.Local16) + P.second = Index++; + for (auto &P: Got.Global) + P.second = Index++; + for (auto &P: Got.Relocs) + P.second = Index++; + for (auto &P: Got.Tls) + P.second = Index++; + for (auto &P: Got.DynTlsSymbols) { + P.second = Index; + Index += 2; + } + } + + // Update the Symbol::GotIndex field to use this + // value later in the `sortMipsSymbols` function. + for (auto &P : PrimGot->Global) + P.first->GotIndex = P.second; + for (auto &P : PrimGot->Relocs) + P.first->GotIndex = P.second; + + // Create dynamic relocations. + for (FileGot &Got : Gots) { + // Create dynamic relocations for TLS entries. + for (std::pair<Symbol *, size_t> &P : Got.Tls) { + Symbol *S = P.first; + uint64_t Offset = P.second * Config->Wordsize; + if (S->IsPreemptible) + InX::RelaDyn->addReloc(Target->TlsGotRel, this, Offset, S); + } + for (std::pair<Symbol *, size_t> &P : Got.DynTlsSymbols) { + Symbol *S = P.first; + uint64_t Offset = P.second * Config->Wordsize; + if (S == nullptr) { + if (!Config->Pic) + continue; + InX::RelaDyn->addReloc(Target->TlsModuleIndexRel, this, Offset, S); + } else { + // When building a shared library we still need a dynamic relocation + // for the module index. Therefore only checking for + // S->IsPreemptible is not sufficient (this happens e.g. for + // thread-locals that have been marked as local through a linker script). + if (!S->IsPreemptible && !Config->Pic) + continue; + InX::RelaDyn->addReloc(Target->TlsModuleIndexRel, this, Offset, S); + // However, we can skip writing the TLS offset reloc for non-preemptible + // symbols since it is known even in shared libraries. + if (!S->IsPreemptible) + continue; + Offset += Config->Wordsize; + InX::RelaDyn->addReloc(Target->TlsOffsetRel, this, Offset, S); + } + } + + // Do not create dynamic relocations for non-TLS + // entries in the primary GOT. + if (&Got == PrimGot) + continue; + + // Dynamic relocations for "global" entries.
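// (Editorial elaboration, not part of the patch.) Secondary GOTs are not
// reachable through the regular dynsym/GOT index mapping, so their "global"
// entries are resolved at run time instead: the loop below emits one
// Target->RelativeRel dynamic relocation (R_MIPS_REL32 on MIPS) per entry,
// carrying the symbol, while writeTo() writes link-time values only for the
// primary GOT.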
+ for (const std::pair<Symbol *, size_t> &P : Got.Global) { + uint64_t Offset = P.second * Config->Wordsize; + InX::RelaDyn->addReloc(Target->RelativeRel, this, Offset, P.first); + } + if (!Config->Pic) + continue; + // Dynamic relocations for "local" entries in case of PIC. + for (const std::pair<const OutputSection *, FileGot::PageBlock> &L : + Got.PagesMap) { + size_t PageCount = L.second.Count; + for (size_t PI = 0; PI < PageCount; ++PI) { + uint64_t Offset = (L.second.FirstIndex + PI) * Config->Wordsize; + InX::RelaDyn->addReloc({Target->RelativeRel, this, Offset, L.first, + int64_t(PI * 0x10000)}); + } + } + for (const std::pair<GotEntry, size_t> &P : Got.Local16) { + uint64_t Offset = P.second * Config->Wordsize; + InX::RelaDyn->addReloc({Target->RelativeRel, this, Offset, true, + P.first.first, P.first.second}); + } + } +} + bool MipsGotSection::empty() const { // We add the .got section to the result for dynamic MIPS target because // its address and properties are mentioned in the .dynamic section. return Config->Relocatable; } -uint64_t MipsGotSection::getGp() const { return ElfSym::MipsGp->getVA(0); } +uint64_t MipsGotSection::getGp(const InputFile *F) const { + // For files without a related GOT, or files referring to the primary GOT, + // return the "common" _gp value. For secondary GOTs, calculate + // individual _gp values. + if (!F || !F->MipsGotIndex.hasValue() || *F->MipsGotIndex == 0) + return ElfSym::MipsGp->getVA(0); + return getVA() + Gots[*F->MipsGotIndex].StartIndex * Config->Wordsize + + 0x7ff0; +} void MipsGotSection::writeTo(uint8_t *Buf) { // Set the MSB of the second GOT slot. This is not required by any @@ -830,59 +1025,67 @@ void MipsGotSection::writeTo(uint8_t *Buf) { // keep doing this for now. We really need to revisit this to see // if we had to do this. writeUint(Buf + Config->Wordsize, (uint64_t)1 << (Config->Wordsize * 8 - 1)); - Buf += HeaderEntriesNum * Config->Wordsize; - // Write 'page address' entries to the local part of the GOT. - for (std::pair<const OutputSection *, size_t> &L : PageIndexMap) { - size_t PageCount = getMipsPageCount(L.first->Size); - uint64_t FirstPageAddr = getMipsPageAddr(L.first->Addr); - for (size_t PI = 0; PI < PageCount; ++PI) { - uint8_t *Entry = Buf + (L.second + PI) * Config->Wordsize; - writeUint(Entry, FirstPageAddr + PI * 0x10000); - } - } - Buf += PageEntriesNum * Config->Wordsize; - auto AddEntry = [&](const GotEntry &SA) { - uint8_t *Entry = Buf; - Buf += Config->Wordsize; - const Symbol *Sym = SA.first; - uint64_t VA = Sym->getVA(SA.second); - if (Sym->StOther & STO_MIPS_MICROMIPS) - VA |= 1; - writeUint(Entry, VA); - }; - std::for_each(std::begin(LocalEntries), std::end(LocalEntries), AddEntry); - std::for_each(std::begin(LocalEntries32), std::end(LocalEntries32), AddEntry); - std::for_each(std::begin(GlobalEntries), std::end(GlobalEntries), AddEntry); - // Initialize TLS-related GOT entries. If the entry has a corresponding - // dynamic relocations, leave it initialized by zero. Write down adjusted - // TLS symbol's values otherwise. To calculate the adjustments use offsets - // for thread-local storage.
- // https://www.linux-mips.org/wiki/NPTL - if (TlsIndexOff != -1U && !Config->Pic) - writeUint(Buf + TlsIndexOff, 1); - for (const Symbol *B : TlsEntries) { - if (!B || B->IsPreemptible) - continue; - uint64_t VA = B->getVA(); - if (B->GotIndex != -1U) { - uint8_t *Entry = Buf + B->GotIndex * Config->Wordsize; - writeUint(Entry, VA - 0x7000); + for (const FileGot &G : Gots) { + auto Write = [&](size_t I, const Symbol *S, int64_t A) { + uint64_t VA = A; + if (S) { + VA = S->getVA(A); + if (S->StOther & STO_MIPS_MICROMIPS) + VA |= 1; + } + writeUint(Buf + I * Config->Wordsize, VA); + }; + // Write 'page address' entries to the local part of the GOT. + for (const std::pair<const OutputSection *, FileGot::PageBlock> &L : + G.PagesMap) { + size_t PageCount = L.second.Count; + uint64_t FirstPageAddr = getMipsPageAddr(L.first->Addr); + for (size_t PI = 0; PI < PageCount; ++PI) + Write(L.second.FirstIndex + PI, nullptr, FirstPageAddr + PI * 0x10000); } - if (B->GlobalDynIndex != -1U) { - uint8_t *Entry = Buf + B->GlobalDynIndex * Config->Wordsize; - writeUint(Entry, 1); - Entry += Config->Wordsize; - writeUint(Entry, VA - 0x8000); + // Local, global, TLS, reloc-only entries. + // If a TLS entry has a corresponding dynamic relocation, leave it + // initialized by zero. Write down the adjusted TLS symbol's value otherwise. + // To calculate the adjustments use offsets for thread-local storage. + // https://www.linux-mips.org/wiki/NPTL + for (const std::pair<GotEntry, size_t> &P : G.Local16) + Write(P.second, P.first.first, P.first.second); + // Write VA to the primary GOT only. For secondary GOTs that + // will be done by REL32 dynamic relocations. + if (&G == &Gots.front()) + for (const std::pair<const Symbol *, size_t> &P : G.Global) + Write(P.second, P.first, 0); + for (const std::pair<Symbol *, size_t> &P : G.Relocs) + Write(P.second, P.first, 0); + for (const std::pair<Symbol *, size_t> &P : G.Tls) + Write(P.second, P.first, P.first->IsPreemptible ? 0 : -0x7000); + for (const std::pair<Symbol *, size_t> &P : G.DynTlsSymbols) { + if (P.first == nullptr && !Config->Pic) + Write(P.second, nullptr, 1); + else if (P.first && !P.first->IsPreemptible) { + // If we are emitting PIC code with relocations we mustn't write + // anything to the GOT here. When using Elf_Rel relocations the value + // one will be treated as an addend and will cause crashes at runtime. + if (!Config->Pic) + Write(P.second, nullptr, 1); + Write(P.second + 1, P.first, -0x8000); + } } } } +// On PowerPC the .plt section is used to hold the table of function addresses +// instead of the .got.plt, and the type is SHT_NOBITS similar to a .bss +// section. I don't know why we have a BSS style type for the section but it is +// consistent across both 64-bit PowerPC ABIs as well as the 32-bit PowerPC ABI. GotPltSection::GotPltSection() : SyntheticSection(SHF_ALLOC | SHF_WRITE, - Target->GotPltEntrySize, ".got.plt") {} + Config->EMachine == EM_PPC64 ? SHT_NOBITS : SHT_PROGBITS, + Target->GotPltEntrySize, + Config->EMachine == EM_PPC64 ?
".plt" : ".got.plt") {} void GotPltSection::addEntry(Symbol &Sym) { - Sym.GotPltIndex = Target->GotPltHeaderEntriesNum + Entries.size(); + assert(Sym.PltIndex == Entries.size()); Entries.push_back(&Sym); } @@ -900,16 +1103,37 @@ void GotPltSection::writeTo(uint8_t *Buf) { } } -// On ARM the IgotPltSection is part of the GotSection, on other Targets it is -// part of the .got.plt +bool GotPltSection::empty() const { + // We need to emit a GOT.PLT even if it's empty if there's a symbol that + // references the _GLOBAL_OFFSET_TABLE_ and the Target defines the symbol + // relative to the .got.plt section. + return Entries.empty() && + !(ElfSym::GlobalOffsetTable && Target->GotBaseSymInGotPlt); +} + +static StringRef getIgotPltName() { + // On ARM the IgotPltSection is part of the GotSection. + if (Config->EMachine == EM_ARM) + return ".got"; + + // On PowerPC64 the GotPltSection is renamed to '.plt' so the IgotPltSection + // needs to be named the same. + if (Config->EMachine == EM_PPC64) + return ".plt"; + + return ".got.plt"; +} + +// On PowerPC64 the GotPltSection type is SHT_NOBITS so we have to follow suit +// with the IgotPltSection. IgotPltSection::IgotPltSection() - : SyntheticSection(SHF_ALLOC | SHF_WRITE, SHT_PROGBITS, - Target->GotPltEntrySize, - Config->EMachine == EM_ARM ? ".got" : ".got.plt") {} + : SyntheticSection(SHF_ALLOC | SHF_WRITE, + Config->EMachine == EM_PPC64 ? SHT_NOBITS : SHT_PROGBITS, + Target->GotPltEntrySize, getIgotPltName()) {} void IgotPltSection::addEntry(Symbol &Sym) { Sym.IsInIgot = true; - Sym.GotPltIndex = Entries.size(); + assert(Sym.PltIndex == Entries.size()); Entries.push_back(&Sym); } @@ -1005,8 +1229,14 @@ void DynamicSection<ELFT>::addInt(int32_t Tag, uint64_t Val) { template <class ELFT> void DynamicSection<ELFT>::addInSec(int32_t Tag, InputSection *Sec) { + Entries.push_back({Tag, [=] { return Sec->getVA(0); }}); +} + +template <class ELFT> +void DynamicSection<ELFT>::addInSecRelative(int32_t Tag, InputSection *Sec) { + size_t TagOffset = Entries.size() * Entsize; Entries.push_back( - {Tag, [=] { return Sec->getParent()->Addr + Sec->OutSecOff; }}); + {Tag, [=] { return Sec->getVA(0) - (getVA() + TagOffset); }}); } template <class ELFT> @@ -1034,6 +1264,8 @@ template <class ELFT> void DynamicSection<ELFT>::finalizeContents() { uint32_t DtFlags1 = 0; if (Config->Bsymbolic) DtFlags |= DF_SYMBOLIC; + if (Config->ZInitfirst) + DtFlags1 |= DF_1_INITFIRST; if (Config->ZNodelete) DtFlags1 |= DF_1_NODELETE; if (Config->ZNodlopen) @@ -1046,6 +1278,8 @@ template <class ELFT> void DynamicSection<ELFT>::finalizeContents() { DtFlags |= DF_ORIGIN; DtFlags1 |= DF_1_ORIGIN; } + if (!Config->ZText) + DtFlags |= DF_TEXTREL; if (DtFlags) addInt(DT_FLAGS, DtFlags); @@ -1064,7 +1298,7 @@ template <class ELFT> void DynamicSection<ELFT>::finalizeContents() { addInt(DT_DEBUG, 0); this->Link = InX::DynStrTab->getParent()->SectionIndex; - if (InX::RelaDyn->getParent() && !InX::RelaDyn->empty()) { + if (!InX::RelaDyn->empty()) { addInSec(InX::RelaDyn->DynamicTag, InX::RelaDyn); addSize(InX::RelaDyn->SizeDynamicTag, InX::RelaDyn->getParent()); @@ -1081,7 +1315,21 @@ template <class ELFT> void DynamicSection<ELFT>::finalizeContents() { addInt(IsRela ? DT_RELACOUNT : DT_RELCOUNT, NumRelativeRels); } } - if (InX::RelaPlt->getParent() && !InX::RelaPlt->empty()) { + if (InX::RelrDyn && !InX::RelrDyn->Relocs.empty()) { + addInSec(Config->UseAndroidRelrTags ? DT_ANDROID_RELR : DT_RELR, + InX::RelrDyn); + addSize(Config->UseAndroidRelrTags ? 
DT_ANDROID_RELRSZ : DT_RELRSZ, + InX::RelrDyn->getParent()); + addInt(Config->UseAndroidRelrTags ? DT_ANDROID_RELRENT : DT_RELRENT, + sizeof(Elf_Relr)); + } + // The .rel[a].plt section usually consists of two parts, containing plt and + // iplt relocations. It is possible to have only iplt relocations in the + // output. In that case RelaPlt is empty and has zero offset, the same offset + // as RelaIplt has. And we still want to emit proper dynamic tags for that + // case, so here we always use RelaPlt as a marker for the beginning of the + // .rel[a].plt section. + if (InX::RelaPlt->getParent()->Live) { addInSec(DT_JMPREL, InX::RelaPlt); addSize(DT_PLTRELSZ, InX::RelaPlt->getParent()); switch (Config->EMachine) { @@ -1154,8 +1402,23 @@ template <class ELFT> void DynamicSection<ELFT>::finalizeContents() { else addInt(DT_MIPS_GOTSYM, InX::DynSymTab->getNumSymbols()); addInSec(DT_PLTGOT, InX::MipsGot); - if (InX::MipsRldMap) - addInSec(DT_MIPS_RLD_MAP, InX::MipsRldMap); + if (InX::MipsRldMap) { + if (!Config->Pie) + addInSec(DT_MIPS_RLD_MAP, InX::MipsRldMap); + // Store the offset to the .rld_map section + // relative to the address of the tag. + addInSecRelative(DT_MIPS_RLD_MAP_REL, InX::MipsRldMap); + } + } + + // The Glink dynamic tag is required by the V2 ABI if the plt section isn't empty. + if (Config->EMachine == EM_PPC64 && !InX::Plt->empty()) { + // The Glink tag points to 32 bytes before the first lazy symbol resolution + // stub, which starts directly after the header. + Entries.push_back({DT_PPC64_GLINK, [=] { + unsigned Offset = Target->PltHeaderSize - 32; + return InX::Plt->getVA(0) + Offset; + }}); } addInt(DT_NULL, 0); @@ -1175,13 +1438,16 @@ template <class ELFT> void DynamicSection<ELFT>::writeTo(uint8_t *Buf) { } uint64_t DynamicReloc::getOffset() const { - return InputSec->getOutputSection()->Addr + InputSec->getOffset(OffsetInSec); + return InputSec->getVA(OffsetInSec); } -int64_t DynamicReloc::getAddend() const { +int64_t DynamicReloc::computeAddend() const { if (UseSymVA) return Sym->getVA(Addend); - return Addend; + if (!OutputSec) + return Addend; + // See the comment in the DynamicReloc ctor. + return getMipsPageAddr(OutputSec->Addr) + Addend; } uint32_t DynamicReloc::getSymIndex() const { @@ -1196,6 +1462,23 @@ RelocationBaseSection::RelocationBaseSection(StringRef Name, uint32_t Type, : SyntheticSection(SHF_ALLOC, Type, Config->Wordsize, Name), DynamicTag(DynamicTag), SizeDynamicTag(SizeDynamicTag) {} +void RelocationBaseSection::addReloc(RelType DynType, InputSectionBase *IS, + uint64_t OffsetInSec, Symbol *Sym) { + addReloc({DynType, IS, OffsetInSec, false, Sym, 0}); +} + +void RelocationBaseSection::addReloc(RelType DynType, + InputSectionBase *InputSec, + uint64_t OffsetInSec, Symbol *Sym, + int64_t Addend, RelExpr Expr, + RelType Type) { + // Write the addends to the relocated address if required. We skip + // it if the written value would be zero. + if (Config->WriteAddends && (Expr != R_ADDEND || Addend != 0)) + InputSec->Relocations.push_back({Expr, Type, OffsetInSec, Addend, Sym}); + addReloc({DynType, InputSec, OffsetInSec, Expr != R_ADDEND, Sym, Addend}); +} + void RelocationBaseSection::addReloc(const DynamicReloc &Reloc) { if (Reloc.Type == Target->RelativeRel) ++NumRelativeRelocs; @@ -1212,23 +1495,17 @@ void RelocationBaseSection::finalizeContents() { getParent()->Link = Link; } +RelrBaseSection::RelrBaseSection() + : SyntheticSection(SHF_ALLOC, + Config->UseAndroidRelrTags ?
SHT_ANDROID_RELR : SHT_RELR, + Config->Wordsize, ".relr.dyn") {} + template <class ELFT> static void encodeDynamicReloc(typename ELFT::Rela *P, const DynamicReloc &Rel) { if (Config->IsRela) - P->r_addend = Rel.getAddend(); + P->r_addend = Rel.computeAddend(); P->r_offset = Rel.getOffset(); - if (Config->EMachine == EM_MIPS && Rel.getInputSec() == InX::MipsGot) - // The MIPS GOT section contains dynamic relocations that correspond to TLS - // entries. These entries are placed after the global and local sections of - // the GOT. At the point when we create these relocations, the size of the - // global and local sections is unknown, so the offset that we store in the - // TLS entry's DynamicReloc is relative to the start of the TLS section of - // the GOT, rather than being relative to the start of the GOT. This line of - // code adds the size of the global and local sections to the virtual - // address computed by getOffset() in order to adjust it into the TLS - // section. - P->r_offset += InX::MipsGot->getTlsOffset(); P->setSymbolAndType(Rel.getSymIndex(), Rel.Type, Config->IsMips64EL); } @@ -1241,32 +1518,22 @@ RelocationSection<ELFT>::RelocationSection(StringRef Name, bool Sort) this->Entsize = Config->IsRela ? sizeof(Elf_Rela) : sizeof(Elf_Rel); } -template <class ELFT, class RelTy> -static bool compRelocations(const RelTy &A, const RelTy &B) { - bool AIsRel = A.getType(Config->IsMips64EL) == Target->RelativeRel; - bool BIsRel = B.getType(Config->IsMips64EL) == Target->RelativeRel; +static bool compRelocations(const DynamicReloc &A, const DynamicReloc &B) { + bool AIsRel = A.Type == Target->RelativeRel; + bool BIsRel = B.Type == Target->RelativeRel; if (AIsRel != BIsRel) return AIsRel; - - return A.getSymbol(Config->IsMips64EL) < B.getSymbol(Config->IsMips64EL); + return A.getSymIndex() < B.getSymIndex(); } template <class ELFT> void RelocationSection<ELFT>::writeTo(uint8_t *Buf) { - uint8_t *BufBegin = Buf; + if (Sort) + std::stable_sort(Relocs.begin(), Relocs.end(), compRelocations); + for (const DynamicReloc &Rel : Relocs) { encodeDynamicReloc<ELFT>(reinterpret_cast<Elf_Rela *>(Buf), Rel); Buf += Config->IsRela ? sizeof(Elf_Rela) : sizeof(Elf_Rel); } - - if (Sort) { - if (Config->IsRela) - std::stable_sort((Elf_Rela *)BufBegin, - (Elf_Rela *)BufBegin + Relocs.size(), - compRelocations<ELFT, Elf_Rela>); - else - std::stable_sort((Elf_Rel *)BufBegin, (Elf_Rel *)BufBegin + Relocs.size(), - compRelocations<ELFT, Elf_Rel>); - } } template <class ELFT> unsigned RelocationSection<ELFT>::getRelocOffset() { @@ -1354,10 +1621,10 @@ bool AndroidPackedRelocationSection<ELFT>::updateAllocSize() { NonRelatives.push_back(R); } - std::sort(Relatives.begin(), Relatives.end(), - [](const Elf_Rel &A, const Elf_Rel &B) { - return A.r_offset < B.r_offset; - }); + llvm::sort(Relatives.begin(), Relatives.end(), + [](const Elf_Rel &A, const Elf_Rel &B) { + return A.r_offset < B.r_offset; + }); // Try to find groups of relative relocations which are spaced one word // apart from one another. These generally correspond to vtable entries. The @@ -1435,10 +1702,10 @@ bool AndroidPackedRelocationSection<ELFT>::updateAllocSize() { } // Finally the non-relative relocations. 
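// (Editorial note, not part of the patch.) As with the relative relocations
// above, sorting by r_offset is what makes the packed encoding effective:
// the Android format stores offsets as deltas from the previous entry in
// SLEB128 form, so ascending order keeps the deltas small. For example,
// offsets {0x1000, 0x1008, 0x1010} encode as 0x1000, +8, +8.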
- std::sort(NonRelatives.begin(), NonRelatives.end(), - [](const Elf_Rela &A, const Elf_Rela &B) { - return A.r_offset < B.r_offset; - }); + llvm::sort(NonRelatives.begin(), NonRelatives.end(), + [](const Elf_Rela &A, const Elf_Rela &B) { + return A.r_offset < B.r_offset; + }); if (!NonRelatives.empty()) { Add(NonRelatives.size()); Add(HasAddendIfRela); @@ -1461,6 +1728,97 @@ bool AndroidPackedRelocationSection<ELFT>::updateAllocSize() { return RelocData.size() != OldSize; } +template <class ELFT> RelrSection<ELFT>::RelrSection() { + this->Entsize = Config->Wordsize; +} + +template <class ELFT> bool RelrSection<ELFT>::updateAllocSize() { + // This function computes the contents of an SHT_RELR packed relocation + // section. + // + // Proposal for adding SHT_RELR sections to generic-abi is here: + // https://groups.google.com/forum/#!topic/generic-abi/bX460iggiKg + // + // The encoded sequence of Elf64_Relr entries in a SHT_RELR section looks + // like [ AAAAAAAA BBBBBBB1 BBBBBBB1 ... AAAAAAAA BBBBBB1 ... ] + // + // i.e. start with an address, followed by any number of bitmaps. The address + // entry encodes 1 relocation. The subsequent bitmap entries encode up to 63 + // relocations each, at subsequent offsets following the last address entry. + // + // The bitmap entries must have 1 in the least significant bit. The assumption + // here is that an address cannot have 1 in lsb. Odd addresses are not + // supported. + // + // Excluding the least significant bit in the bitmap, each non-zero bit in + // the bitmap represents a relocation to be applied to a corresponding machine + // word that follows the base address word. The second least significant bit + // represents the machine word immediately following the initial address, and + // each bit that follows represents the next word, in linear order. As such, + // a single bitmap can encode up to 31 relocations in a 32-bit object, and + // 63 relocations in a 64-bit object. + // + // This encoding has a couple of interesting properties: + // 1. Looking at any entry, it is clear whether it's an address or a bitmap: + // even means address, odd means bitmap. + // 2. Just a simple list of addresses is a valid encoding. + + size_t OldSize = RelrRelocs.size(); + RelrRelocs.clear(); + + // Same as Config->Wordsize but faster because this is a compile-time + // constant. + const size_t Wordsize = sizeof(typename ELFT::uint); + + // Number of bits to use for the relocation offsets bitmap. + // Must be either 63 or 31. + const size_t NBits = Wordsize * 8 - 1; + + // Get offsets for all relative relocations and sort them. + std::vector<uint64_t> Offsets; + for (const RelativeReloc &Rel : Relocs) + Offsets.push_back(Rel.getOffset()); + llvm::sort(Offsets.begin(), Offsets.end()); + + // For each leading relocation, find following ones that can be folded + // as a bitmap and fold them. + for (size_t I = 0, E = Offsets.size(); I < E;) { + // Add a leading relocation. + RelrRelocs.push_back(Elf_Relr(Offsets[I])); + uint64_t Base = Offsets[I] + Wordsize; + ++I; + + // Find foldable relocations to construct bitmaps. + while (I < E) { + uint64_t Bitmap = 0; + + while (I < E) { + uint64_t Delta = Offsets[I] - Base; + + // If it is too far, it cannot be folded. + if (Delta >= NBits * Wordsize) + break; + + // If it is not a multiple of wordsize away, it cannot be folded. + if (Delta % Wordsize) + break; + + // Fold it. 
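// (Editorial worked example, not part of the patch.) With 64-bit words,
// the offsets {0x10000, 0x10008, 0x10018} pack as follows: 0x10000 becomes
// the leading address entry and Base = 0x10008; then Delta(0x10008) = 0
// sets bit 0 and Delta(0x10018) = 0x10 sets bit 2, giving Bitmap = 0b101.
// The emitted entry is (0b101 << 1) | 1 = 0xb, so the whole run packs into
// just two words: [0x10000, 0xb].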
+ Bitmap |= 1ULL << (Delta / Wordsize); + ++I; + } + + if (!Bitmap) + break; + + RelrRelocs.push_back(Elf_Relr((Bitmap << 1) | 1)); + Base += NBits * Wordsize; + } + } + + return RelrRelocs.size() != OldSize; +} + SymbolTableBaseSection::SymbolTableBaseSection(StringTableSection &StrTabSec) : SyntheticSection(StrTabSec.isDynamic() ? (uint64_t)SHF_ALLOC : 0, StrTabSec.isDynamic() ? SHT_DYNSYM : SHT_SYMTAB, @@ -1476,50 +1834,70 @@ SymbolTableBaseSection::SymbolTableBaseSection(StringTableSection &StrTabSec) static bool sortMipsSymbols(const SymbolTableEntry &L, const SymbolTableEntry &R) { // Sort entries related to non-local preemptible symbols by GOT indexes. - // All other entries go to the first part of GOT in arbitrary order. - bool LIsInLocalGot = !L.Sym->IsInGlobalMipsGot; - bool RIsInLocalGot = !R.Sym->IsInGlobalMipsGot; - if (LIsInLocalGot || RIsInLocalGot) - return !RIsInLocalGot; - return L.Sym->GotIndex < R.Sym->GotIndex; + // All other entries go to the beginning of a dynsym in arbitrary order. + if (L.Sym->isInGot() && R.Sym->isInGot()) + return L.Sym->GotIndex < R.Sym->GotIndex; + if (!L.Sym->isInGot() && !R.Sym->isInGot()) + return false; + return !L.Sym->isInGot(); } void SymbolTableBaseSection::finalizeContents() { getParent()->Link = StrTabSec.getParent()->SectionIndex; + if (this->Type != SHT_DYNSYM) + return; + // If it is a .dynsym, there should be no local symbols, but we need // to do a few things for the dynamic linker. - if (this->Type == SHT_DYNSYM) { - // Section's Info field has the index of the first non-local symbol. - // Because the first symbol entry is a null entry, 1 is the first. - getParent()->Info = 1; - - if (InX::GnuHashTab) { - // NB: It also sorts Symbols to meet the GNU hash table requirements. - InX::GnuHashTab->addSymbols(Symbols); - } else if (Config->EMachine == EM_MIPS) { - std::stable_sort(Symbols.begin(), Symbols.end(), sortMipsSymbols); - } - size_t I = 0; - for (const SymbolTableEntry &S : Symbols) S.Sym->DynsymIndex = ++I; - return; + // Section's Info field has the index of the first non-local symbol. + // Because the first symbol entry is a null entry, 1 is the first. + getParent()->Info = 1; + + if (InX::GnuHashTab) { + // NB: It also sorts Symbols to meet the GNU hash table requirements. + InX::GnuHashTab->addSymbols(Symbols); + } else if (Config->EMachine == EM_MIPS) { + std::stable_sort(Symbols.begin(), Symbols.end(), sortMipsSymbols); } + + size_t I = 0; + for (const SymbolTableEntry &S : Symbols) + S.Sym->DynsymIndex = ++I; } // The ELF spec requires that all local symbols precede global symbols, so we // sort symbol entries in this function. (For .dynsym, we don't do that because // symbols for dynamic linking are inherently all globals.) +// +// Aside from the above, we put local symbols in groups that start with an +// STT_FILE symbol. That is convenient for identifying where local symbols +// are coming from. void SymbolTableBaseSection::postThunkContents() { - if (this->Type == SHT_DYNSYM) - return; - // move all local symbols before global symbols. - auto It = std::stable_partition( + assert(this->Type == SHT_SYMTAB); + + // Move all local symbols before global symbols. + auto E = std::stable_partition( Symbols.begin(), Symbols.end(), [](const SymbolTableEntry &S) { return S.Sym->isLocal() || S.Sym->computeBinding() == STB_LOCAL; }); - size_t NumLocals = It - Symbols.begin(); + size_t NumLocals = E - Symbols.begin(); getParent()->Info = NumLocals + 1; + + // We want to group the local symbols by file.
For that we rebuild the local + // part of the symbols vector. We do not need to care about the STT_FILE + // symbols, they are already naturally placed first in each group. That + // happens because STT_FILE is always the first symbol in the object and hence + // precede all other local symbols we add for a file. + MapVector<InputFile *, std::vector<SymbolTableEntry>> Arr; + for (const SymbolTableEntry &S : llvm::make_range(Symbols.begin(), E)) + Arr[S.Sym->File].push_back(S); + + auto I = Symbols.begin(); + for (std::pair<InputFile *, std::vector<SymbolTableEntry>> &P : Arr) + for (SymbolTableEntry &Entry : P.second) + *I++ = Entry; } void SymbolTableBaseSection::addSymbol(Symbol *B) { @@ -1586,6 +1964,8 @@ template <class ELFT> void SymbolTableSection<ELFT>::writeTo(uint8_t *Buf) { CommonSec = dyn_cast_or_null<BssSection>(D->Section); if (CommonSec) ESym->st_shndx = SHN_COMMON; + else if (Sym->NeedsPltAddr) + ESym->st_shndx = SHN_UNDEF; else if (const OutputSection *OutSec = Sym->getOutputSection()) ESym->st_shndx = OutSec->SectionIndex; else if (isa<Defined>(Sym)) @@ -1627,9 +2007,11 @@ template <class ELFT> void SymbolTableSection<ELFT>::writeTo(uint8_t *Buf) { ESym->st_other |= STO_MIPS_PLT; if (isMicroMips()) { // Set STO_MIPS_MICROMIPS flag and less-significant bit for - // defined microMIPS symbols and shared symbols with PLT record. - if ((Sym->isDefined() && (Sym->StOther & STO_MIPS_MICROMIPS)) || - (Sym->isShared() && Sym->NeedsPltAddr)) { + // a defined microMIPS symbol and symbol should point to its + // PLT entry (in case of microMIPS, PLT entries always contain + // microMIPS code). + if (Sym->isDefined() && + ((Sym->StOther & STO_MIPS_MICROMIPS) || Sym->NeedsPltAddr)) { if (StrTabSec.isDynamic()) ESym->st_value |= 1; ESym->st_other |= STO_MIPS_MICROMIPS; @@ -1682,12 +2064,14 @@ GnuHashTableSection::GnuHashTableSection() void GnuHashTableSection::finalizeContents() { getParent()->Link = InX::DynSymTab->getParent()->SectionIndex; - // Computes bloom filter size in word size. We want to allocate 8 + // Computes bloom filter size in word size. We want to allocate 12 // bits for each symbol. It must be a power of two. - if (Symbols.empty()) + if (Symbols.empty()) { MaskWords = 1; - else - MaskWords = NextPowerOf2((Symbols.size() - 1) / Config->Wordsize); + } else { + uint64_t NumBits = Symbols.size() * 12; + MaskWords = NextPowerOf2(NumBits / (Config->Wordsize * 8)); + } Size = 16; // Header Size += Config->Wordsize * MaskWords; // Bloom filter @@ -1705,7 +2089,7 @@ void GnuHashTableSection::writeTo(uint8_t *Buf) { write32(Buf, NBuckets); write32(Buf + 4, InX::DynSymTab->getNumSymbols() - Symbols.size()); write32(Buf + 8, MaskWords); - write32(Buf + 12, getShift2()); + write32(Buf + 12, Shift2); Buf += 16; // Write a bloom filter and a hash table. @@ -1722,12 +2106,12 @@ void GnuHashTableSection::writeTo(uint8_t *Buf) { // [1] Ulrich Drepper (2011), "How To Write Shared Libraries" (Ver. 4.1.2), // p.9, https://www.akkadia.org/drepper/dsohowto.pdf void GnuHashTableSection::writeBloomFilter(uint8_t *Buf) { - const unsigned C = Config->Wordsize * 8; + unsigned C = Config->Is64 ? 
64 : 32; for (const Entry &Sym : Symbols) { size_t I = (Sym.Hash / C) & (MaskWords - 1); uint64_t Val = readUint(Buf + I * Config->Wordsize); Val |= uint64_t(1) << (Sym.Hash % C); - Val |= uint64_t(1) << ((Sym.Hash >> getShift2()) % C); + Val |= uint64_t(1) << ((Sym.Hash >> Shift2) % C); writeUint(Buf + I * Config->Wordsize, Val); } } @@ -1769,21 +2153,23 @@ void GnuHashTableSection::addSymbols(std::vector<SymbolTableEntry> &V) { // its type correctly. std::vector<SymbolTableEntry>::iterator Mid = std::stable_partition(V.begin(), V.end(), [](const SymbolTableEntry &S) { - // Shared symbols that this executable preempts are special. The dynamic - // linker has to look them up, so they have to be in the hash table. - if (auto *SS = dyn_cast<SharedSymbol>(S.Sym)) - return SS->CopyRelSec == nullptr && !SS->NeedsPltAddr; return !S.Sym->isDefined(); }); - if (Mid == V.end()) - return; // We chose load factor 4 for the on-disk hash table. For each hash // collision, the dynamic linker will compare a uint32_t hash value. - // Since the integer comparison is quite fast, we believe we can make - // the load factor even larger. 4 is just a conservative choice. + // Since the integer comparison is quite fast, we believe we can + // make the load factor even larger. 4 is just a conservative choice. + // + // Note that we don't want to create a zero-sized hash table because + // the Android loader as of 2018 doesn't like a .gnu.hash containing such + // a table. If that's the case, we create a hash table with one unused + // dummy slot. NBuckets = std::max<size_t>((V.end() - Mid) / 4, 1); + if (Mid == V.end()) + return; + for (SymbolTableEntry &Ent : llvm::make_range(Mid, V.end())) { Symbol *B = Ent.Sym; uint32_t Hash = hashGnu(B->getName()); @@ -1817,6 +2203,9 @@ void HashTableSection::finalizeContents() { } void HashTableSection::writeTo(uint8_t *Buf) { + // See comment in GnuHashTableSection::writeTo. + memset(Buf, 0, Size); + unsigned NumSymbols = InX::DynSymTab->getNumSymbols(); uint32_t *P = reinterpret_cast<uint32_t *>(Buf); @@ -1836,9 +2225,12 @@ void HashTableSection::writeTo(uint8_t *Buf) { } } -PltSection::PltSection(size_t S) - : SyntheticSection(SHF_ALLOC | SHF_EXECINSTR, SHT_PROGBITS, 16, ".plt"), - HeaderSize(S) { +// On PowerPC64 the lazy symbol resolvers go into the `global linkage table` +// in the .glink section, rather than the typical .plt section. +PltSection::PltSection(bool IsIplt) + : SyntheticSection(SHF_ALLOC | SHF_EXECINSTR, SHT_PROGBITS, 16, + Config->EMachine == EM_PPC64 ? ".glink" : ".plt"), + HeaderSize(IsIplt ? 0 : Target->PltHeaderSize), IsIplt(IsIplt) { // The PLT needs to be writable on SPARC as the dynamic linker will // modify the instructions in the PLT entries. if (Config->EMachine == EM_SPARCV9) @@ -1848,7 +2240,7 @@ PltSection::PltSection(size_t S) void PltSection::writeTo(uint8_t *Buf) { // At beginning of PLT but not the IPLT, we have code to call the dynamic // linker to resolve dynsyms at runtime. Write such code.
- if (HeaderSize != 0) + if (!IsIplt) Target->writePltHeader(Buf); size_t Off = HeaderSize; // The IPlt is immediately after the Plt, account for this in RelOff @@ -1867,7 +2259,7 @@ void PltSection::writeTo(uint8_t *Buf) { template <class ELFT> void PltSection::addEntry(Symbol &Sym) { Sym.PltIndex = Entries.size(); RelocationBaseSection *PltRelocSection = InX::RelaPlt; - if (HeaderSize == 0) { + if (IsIplt) { PltRelocSection = InX::RelaIplt; Sym.IsInIplt = true; } @@ -1884,7 +2276,7 @@ size_t PltSection::getSize() const { // example ARM uses mapping symbols to aid disassembly void PltSection::addSymbols() { // The PLT may have symbols defined for the Header, the IPLT has no header - if (HeaderSize != 0) + if (!IsIplt) Target->addPltHeaderSymbols(*this); size_t Off = HeaderSize; for (size_t I = 0; I < Entries.size(); ++I) { @@ -1894,7 +2286,7 @@ void PltSection::addSymbols() { } unsigned PltSection::getPltRelocOff() const { - return (HeaderSize == 0) ? InX::Plt->getSize() : 0; + return IsIplt ? InX::Plt->getSize() : 0; } // The string hash function for .gdb_index. @@ -1905,16 +2297,48 @@ static uint32_t computeGdbHash(StringRef S) { return H; } -static std::vector<GdbIndexChunk::CuEntry> readCuList(DWARFContext &Dwarf) { - std::vector<GdbIndexChunk::CuEntry> Ret; +GdbIndexSection::GdbIndexSection() + : SyntheticSection(0, SHT_PROGBITS, 1, ".gdb_index") {} + +// Returns the desired size of an on-disk hash table for a .gdb_index section. +// There's a tradeoff between size and collision rate. We aim 75% utilization. +size_t GdbIndexSection::computeSymtabSize() const { + return std::max<size_t>(NextPowerOf2(Symbols.size() * 4 / 3), 1024); +} + +// Compute the output section size. +void GdbIndexSection::initOutputSize() { + Size = sizeof(GdbIndexHeader) + computeSymtabSize() * 8; + + for (GdbChunk &Chunk : Chunks) + Size += Chunk.CompilationUnits.size() * 16 + Chunk.AddressAreas.size() * 20; + + // Add the constant pool size if exists. 
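// (Editorial note, not part of the patch.) This works because NameOff
// values are assigned cumulatively after all CU vectors (see createSymbols
// below), so the last symbol's NameOff plus its NUL-terminated name length
// equals the total constant pool size. For example, with two symbols whose
// CU vectors occupy 16 bytes: NameOff("foo") = 16 and NameOff("bar") = 20,
// so the pool size is 20 + 3 + 1 = 24.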
+// Compute the output section size.
+void GdbIndexSection::initOutputSize() {
+  Size = sizeof(GdbIndexHeader) + computeSymtabSize() * 8;
+
+  for (GdbChunk &Chunk : Chunks)
+    Size += Chunk.CompilationUnits.size() * 16 + Chunk.AddressAreas.size() * 20;
+
+  // Add the constant pool size if it exists.
+  if (!Symbols.empty()) {
+    GdbSymbol &Sym = Symbols.back();
+    Size += Sym.NameOff + Sym.Name.size() + 1;
+  }
+}
+
+static std::vector<InputSection *> getDebugInfoSections() {
+  std::vector<InputSection *> Ret;
+  for (InputSectionBase *S : InputSections)
+    if (InputSection *IS = dyn_cast<InputSection>(S))
+      if (IS->Name == ".debug_info")
+        Ret.push_back(IS);
+  return Ret;
+}
+
+static std::vector<GdbIndexSection::CuEntry> readCuList(DWARFContext &Dwarf) {
+  std::vector<GdbIndexSection::CuEntry> Ret;
   for (std::unique_ptr<DWARFCompileUnit> &Cu : Dwarf.compile_units())
     Ret.push_back({Cu->getOffset(), Cu->getLength() + 4});
   return Ret;
 }
 
-static std::vector<GdbIndexChunk::AddressEntry>
+static std::vector<GdbIndexSection::AddressEntry>
 readAddressAreas(DWARFContext &Dwarf, InputSection *Sec) {
-  std::vector<GdbIndexChunk::AddressEntry> Ret;
+  std::vector<GdbIndexSection::AddressEntry> Ret;
 
   uint32_t CuIdx = 0;
   for (std::unique_ptr<DWARFCompileUnit> &Cu : Dwarf.compile_units()) {
@@ -1938,218 +2362,192 @@ readAddressAreas(DWARFContext &Dwarf, InputSection *Sec) {
   return Ret;
 }
 
-static std::vector<GdbIndexChunk::NameTypeEntry>
-readPubNamesAndTypes(DWARFContext &Dwarf) {
+static std::vector<GdbIndexSection::NameTypeEntry>
+readPubNamesAndTypes(DWARFContext &Dwarf, uint32_t Idx) {
   StringRef Sec1 = Dwarf.getDWARFObj().getGnuPubNamesSection();
   StringRef Sec2 = Dwarf.getDWARFObj().getGnuPubTypesSection();
 
-  std::vector<GdbIndexChunk::NameTypeEntry> Ret;
+  std::vector<GdbIndexSection::NameTypeEntry> Ret;
   for (StringRef Sec : {Sec1, Sec2}) {
     DWARFDebugPubTable Table(Sec, Config->IsLE, true);
-    for (const DWARFDebugPubTable::Set &Set : Table.getData()) {
-      for (const DWARFDebugPubTable::Entry &Ent : Set.Entries) {
-        CachedHashStringRef S(Ent.Name, computeGdbHash(Ent.Name));
-        Ret.push_back({S, Ent.Descriptor.toBits()});
-      }
-    }
+    for (const DWARFDebugPubTable::Set &Set : Table.getData())
+      for (const DWARFDebugPubTable::Entry &Ent : Set.Entries)
+        Ret.push_back({{Ent.Name, computeGdbHash(Ent.Name)},
+                       (Ent.Descriptor.toBits() << 24) | Idx});
   }
   return Ret;
 }
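The new readPubNamesAndTypes() packs two things into each NameTypeEntry value: the descriptor byte from .debug_gnu_pub{names,types} and the chunk-local CU index. A sketch of that packing (helper names are ours; the field split follows the expression above):

#include <cstdint>

// High 8 bits: the pubnames/pubtypes descriptor byte;
// low 24 bits: the index of the compilation unit within this chunk.
static uint32_t packNameType(uint8_t DescriptorBits, uint32_t CuIdx) {
  return (uint32_t(DescriptorBits) << 24) | CuIdx; // CuIdx must fit in 24 bits
}

static uint8_t descriptorOf(uint32_t V) { return V >> 24; }
static uint32_t cuIndexOf(uint32_t V) { return V & 0xffffff; }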
 
-static std::vector<InputSection *> getDebugInfoSections() {
-  std::vector<InputSection *> Ret;
-  for (InputSectionBase *S : InputSections)
-    if (InputSection *IS = dyn_cast<InputSection>(S))
-      if (IS->Name == ".debug_info")
-        Ret.push_back(IS);
-  return Ret;
-}
+// Create a list of symbols from a given list of symbol names and types
+// by uniquifying them by name.
+static std::vector<GdbIndexSection::GdbSymbol>
+createSymbols(ArrayRef<std::vector<GdbIndexSection::NameTypeEntry>> NameTypes) {
+  typedef GdbIndexSection::GdbSymbol GdbSymbol;
+  typedef GdbIndexSection::NameTypeEntry NameTypeEntry;
 
-void GdbIndexSection::fixCuIndex() {
-  uint32_t Idx = 0;
-  for (GdbIndexChunk &Chunk : Chunks) {
-    for (GdbIndexChunk::AddressEntry &Ent : Chunk.AddressAreas)
-      Ent.CuIndex += Idx;
-    Idx += Chunk.CompilationUnits.size();
-  }
-}
+  // The number of symbols we will handle in this function is of the order
+  // of millions for very large executables, so we use multi-threading to
+  // speed it up.
+  size_t NumShards = 32;
+  size_t Concurrency = 1;
+  if (ThreadsEnabled)
+    Concurrency =
+        std::min<size_t>(PowerOf2Floor(hardware_concurrency()), NumShards);
 
-std::vector<std::vector<uint32_t>> GdbIndexSection::createCuVectors() {
-  std::vector<std::vector<uint32_t>> Ret;
-  uint32_t Idx = 0;
-  uint32_t Off = 0;
+  // A sharded map to uniquify symbols by name.
+  std::vector<DenseMap<CachedHashStringRef, size_t>> Map(NumShards);
+  size_t Shift = 32 - countTrailingZeros(NumShards);
 
-  for (GdbIndexChunk &Chunk : Chunks) {
-    for (GdbIndexChunk::NameTypeEntry &Ent : Chunk.NamesAndTypes) {
-      GdbSymbol *&Sym = Symbols[Ent.Name];
-      if (!Sym) {
-        Sym = make<GdbSymbol>(GdbSymbol{Ent.Name.hash(), Off, Ret.size()});
-        Off += Ent.Name.size() + 1;
-        Ret.push_back({});
-      }
+  // Instantiate GdbSymbols while uniquifying them by name.
+  std::vector<std::vector<GdbSymbol>> Symbols(NumShards);
+  parallelForEachN(0, Concurrency, [&](size_t ThreadId) {
+    for (ArrayRef<NameTypeEntry> Entries : NameTypes) {
+      for (const NameTypeEntry &Ent : Entries) {
+        size_t ShardId = Ent.Name.hash() >> Shift;
+        if ((ShardId & (Concurrency - 1)) != ThreadId)
+          continue;
 
-      // gcc 5.4.1 produces a buggy .debug_gnu_pubnames that contains
-      // duplicate entries, so we want to dedup them.
-      std::vector<uint32_t> &Vec = Ret[Sym->CuVectorIndex];
-      uint32_t Val = (Ent.Type << 24) | Idx;
-      if (Vec.empty() || Vec.back() != Val)
-        Vec.push_back(Val);
+        size_t &Idx = Map[ShardId][Ent.Name];
+        if (Idx) {
+          Symbols[ShardId][Idx - 1].CuVector.push_back(Ent.Type);
+          continue;
+        }
+
+        Idx = Symbols[ShardId].size() + 1;
+        Symbols[ShardId].push_back({Ent.Name, {Ent.Type}, 0, 0});
+      }
     }
-    Idx += Chunk.CompilationUnits.size();
+  });
+
+  size_t NumSymbols = 0;
+  for (ArrayRef<GdbSymbol> V : Symbols)
+    NumSymbols += V.size();
+
+  // The return type is a flattened vector, so we'll copy each vector's
+  // contents to Ret.
+  std::vector<GdbSymbol> Ret;
+  Ret.reserve(NumSymbols);
+  for (std::vector<GdbSymbol> &Vec : Symbols)
+    for (GdbSymbol &Sym : Vec)
+      Ret.push_back(std::move(Sym));
+
+  // CU vectors and symbol names are adjacent in the output file.
+  // We can compute their offsets in the output file now.
+  size_t Off = 0;
+  for (GdbSymbol &Sym : Ret) {
+    Sym.CuVectorOff = Off;
+    Off += (Sym.CuVector.size() + 1) * 4;
+  }
+  for (GdbSymbol &Sym : Ret) {
+    Sym.NameOff = Off;
+    Off += Sym.Name.size() + 1;
   }
 
-  StringPoolSize = Off;
   return Ret;
 }
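Why createSymbols() needs no locks: the top five bits of the 32-bit name hash select one of the 32 shard maps, and a shard belongs to exactly one worker thread, namely the one whose id equals the shard id modulo the power-of-two concurrency. A minimal restatement of that ownership rule (names ours):

#include <cstddef>
#include <cstdint>

constexpr size_t NumShards = 32; // power of two
constexpr size_t Shift = 32 - 5; // 32 - log2(NumShards)

static size_t shardOf(uint32_t Hash) { return Hash >> Shift; }

// Every shard is owned by exactly one thread, so each shard's DenseMap
// is only ever touched single-threaded.
static bool owns(size_t ThreadId, size_t ShardId, size_t Concurrency) {
  return (ShardId & (Concurrency - 1)) == ThreadId;
}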
 
-template <class ELFT> GdbIndexSection *elf::createGdbIndex() {
-  // Gather debug info to create a .gdb_index section.
+// Returns a newly-created .gdb_index section.
+template <class ELFT> GdbIndexSection *GdbIndexSection::create() {
   std::vector<InputSection *> Sections = getDebugInfoSections();
-  std::vector<GdbIndexChunk> Chunks(Sections.size());
-
-  parallelForEachN(0, Chunks.size(), [&](size_t I) {
-    ObjFile<ELFT> *File = Sections[I]->getFile<ELFT>();
-    DWARFContext Dwarf(make_unique<LLDDwarfObj<ELFT>>(File));
-
-    Chunks[I].DebugInfoSec = Sections[I];
-    Chunks[I].CompilationUnits = readCuList(Dwarf);
-    Chunks[I].AddressAreas = readAddressAreas(Dwarf, Sections[I]);
-    Chunks[I].NamesAndTypes = readPubNamesAndTypes(Dwarf);
-  });
 
   // .debug_gnu_pub{names,types} are useless in executables.
   // They are present in input object files solely for creating
-  // a .gdb_index. So we can remove it from the output.
+  // a .gdb_index. So we can remove them from the output.
   for (InputSectionBase *S : InputSections)
     if (S->Name == ".debug_gnu_pubnames" || S->Name == ".debug_gnu_pubtypes")
       S->Live = false;
 
-  // Create a .gdb_index and returns it.
-  return make<GdbIndexSection>(std::move(Chunks));
-}
+  std::vector<GdbChunk> Chunks(Sections.size());
+  std::vector<std::vector<NameTypeEntry>> NameTypes(Sections.size());
 
-static size_t getCuSize(ArrayRef<GdbIndexChunk> Arr) {
-  size_t Ret = 0;
-  for (const GdbIndexChunk &D : Arr)
-    Ret += D.CompilationUnits.size();
-  return Ret;
-}
-
-static size_t getAddressAreaSize(ArrayRef<GdbIndexChunk> Arr) {
-  size_t Ret = 0;
-  for (const GdbIndexChunk &D : Arr)
-    Ret += D.AddressAreas.size();
-  return Ret;
-}
-
-std::vector<GdbSymbol *> GdbIndexSection::createGdbSymtab() {
-  uint32_t Size = NextPowerOf2(Symbols.size() * 4 / 3);
-  if (Size < 1024)
-    Size = 1024;
-
-  uint32_t Mask = Size - 1;
-  std::vector<GdbSymbol *> Ret(Size);
+  parallelForEachN(0, Sections.size(), [&](size_t I) {
+    ObjFile<ELFT> *File = Sections[I]->getFile<ELFT>();
+    DWARFContext Dwarf(make_unique<LLDDwarfObj<ELFT>>(File));
 
-  for (auto &KV : Symbols) {
-    GdbSymbol *Sym = KV.second;
-    uint32_t I = Sym->NameHash & Mask;
-    uint32_t Step = ((Sym->NameHash * 17) & Mask) | 1;
+    Chunks[I].Sec = Sections[I];
+    Chunks[I].CompilationUnits = readCuList(Dwarf);
+    Chunks[I].AddressAreas = readAddressAreas(Dwarf, Sections[I]);
+    NameTypes[I] = readPubNamesAndTypes(Dwarf, I);
+  });
 
-    while (Ret[I])
-      I = (I + Step) & Mask;
-    Ret[I] = Sym;
-  }
+  auto *Ret = make<GdbIndexSection>();
+  Ret->Chunks = std::move(Chunks);
+  Ret->Symbols = createSymbols(NameTypes);
+  Ret->initOutputSize();
   return Ret;
 }
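The rewritten writeTo() below fills a GdbIndexHeader in place instead of emitting raw write32le calls. The struct itself is declared in SyntheticSections.h and is not visible in this diff; presumably it mirrors the six-field, 24-byte .gdb_index version 7 header. A sketch, with plain uint32_t standing in for the little-endian wrapper types LLD would actually use:

#include <cstdint>

struct GdbIndexHeader {
  uint32_t Version;         // 7
  uint32_t CuListOff;       // offset of the CU list
  uint32_t CuTypesOff;      // offset of the types CU list (unused here)
  uint32_t AddressAreaOff;  // offset of the address area
  uint32_t SymtabOff;       // offset of the symbol hash table
  uint32_t ConstantPoolOff; // offset of the constant pool
};
static_assert(sizeof(GdbIndexHeader) == 24, "matches the on-disk header");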
 
-GdbIndexSection::GdbIndexSection(std::vector<GdbIndexChunk> &&C)
-    : SyntheticSection(0, SHT_PROGBITS, 1, ".gdb_index"), Chunks(std::move(C)) {
-  fixCuIndex();
-  CuVectors = createCuVectors();
-  GdbSymtab = createGdbSymtab();
-
-  // Compute offsets early to know the section size.
-  // Each chunk size needs to be in sync with what we write in writeTo.
-  CuTypesOffset = CuListOffset + getCuSize(Chunks) * 16;
-  SymtabOffset = CuTypesOffset + getAddressAreaSize(Chunks) * 20;
-  ConstantPoolOffset = SymtabOffset + GdbSymtab.size() * 8;
-
-  size_t Off = 0;
-  for (ArrayRef<uint32_t> Vec : CuVectors) {
-    CuVectorOffsets.push_back(Off);
-    Off += (Vec.size() + 1) * 4;
-  }
-  StringPoolOffset = ConstantPoolOffset + Off;
-}
-
-size_t GdbIndexSection::getSize() const {
-  return StringPoolOffset + StringPoolSize;
-}
-
 void GdbIndexSection::writeTo(uint8_t *Buf) {
-  // Write the section header.
-  write32le(Buf, 7);
-  write32le(Buf + 4, CuListOffset);
-  write32le(Buf + 8, CuTypesOffset);
-  write32le(Buf + 12, CuTypesOffset);
-  write32le(Buf + 16, SymtabOffset);
-  write32le(Buf + 20, ConstantPoolOffset);
-  Buf += 24;
+  // Write the header.
+  auto *Hdr = reinterpret_cast<GdbIndexHeader *>(Buf);
+  uint8_t *Start = Buf;
+  Hdr->Version = 7;
+  Buf += sizeof(*Hdr);
 
   // Write the CU list.
-  for (GdbIndexChunk &D : Chunks) {
-    for (GdbIndexChunk::CuEntry &Cu : D.CompilationUnits) {
-      write64le(Buf, D.DebugInfoSec->OutSecOff + Cu.CuOffset);
+  Hdr->CuListOff = Buf - Start;
+  for (GdbChunk &Chunk : Chunks) {
+    for (CuEntry &Cu : Chunk.CompilationUnits) {
+      write64le(Buf, Chunk.Sec->OutSecOff + Cu.CuOffset);
       write64le(Buf + 8, Cu.CuLength);
       Buf += 16;
     }
   }
 
   // Write the address area.
-  for (GdbIndexChunk &D : Chunks) {
-    for (GdbIndexChunk::AddressEntry &E : D.AddressAreas) {
-      uint64_t BaseAddr =
-          E.Section->getParent()->Addr + E.Section->getOffset(0);
+  Hdr->CuTypesOff = Buf - Start;
+  Hdr->AddressAreaOff = Buf - Start;
+  uint32_t CuOff = 0;
+  for (GdbChunk &Chunk : Chunks) {
+    for (AddressEntry &E : Chunk.AddressAreas) {
+      uint64_t BaseAddr = E.Section->getVA(0);
       write64le(Buf, BaseAddr + E.LowAddress);
      write64le(Buf + 8, BaseAddr + E.HighAddress);
-      write32le(Buf + 16, E.CuIndex);
+      write32le(Buf + 16, E.CuIndex + CuOff);
       Buf += 20;
     }
+    CuOff += Chunk.CompilationUnits.size();
   }
 
-  // Write the symbol table.
-  for (GdbSymbol *Sym : GdbSymtab) {
-    if (Sym) {
-      write32le(Buf, Sym->NameOffset + StringPoolOffset - ConstantPoolOffset);
-      write32le(Buf + 4, CuVectorOffsets[Sym->CuVectorIndex]);
-    }
-    Buf += 8;
+  // Write the on-disk open-addressing hash table containing symbols.
+  Hdr->SymtabOff = Buf - Start;
+  size_t SymtabSize = computeSymtabSize();
+  uint32_t Mask = SymtabSize - 1;
+
+  for (GdbSymbol &Sym : Symbols) {
+    uint32_t H = Sym.Name.hash();
+    uint32_t I = H & Mask;
+    uint32_t Step = ((H * 17) & Mask) | 1;
+
+    while (read32le(Buf + I * 8))
+      I = (I + Step) & Mask;
+
+    write32le(Buf + I * 8, Sym.NameOff);
+    write32le(Buf + I * 8 + 4, Sym.CuVectorOff);
   }
+  Buf += SymtabSize * 8;
+
+  // Write the string pool.
+  Hdr->ConstantPoolOff = Buf - Start;
+  for (GdbSymbol &Sym : Symbols)
+    memcpy(Buf + Sym.NameOff, Sym.Name.data(), Sym.Name.size());
 
   // Write the CU vectors.
-  for (ArrayRef<uint32_t> Vec : CuVectors) {
-    write32le(Buf, Vec.size());
+  for (GdbSymbol &Sym : Symbols) {
+    write32le(Buf, Sym.CuVector.size());
     Buf += 4;
-    for (uint32_t Val : Vec) {
+    for (uint32_t Val : Sym.CuVector) {
       write32le(Buf, Val);
       Buf += 4;
     }
   }
-
-  // Write the string pool.
-  for (auto &KV : Symbols) {
-    CachedHashStringRef S = KV.first;
-    GdbSymbol *Sym = KV.second;
-    size_t Off = Sym->NameOffset;
-    memcpy(Buf + Off, S.val().data(), S.size());
-    Buf[Off + S.size()] = '\0';
-  }
 }
 
 bool GdbIndexSection::empty() const { return !Out::DebugInfo; }
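The symbol-table write above is classic open addressing: the probe step ((H * 17) & Mask) | 1 is forced odd, so with a power-of-two table size the probe sequence eventually visits every slot. A reader-side sketch (ours; a real consumer such as gdb would also compare the name behind the slot's name offset before accepting it):

#include <cstdint>
#include <cstring>

static uint32_t get32le(const uint8_t *P) {
  uint32_t V;
  std::memcpy(&V, P, sizeof(V));
  return V; // assumes a little-endian host for brevity
}

// Returns the slot where a symbol with hash H either lives or would be
// inserted. A zero name-offset word marks an empty slot: name offsets in
// the constant pool are always nonzero because CU vectors precede names.
static const uint8_t *findSlot(const uint8_t *Symtab, uint32_t NumSlots,
                               uint32_t H) {
  uint32_t Mask = NumSlots - 1;
  uint32_t I = H & Mask;
  uint32_t Step = ((H * 17) & Mask) | 1;
  while (get32le(Symtab + I * 8) != 0)
    I = (I + Step) & Mask; // a reader would check for a name match here
  return Symtab + I * 8;
}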
 
 EhFrameHeader::EhFrameHeader()
-    : SyntheticSection(SHF_ALLOC, SHT_PROGBITS, 1, ".eh_frame_hdr") {}
+    : SyntheticSection(SHF_ALLOC, SHT_PROGBITS, 4, ".eh_frame_hdr") {}
 
 // .eh_frame_hdr contains a binary search table of pointers to FDEs.
 // Each entry of the search table consists of two values,
@@ -2160,14 +2558,6 @@ void EhFrameHeader::writeTo(uint8_t *Buf) {
   std::vector<FdeData> Fdes = InX::EhFrame->getFdeData();
 
-  // Sort the FDE list by their PC and uniqueify. Usually there is only
-  // one FDE for a PC (i.e. function), but if ICF merges two functions
-  // into one, there can be more than one FDEs pointing to the address.
-  auto Less = [](const FdeData &A, const FdeData &B) { return A.Pc < B.Pc; };
-  std::stable_sort(Fdes.begin(), Fdes.end(), Less);
-  auto Eq = [](const FdeData &A, const FdeData &B) { return A.Pc == B.Pc; };
-  Fdes.erase(std::unique(Fdes.begin(), Fdes.end(), Eq), Fdes.end());
-
   Buf[0] = 1;
   Buf[1] = DW_EH_PE_pcrel | DW_EH_PE_sdata4;
   Buf[2] = DW_EH_PE_udata4;
@@ -2176,10 +2566,9 @@ void EhFrameHeader::writeTo(uint8_t *Buf) {
   write32(Buf + 8, Fdes.size());
   Buf += 12;
 
-  uint64_t VA = this->getVA();
   for (FdeData &Fde : Fdes) {
-    write32(Buf, Fde.Pc - VA);
-    write32(Buf + 4, Fde.FdeVA - VA);
+    write32(Buf, Fde.PcRel);
+    write32(Buf + 4, Fde.FdeVARel);
     Buf += 8;
   }
 }
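The loop above emits the .eh_frame_hdr search table: pairs of 32-bit values (initial PC, FDE address), both now precomputed by getFdeData() relative to the start of .eh_frame_hdr, and presumably already sorted and deduplicated there, given the sort removed here. An unwinder binary-searches this table at runtime; a sketch of that consumer side (ours, not part of the patch):

#include <cstdint>

struct Entry {
  int32_t PcRel;  // function start, relative to the .eh_frame_hdr address
  int32_t FdeRel; // FDE address, relative to the .eh_frame_hdr address
};

// Returns the last entry whose PC is <= Pc, or nullptr if Pc precedes
// every entry; entries must be sorted by PC.
static const Entry *findFde(const Entry *Table, uint32_t Count, uint64_t Pc,
                            uint64_t HdrVA) {
  uint32_t Lo = 0, Hi = Count;
  while (Lo < Hi) {
    uint32_t Mid = Lo + (Hi - Lo) / 2;
    if (HdrVA + Table[Mid].PcRel <= Pc)
      Lo = Mid + 1;
    else
      Hi = Mid;
  }
  return Lo == 0 ? nullptr : &Table[Lo - 1];
}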
@@ -2289,11 +2678,9 @@ VersionNeedSection<ELFT>::VersionNeedSection()
   NextIndex = getVerDefNum() + 1;
 }
 
-template <class ELFT>
-void VersionNeedSection<ELFT>::addSymbol(SharedSymbol *SS) {
-  SharedFile<ELFT> &File = SS->getFile<ELFT>();
-  const typename ELFT::Verdef *Ver = File.Verdefs[SS->VerdefIndex];
-  if (!Ver) {
+template <class ELFT> void VersionNeedSection<ELFT>::addSymbol(Symbol *SS) {
+  auto &File = cast<SharedFile<ELFT>>(*SS->File);
+  if (SS->VerdefIndex == VER_NDX_GLOBAL) {
     SS->VersionId = VER_NDX_GLOBAL;
     return;
   }
@@ -2303,7 +2690,9 @@
   // for the soname.
   if (File.VerdefMap.empty())
     Needed.push_back({&File, InX::DynStrTab->addString(File.SoName)});
+  const typename ELFT::Verdef *Ver = File.Verdefs[SS->VerdefIndex];
   typename SharedFile<ELFT>::NeededVer &NV = File.VerdefMap[Ver];
+
   // If we don't already know that we need an Elf_Vernaux for this Elf_Verdef,
   // prepare to create one by allocating a version identifier and creating a
   // dynstr entry for the version name.
@@ -2429,10 +2818,8 @@ void MergeNoTailSection::finalizeContents() {
   parallelForEachN(0, Concurrency, [&](size_t ThreadId) {
     for (MergeInputSection *Sec : Sections) {
       for (size_t I = 0, E = Sec->Pieces.size(); I != E; ++I) {
-        if (!Sec->Pieces[I].Live)
-          continue;
         size_t ShardId = getShardId(Sec->Pieces[I].Hash);
-        if ((ShardId & (Concurrency - 1)) == ThreadId)
+        if ((ShardId & (Concurrency - 1)) == ThreadId && Sec->Pieces[I].Live)
          Sec->Pieces[I].OutputOff = Shards[ShardId].add(Sec->getData(I));
       }
     }
@@ -2469,11 +2856,20 @@ static MergeSyntheticSection *createMergeSynthetic(StringRef Name,
   return make<MergeNoTailSection>(Name, Type, Flags, Alignment);
 }
 
-// Debug sections may be compressed by zlib. Uncompress if exists.
+// Debug sections may be compressed by zlib. Decompress them if present.
 void elf::decompressSections() {
+  parallelForEach(InputSections,
+                  [](InputSectionBase *Sec) { Sec->maybeDecompress(); });
+}
+
+template <class ELFT> void elf::splitSections() {
+  // splitIntoPieces needs to be called on each MergeInputSection
+  // before calling finalizeContents().
   parallelForEach(InputSections, [](InputSectionBase *Sec) {
-    if (Sec->Live)
-      Sec->maybeUncompress();
+    if (auto *S = dyn_cast<MergeInputSection>(Sec))
+      S->splitIntoPieces();
+    else if (auto *Eh = dyn_cast<EhInputSection>(Sec))
+      Eh->split<ELFT>();
   });
 }
@@ -2485,14 +2881,6 @@
 // that it replaces. It then finalizes each synthetic section in order
 // to compute an output offset for each piece of each input section.
 void elf::mergeSections() {
-  // splitIntoPieces needs to be called on each MergeInputSection
-  // before calling finalizeContents(). Do that first.
-  parallelForEach(InputSections, [](InputSectionBase *Sec) {
-    if (Sec->Live)
-      if (auto *S = dyn_cast<MergeInputSection>(Sec))
-        S->splitIntoPieces();
-  });
-
   std::vector<MergeSyntheticSection *> MergeSections;
   for (InputSectionBase *&S : InputSections) {
     MergeInputSection *MS = dyn_cast<MergeInputSection>(S);
@@ -2554,8 +2942,7 @@ ARMExidxSentinelSection::ARMExidxSentinelSection()
 // address described by any other table entry.
 void ARMExidxSentinelSection::writeTo(uint8_t *Buf) {
   assert(Highest);
-  uint64_t S =
-      Highest->getParent()->Addr + Highest->getOffset(Highest->getSize());
+  uint64_t S = Highest->getVA(Highest->getSize());
   uint64_t P = getVA();
   Target->relocateOne(Buf, R_ARM_PREL31, S - P);
   write32le(Buf + 4, 1);
@@ -2563,15 +2950,16 @@
 // The sentinel has to be removed if there are no other .ARM.exidx entries.
 bool ARMExidxSentinelSection::empty() const {
-  OutputSection *OS = getParent();
-  for (auto *B : OS->SectionCommands)
-    if (auto *ISD = dyn_cast<InputSectionDescription>(B))
-      for (auto *S : ISD->Sections)
-        if (!isa<ARMExidxSentinelSection>(S))
-          return false;
+  for (InputSection *IS : getInputSections(getParent()))
+    if (!isa<ARMExidxSentinelSection>(IS))
+      return false;
   return true;
 }
 
+bool ARMExidxSentinelSection::classof(const SectionBase *D) {
+  return D->kind() == InputSectionBase::Synthetic && D->Type == SHT_ARM_EXIDX;
+}
+
 ThunkSection::ThunkSection(OutputSection *OS, uint64_t Off)
     : SyntheticSection(SHF_ALLOC | SHF_EXECINSTR, SHT_PROGBITS,
                        Config->Wordsize, ".text.thunk") {
@@ -2580,16 +2968,13 @@ ThunkSection::ThunkSection(OutputSection *OS, uint64_t Off)
 }
 
 void ThunkSection::addThunk(Thunk *T) {
-  uint64_t Off = alignTo(Size, T->Alignment);
-  T->Offset = Off;
   Thunks.push_back(T);
   T->addSymbols(*this);
-  Size = Off + T->size();
 }
 
 void ThunkSection::writeTo(uint8_t *Buf) {
-  for (const Thunk *T : Thunks)
-    T->writeTo(Buf + T->Offset, *this);
+  for (Thunk *T : Thunks)
+    T->writeTo(Buf + T->Offset);
 }
 
 InputSection *ThunkSection::getTargetInputSection() const {
@@ -2599,6 +2984,20 @@
   return T->getTargetInputSection();
 }
 
+bool ThunkSection::assignOffsets() {
+  uint64_t Off = 0;
+  for (Thunk *T : Thunks) {
+    Off = alignTo(Off, T->Alignment);
+    T->setOffset(Off);
+    uint32_t Size = T->size();
+    T->getThunkTargetSym()->Size = Size;
+    Off += Size;
+  }
+  bool Changed = Off != Size;
+  Size = Off;
+  return Changed;
+}
+
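Note that assignOffsets() is designed to be re-run: it reports whether the section size changed so the caller can iterate until thunk placement converges. The offset arithmetic itself is plain power-of-two alignment; a worked restatement with our own example values:

#include <cstdint>

// Equivalent of llvm::alignTo for power-of-two alignments: round Off up
// to the next multiple of Align.
constexpr uint64_t alignUp(uint64_t Off, uint64_t Align) {
  return (Off + Align - 1) & ~(Align - 1);
}

// Two 12-byte thunks with 16-byte alignment land at offsets 0 and 16,
// so the section size converges to 28.
static_assert(alignUp(0, 16) == 0, "");
static_assert(alignUp(12, 16) == 16, "");
static_assert(alignUp(12, 16) + 12 == 28, "");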
 InputSection *InX::ARMAttributes;
 BssSection *InX::Bss;
 BssSection *InX::BssRelRo;
@@ -2620,16 +3019,22 @@ MipsRldMapSection *InX::MipsRldMap;
 PltSection *InX::Plt;
 PltSection *InX::Iplt;
 RelocationBaseSection *InX::RelaDyn;
+RelrBaseSection *InX::RelrDyn;
 RelocationBaseSection *InX::RelaPlt;
 RelocationBaseSection *InX::RelaIplt;
 StringTableSection *InX::ShStrTab;
 StringTableSection *InX::StrTab;
 SymbolTableBaseSection *InX::SymTab;
 
-template GdbIndexSection *elf::createGdbIndex<ELF32LE>();
-template GdbIndexSection *elf::createGdbIndex<ELF32BE>();
-template GdbIndexSection *elf::createGdbIndex<ELF64LE>();
-template GdbIndexSection *elf::createGdbIndex<ELF64BE>();
+template GdbIndexSection *GdbIndexSection::create<ELF32LE>();
+template GdbIndexSection *GdbIndexSection::create<ELF32BE>();
+template GdbIndexSection *GdbIndexSection::create<ELF64LE>();
+template GdbIndexSection *GdbIndexSection::create<ELF64BE>();
+
+template void elf::splitSections<ELF32LE>();
+template void elf::splitSections<ELF32BE>();
+template void elf::splitSections<ELF64LE>();
+template void elf::splitSections<ELF64BE>();
 
 template void EhFrameSection::addSection<ELF32LE>(InputSectionBase *);
 template void EhFrameSection::addSection<ELF32BE>(InputSectionBase *);
@@ -2641,6 +3046,11 @@ template void PltSection::addEntry<ELF32BE>(Symbol &Sym);
 template void PltSection::addEntry<ELF64LE>(Symbol &Sym);
 template void PltSection::addEntry<ELF64BE>(Symbol &Sym);
 
+template void MipsGotSection::build<ELF32LE>();
+template void MipsGotSection::build<ELF32BE>();
+template void MipsGotSection::build<ELF64LE>();
+template void MipsGotSection::build<ELF64BE>();
+
 template class elf::MipsAbiFlagsSection<ELF32LE>;
 template class elf::MipsAbiFlagsSection<ELF32BE>;
 template class elf::MipsAbiFlagsSection<ELF64LE>;
@@ -2671,6 +3081,11 @@ template class elf::AndroidPackedRelocationSection<ELF32BE>;
 template class elf::AndroidPackedRelocationSection<ELF64LE>;
 template class elf::AndroidPackedRelocationSection<ELF64BE>;
 
+template class elf::RelrSection<ELF32LE>;
+template class elf::RelrSection<ELF32BE>;
+template class elf::RelrSection<ELF64LE>;
+template class elf::RelrSection<ELF64BE>;
+
 template class elf::SymbolTableSection<ELF32LE>;
 template class elf::SymbolTableSection<ELF32BE>;
 template class elf::SymbolTableSection<ELF64LE>;
