diff options
Diffstat (limited to 'ELF/Writer.cpp')
-rw-r--r-- | ELF/Writer.cpp | 1609 |
1 files changed, 880 insertions, 729 deletions
diff --git a/ELF/Writer.cpp b/ELF/Writer.cpp index 1853f99bc6007..c7a3cae49ae65 100644 --- a/ELF/Writer.cpp +++ b/ELF/Writer.cpp @@ -8,22 +8,22 @@ //===----------------------------------------------------------------------===// #include "Writer.h" +#include "AArch64ErrataFix.h" #include "Config.h" #include "Filesystem.h" #include "LinkerScript.h" #include "MapFile.h" -#include "Memory.h" #include "OutputSections.h" #include "Relocations.h" #include "Strings.h" #include "SymbolTable.h" +#include "Symbols.h" #include "SyntheticSections.h" #include "Target.h" -#include "Threads.h" +#include "lld/Common/Memory.h" +#include "lld/Common/Threads.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringSwitch.h" -#include "llvm/Support/FileOutputBuffer.h" -#include "llvm/Support/raw_ostream.h" #include <climits> using namespace llvm; @@ -39,6 +39,7 @@ namespace { // The writer writes a SymbolTable result to a file. template <class ELFT> class Writer { public: + Writer() : Buffer(errorHandler().OutputBuffer) {} typedef typename ELFT::Shdr Elf_Shdr; typedef typename ELFT::Ehdr Elf_Ehdr; typedef typename ELFT::Phdr Elf_Phdr; @@ -46,43 +47,38 @@ public: void run(); private: - void clearOutputSections(); - void createSyntheticSections(); void copyLocalSymbols(); void addSectionSymbols(); - void addReservedSymbols(); - void createSections(); void forEachRelSec(std::function<void(InputSectionBase &)> Fn); void sortSections(); + void resolveShfLinkOrder(); + void sortInputSections(); void finalizeSections(); void addPredefinedSections(); + void setReservedSymbolSections(); - std::vector<PhdrEntry> createPhdrs(); + std::vector<PhdrEntry *> createPhdrs(); void removeEmptyPTLoad(); - void addPtArmExid(std::vector<PhdrEntry> &Phdrs); + void addPtArmExid(std::vector<PhdrEntry *> &Phdrs); void assignFileOffsets(); void assignFileOffsetsBinary(); void setPhdrs(); void fixSectionAlignments(); - void fixPredefinedSymbols(); void openFile(); + void writeTrapInstr(); void writeHeader(); void writeSections(); void writeSectionsBinary(); void writeBuildId(); - std::unique_ptr<FileOutputBuffer> Buffer; - - OutputSectionFactory Factory; + std::unique_ptr<FileOutputBuffer> &Buffer; void addRelIpltSymbols(); void addStartEndSymbols(); void addStartStopSymbols(OutputSection *Sec); uint64_t getEntryAddr(); - OutputSection *findSectionInScript(StringRef Name); - OutputSectionCommand *findSectionCommand(StringRef Name); - std::vector<PhdrEntry> Phdrs; + std::vector<PhdrEntry *> Phdrs; uint64_t FileSize; uint64_t SectionHeaderOff; @@ -91,49 +87,60 @@ private: }; } // anonymous namespace -StringRef elf::getOutputSectionName(StringRef Name) { +StringRef elf::getOutputSectionName(InputSectionBase *S) { // ".zdebug_" is a prefix for ZLIB-compressed sections. // Because we decompressed input sections, we want to remove 'z'. - if (Name.startswith(".zdebug_")) - return Saver.save("." + Name.substr(2)); + if (S->Name.startswith(".zdebug_")) + return Saver.save("." + S->Name.substr(2)); if (Config->Relocatable) - return Name; + return S->Name; + + // This is for --emit-relocs. If .text.foo is emitted as .text.bar, we want + // to emit .rela.text.foo as .rela.text.bar for consistency (this is not + // technically required, but not doing it is odd). This code guarantees that. + if ((S->Type == SHT_REL || S->Type == SHT_RELA) && + !isa<SyntheticSection>(S)) { + OutputSection *Out = + cast<InputSection>(S)->getRelocatedSection()->getOutputSection(); + if (S->Type == SHT_RELA) + return Saver.save(".rela" + Out->Name); + return Saver.save(".rel" + Out->Name); + } for (StringRef V : {".text.", ".rodata.", ".data.rel.ro.", ".data.", ".bss.rel.ro.", ".bss.", ".init_array.", ".fini_array.", ".ctors.", ".dtors.", ".tbss.", ".gcc_except_table.", ".tdata.", ".ARM.exidx.", ".ARM.extab."}) { StringRef Prefix = V.drop_back(); - if (Name.startswith(V) || Name == Prefix) + if (S->Name.startswith(V) || S->Name == Prefix) return Prefix; } // CommonSection is identified as "COMMON" in linker scripts. // By default, it should go to .bss section. - if (Name == "COMMON") + if (S->Name == "COMMON") return ".bss"; - return Name; + return S->Name; } -template <class ELFT> static bool needsInterpSection() { - return !Symtab<ELFT>::X->getSharedFiles().empty() && - !Config->DynamicLinker.empty() && !Script->ignoreInterpSection(); +static bool needsInterpSection() { + return !SharedFiles.empty() && !Config->DynamicLinker.empty() && + Script->needsInterpSection(); } template <class ELFT> void elf::writeResult() { Writer<ELFT>().run(); } template <class ELFT> void Writer<ELFT>::removeEmptyPTLoad() { - auto I = std::remove_if(Phdrs.begin(), Phdrs.end(), [&](const PhdrEntry &P) { - if (P.p_type != PT_LOAD) + llvm::erase_if(Phdrs, [&](const PhdrEntry *P) { + if (P->p_type != PT_LOAD) return false; - if (!P.First) + if (!P->FirstSec) return true; - uint64_t Size = P.Last->Addr + P.Last->Size - P.First->Addr; + uint64_t Size = P->LastSec->Addr + P->LastSec->Size - P->FirstSec->Addr; return Size == 0; }); - Phdrs.erase(I, Phdrs.end()); } template <class ELFT> static void combineEhFrameSections() { @@ -142,7 +149,7 @@ template <class ELFT> static void combineEhFrameSections() { if (!ES || !ES->Live) continue; - In<ELFT>::EhFrame->addSection(ES); + InX::EhFrame->addSection<ELFT>(ES); S = nullptr; } @@ -150,130 +157,88 @@ template <class ELFT> static void combineEhFrameSections() { V.erase(std::remove(V.begin(), V.end(), nullptr), V.end()); } -template <class ELFT> void Writer<ELFT>::clearOutputSections() { - // Clear the OutputSections to make sure it is not used anymore. Any - // code from this point on should be using the linker script - // commands. - for (OutputSection *Sec : OutputSections) - Sec->Sections.clear(); - OutputSections.clear(); +template <class ELFT> +static Defined *addOptionalRegular(StringRef Name, SectionBase *Sec, + uint64_t Val, uint8_t StOther = STV_HIDDEN, + uint8_t Binding = STB_GLOBAL) { + Symbol *S = Symtab->find(Name); + if (!S || S->isDefined()) + return nullptr; + Symbol *Sym = Symtab->addRegular<ELFT>(Name, StOther, STT_NOTYPE, Val, + /*Size=*/0, Binding, Sec, + /*File=*/nullptr); + return cast<Defined>(Sym); } -// The main function of the writer. -template <class ELFT> void Writer<ELFT>::run() { - // Create linker-synthesized sections such as .got or .plt. - // Such sections are of type input section. - createSyntheticSections(); - - if (!Config->Relocatable) - combineEhFrameSections<ELFT>(); - - // We need to create some reserved symbols such as _end. Create them. - if (!Config->Relocatable) - addReservedSymbols(); - - // Create output sections. - if (Script->Opt.HasSections) { - // If linker script contains SECTIONS commands, let it create sections. - Script->processCommands(Factory); - - // Linker scripts may have left some input sections unassigned. - // Assign such sections using the default rule. - Script->addOrphanSections(Factory); - } else { - // If linker script does not contain SECTIONS commands, create - // output sections by default rules. We still need to give the - // linker script a chance to run, because it might contain - // non-SECTIONS commands such as ASSERT. - Script->processCommands(Factory); - createSections(); - } - clearOutputSections(); - - if (Config->Discard != DiscardPolicy::All) - copyLocalSymbols(); - - if (Config->CopyRelocs) - addSectionSymbols(); - - // Now that we have a complete set of output sections. This function - // completes section contents. For example, we need to add strings - // to the string table, and add entries to .got and .plt. - // finalizeSections does that. - finalizeSections(); - if (ErrorCount) - return; - - if (!Script->Opt.HasSections && !Config->Relocatable) - fixSectionAlignments(); - - // If -compressed-debug-sections is specified, we need to compress - // .debug_* sections. Do it right now because it changes the size of - // output sections. - parallelForEach( - OutputSectionCommands.begin(), OutputSectionCommands.end(), - [](OutputSectionCommand *Cmd) { Cmd->maybeCompress<ELFT>(); }); - - Script->assignAddresses(); - Script->allocateHeaders(Phdrs); - - // Remove empty PT_LOAD to avoid causing the dynamic linker to try to mmap a - // 0 sized region. This has to be done late since only after assignAddresses - // we know the size of the sections. - removeEmptyPTLoad(); - - if (!Config->OFormatBinary) - assignFileOffsets(); - else - assignFileOffsetsBinary(); +// The linker is expected to define some symbols depending on +// the linking result. This function defines such symbols. +template <class ELFT> void elf::addReservedSymbols() { + if (Config->EMachine == EM_MIPS) { + // Define _gp for MIPS. st_value of _gp symbol will be updated by Writer + // so that it points to an absolute address which by default is relative + // to GOT. Default offset is 0x7ff0. + // See "Global Data Symbols" in Chapter 6 in the following document: + // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf + ElfSym::MipsGp = Symtab->addAbsolute<ELFT>("_gp", STV_HIDDEN, STB_GLOBAL); - setPhdrs(); + // On MIPS O32 ABI, _gp_disp is a magic symbol designates offset between + // start of function and 'gp' pointer into GOT. + if (Symtab->find("_gp_disp")) + ElfSym::MipsGpDisp = + Symtab->addAbsolute<ELFT>("_gp_disp", STV_HIDDEN, STB_GLOBAL); - if (Config->Relocatable) { - for (OutputSectionCommand *Cmd : OutputSectionCommands) - Cmd->Sec->Addr = 0; - } else { - fixPredefinedSymbols(); + // The __gnu_local_gp is a magic symbol equal to the current value of 'gp' + // pointer. This symbol is used in the code generated by .cpload pseudo-op + // in case of using -mno-shared option. + // https://sourceware.org/ml/binutils/2004-12/msg00094.html + if (Symtab->find("__gnu_local_gp")) + ElfSym::MipsLocalGp = + Symtab->addAbsolute<ELFT>("__gnu_local_gp", STV_HIDDEN, STB_GLOBAL); } - // It does not make sense try to open the file if we have error already. - if (ErrorCount) - return; - // Write the result down to a file. - openFile(); - if (ErrorCount) - return; + ElfSym::GlobalOffsetTable = addOptionalRegular<ELFT>( + "_GLOBAL_OFFSET_TABLE_", Out::ElfHeader, Target->GotBaseSymOff); - if (!Config->OFormatBinary) { - writeHeader(); - writeSections(); - } else { - writeSectionsBinary(); - } + // __ehdr_start is the location of ELF file headers. Note that we define + // this symbol unconditionally even when using a linker script, which + // differs from the behavior implemented by GNU linker which only define + // this symbol if ELF headers are in the memory mapped segment. + // __executable_start is not documented, but the expectation of at + // least the android libc is that it points to the elf header too. + // __dso_handle symbol is passed to cxa_finalize as a marker to identify + // each DSO. The address of the symbol doesn't matter as long as they are + // different in different DSOs, so we chose the start address of the DSO. + for (const char *Name : + {"__ehdr_start", "__executable_start", "__dso_handle"}) + addOptionalRegular<ELFT>(Name, Out::ElfHeader, 0, STV_HIDDEN); - // Backfill .note.gnu.build-id section content. This is done at last - // because the content is usually a hash value of the entire output file. - writeBuildId(); - if (ErrorCount) + // If linker script do layout we do not need to create any standart symbols. + if (Script->HasSectionsCommand) return; - // Handle -Map option. - writeMapFile<ELFT>(OutputSectionCommands); - if (ErrorCount) - return; + auto Add = [](StringRef S, int64_t Pos) { + return addOptionalRegular<ELFT>(S, Out::ElfHeader, Pos, STV_DEFAULT); + }; - if (auto EC = Buffer->commit()) - error("failed to write to the output file: " + EC.message()); + ElfSym::Bss = Add("__bss_start", 0); + ElfSym::End1 = Add("end", -1); + ElfSym::End2 = Add("_end", -1); + ElfSym::Etext1 = Add("etext", -1); + ElfSym::Etext2 = Add("_etext", -1); + ElfSym::Edata1 = Add("edata", -1); + ElfSym::Edata2 = Add("_edata", -1); +} - // Flush the output streams and exit immediately. A full shutdown - // is a good test that we are keeping track of all allocated memory, - // but actually freeing it is a waste of time in a regular linker run. - if (Config->ExitEarly) - exitLld(0); +static OutputSection *findSection(StringRef Name) { + for (BaseCommand *Base : Script->SectionCommands) + if (auto *Sec = dyn_cast<OutputSection>(Base)) + if (Sec->Name == Name) + return Sec; + return nullptr; } // Initialize Out members. -template <class ELFT> void Writer<ELFT>::createSyntheticSections() { +template <class ELFT> static void createSyntheticSections() { // Initialize all pointers with NULL. This is needed because // you can call lld::elf::main more than once as a library. memset(&Out::First, 0, sizeof(Out)); @@ -282,16 +247,19 @@ template <class ELFT> void Writer<ELFT>::createSyntheticSections() { InX::DynStrTab = make<StringTableSection>(".dynstr", true); InX::Dynamic = make<DynamicSection<ELFT>>(); - In<ELFT>::RelaDyn = make<RelocationSection<ELFT>>( - Config->IsRela ? ".rela.dyn" : ".rel.dyn", Config->ZCombreloc); + if (Config->AndroidPackDynRelocs) { + InX::RelaDyn = make<AndroidPackedRelocationSection<ELFT>>( + Config->IsRela ? ".rela.dyn" : ".rel.dyn"); + } else { + InX::RelaDyn = make<RelocationSection<ELFT>>( + Config->IsRela ? ".rela.dyn" : ".rel.dyn", Config->ZCombreloc); + } InX::ShStrTab = make<StringTableSection>(".shstrtab", false); - Out::ElfHeader = make<OutputSection>("", 0, SHF_ALLOC); - Out::ElfHeader->Size = sizeof(Elf_Ehdr); Out::ProgramHeaders = make<OutputSection>("", 0, SHF_ALLOC); - Out::ProgramHeaders->updateAlignment(Config->Wordsize); + Out::ProgramHeaders->Alignment = Config->Wordsize; - if (needsInterpSection<ELFT>()) { + if (needsInterpSection()) { InX::Interp = createInterpSection(); Add(InX::Interp); } else { @@ -308,20 +276,21 @@ template <class ELFT> void Writer<ELFT>::createSyntheticSections() { Add(InX::BuildId); } - InX::Common = createCommonSection<ELFT>(); - if (InX::Common) - Add(InX::Common); - - InX::Bss = make<BssSection>(".bss"); + InX::Bss = make<BssSection>(".bss", 0, 1); Add(InX::Bss); - InX::BssRelRo = make<BssSection>(".bss.rel.ro"); + + // If there is a SECTIONS command and a .data.rel.ro section name use name + // .data.rel.ro.bss so that we match in the .data.rel.ro output section. + // This makes sure our relro is contiguous. + bool HasDataRelRo = + Script->HasSectionsCommand && findSection(".data.rel.ro"); + InX::BssRelRo = make<BssSection>( + HasDataRelRo ? ".data.rel.ro.bss" : ".bss.rel.ro", 0, 1); Add(InX::BssRelRo); // Add MIPS-specific sections. - bool HasDynSymTab = !Symtab<ELFT>::X->getSharedFiles().empty() || - Config->Pic || Config->ExportDynamic; if (Config->EMachine == EM_MIPS) { - if (!Config->Shared && HasDynSymTab) { + if (!Config->Shared && Config->HasDynSymTab) { InX::MipsRldMap = make<MipsRldMapSection>(); Add(InX::MipsRldMap); } @@ -333,7 +302,7 @@ template <class ELFT> void Writer<ELFT>::createSyntheticSections() { Add(Sec); } - if (HasDynSymTab) { + if (Config->HasDynSymTab) { InX::DynSymTab = make<SymbolTableSection<ELFT>>(*InX::DynStrTab); Add(InX::DynSymTab); @@ -354,13 +323,13 @@ template <class ELFT> void Writer<ELFT>::createSyntheticSections() { } if (Config->SysvHash) { - In<ELFT>::HashTab = make<HashTableSection<ELFT>>(); - Add(In<ELFT>::HashTab); + InX::HashTab = make<HashTableSection>(); + Add(InX::HashTab); } Add(InX::Dynamic); Add(InX::DynStrTab); - Add(In<ELFT>::RelaDyn); + Add(InX::RelaDyn); } // Add .got. MIPS' .got is so different from the other archs, @@ -385,16 +354,22 @@ template <class ELFT> void Writer<ELFT>::createSyntheticSections() { // We always need to add rel[a].plt to output if it has entries. // Even for static linking it can contain R_[*]_IRELATIVE relocations. - In<ELFT>::RelaPlt = make<RelocationSection<ELFT>>( + InX::RelaPlt = make<RelocationSection<ELFT>>( Config->IsRela ? ".rela.plt" : ".rel.plt", false /*Sort*/); - Add(In<ELFT>::RelaPlt); + Add(InX::RelaPlt); // The RelaIplt immediately follows .rel.plt (.rel.dyn for ARM) to ensure - // that the IRelative relocations are processed last by the dynamic loader - In<ELFT>::RelaIplt = make<RelocationSection<ELFT>>( - (Config->EMachine == EM_ARM) ? ".rel.dyn" : In<ELFT>::RelaPlt->Name, + // that the IRelative relocations are processed last by the dynamic loader. + // We cannot place the iplt section in .rel.dyn when Android relocation + // packing is enabled because that would cause a section type mismatch. + // However, because the Android dynamic loader reads .rel.plt after .rel.dyn, + // we can get the desired behaviour by placing the iplt section in .rel.plt. + InX::RelaIplt = make<RelocationSection<ELFT>>( + (Config->EMachine == EM_ARM && !Config->AndroidPackDynRelocs) + ? ".rel.dyn" + : InX::RelaPlt->Name, false /*Sort*/); - Add(In<ELFT>::RelaIplt); + Add(InX::RelaIplt); InX::Plt = make<PltSection>(Target->PltHeaderSize); Add(InX::Plt); @@ -403,11 +378,11 @@ template <class ELFT> void Writer<ELFT>::createSyntheticSections() { if (!Config->Relocatable) { if (Config->EhFrameHdr) { - In<ELFT>::EhFrameHdr = make<EhFrameHeader<ELFT>>(); - Add(In<ELFT>::EhFrameHdr); + InX::EhFrameHdr = make<EhFrameHeader>(); + Add(InX::EhFrameHdr); } - In<ELFT>::EhFrame = make<EhFrameSection<ELFT>>(); - Add(In<ELFT>::EhFrame); + InX::EhFrame = make<EhFrameSection>(); + Add(InX::EhFrame); } if (InX::SymTab) @@ -417,8 +392,97 @@ template <class ELFT> void Writer<ELFT>::createSyntheticSections() { Add(InX::StrTab); } +// The main function of the writer. +template <class ELFT> void Writer<ELFT>::run() { + // Create linker-synthesized sections such as .got or .plt. + // Such sections are of type input section. + createSyntheticSections<ELFT>(); + + if (!Config->Relocatable) + combineEhFrameSections<ELFT>(); + + // We want to process linker script commands. When SECTIONS command + // is given we let it create sections. + Script->processSectionCommands(); + + // Linker scripts controls how input sections are assigned to output sections. + // Input sections that were not handled by scripts are called "orphans", and + // they are assigned to output sections by the default rule. Process that. + Script->addOrphanSections(); + + if (Config->Discard != DiscardPolicy::All) + copyLocalSymbols(); + + if (Config->CopyRelocs) + addSectionSymbols(); + + // Now that we have a complete set of output sections. This function + // completes section contents. For example, we need to add strings + // to the string table, and add entries to .got and .plt. + // finalizeSections does that. + finalizeSections(); + if (errorCount()) + return; + + // If -compressed-debug-sections is specified, we need to compress + // .debug_* sections. Do it right now because it changes the size of + // output sections. + parallelForEach(OutputSections, + [](OutputSection *Sec) { Sec->maybeCompress<ELFT>(); }); + + Script->assignAddresses(); + Script->allocateHeaders(Phdrs); + + // Remove empty PT_LOAD to avoid causing the dynamic linker to try to mmap a + // 0 sized region. This has to be done late since only after assignAddresses + // we know the size of the sections. + removeEmptyPTLoad(); + + if (!Config->OFormatBinary) + assignFileOffsets(); + else + assignFileOffsetsBinary(); + + setPhdrs(); + + if (Config->Relocatable) { + for (OutputSection *Sec : OutputSections) + Sec->Addr = 0; + } + + // It does not make sense try to open the file if we have error already. + if (errorCount()) + return; + // Write the result down to a file. + openFile(); + if (errorCount()) + return; + + if (!Config->OFormatBinary) { + writeTrapInstr(); + writeHeader(); + writeSections(); + } else { + writeSectionsBinary(); + } + + // Backfill .note.gnu.build-id section content. This is done at last + // because the content is usually a hash value of the entire output file. + writeBuildId(); + if (errorCount()) + return; + + // Handle -Map option. + writeMapFile(); + if (errorCount()) + return; + + if (auto E = Buffer->commit()) + error("failed to write to the output file: " + toString(std::move(E))); +} + static bool shouldKeepInSymtab(SectionBase *Sec, StringRef SymName, - const SymbolBody &B) { + const Symbol &B) { if (B.isFile() || B.isSection()) return false; @@ -443,27 +507,25 @@ static bool shouldKeepInSymtab(SectionBase *Sec, StringRef SymName, return !Sec || !(Sec->Flags & SHF_MERGE); } -static bool includeInSymtab(const SymbolBody &B) { - if (!B.isLocal() && !B.symbol()->IsUsedInRegularObj) +static bool includeInSymtab(const Symbol &B) { + if (!B.isLocal() && !B.IsUsedInRegularObj) return false; - if (auto *D = dyn_cast<DefinedRegular>(&B)) { + if (auto *D = dyn_cast<Defined>(&B)) { // Always include absolute symbols. SectionBase *Sec = D->Section; if (!Sec) return true; - if (auto *IS = dyn_cast<InputSectionBase>(Sec)) { - Sec = IS->Repl; - IS = cast<InputSectionBase>(Sec); - // Exclude symbols pointing to garbage-collected sections. - if (!IS->Live) - return false; - } + Sec = Sec->Repl; + // Exclude symbols pointing to garbage-collected sections. + if (isa<InputSectionBase>(Sec) && !Sec->Live) + return false; if (auto *S = dyn_cast<MergeInputSection>(Sec)) if (!S->getSectionPiece(D->Value)->Live) return false; + return true; } - return true; + return B.Used; } // Local symbols are not in the linker's symbol table. This function scans @@ -471,12 +533,13 @@ static bool includeInSymtab(const SymbolBody &B) { template <class ELFT> void Writer<ELFT>::copyLocalSymbols() { if (!InX::SymTab) return; - for (elf::ObjectFile<ELFT> *F : Symtab<ELFT>::X->getObjectFiles()) { - for (SymbolBody *B : F->getLocalSymbols()) { - if (!B->IsLocal) + for (InputFile *File : ObjectFiles) { + ObjFile<ELFT> *F = cast<ObjFile<ELFT>>(File); + for (Symbol *B : F->getLocalSymbols()) { + if (!B->isLocal()) fatal(toString(F) + ": broken object: getLocalSymbols returns a non-local symbol"); - auto *DR = dyn_cast<DefinedRegular>(B); + auto *DR = dyn_cast<Defined>(B); // No reason to keep local undefined symbol in symtab. if (!DR) @@ -493,27 +556,36 @@ template <class ELFT> void Writer<ELFT>::copyLocalSymbols() { } template <class ELFT> void Writer<ELFT>::addSectionSymbols() { - // Create one STT_SECTION symbol for each output section we might - // have a relocation with. - for (BaseCommand *Base : Script->Opt.Commands) { - auto *Cmd = dyn_cast<OutputSectionCommand>(Base); - if (!Cmd) + // Create a section symbol for each output section so that we can represent + // relocations that point to the section. If we know that no relocation is + // referring to a section (that happens if the section is a synthetic one), we + // don't create a section symbol for that section. + for (BaseCommand *Base : Script->SectionCommands) { + auto *Sec = dyn_cast<OutputSection>(Base); + if (!Sec) continue; - auto I = llvm::find_if(Cmd->Commands, [](BaseCommand *Base) { + auto I = llvm::find_if(Sec->SectionCommands, [](BaseCommand *Base) { if (auto *ISD = dyn_cast<InputSectionDescription>(Base)) return !ISD->Sections.empty(); return false; }); - if (I == Cmd->Commands.end()) + if (I == Sec->SectionCommands.end()) continue; InputSection *IS = cast<InputSectionDescription>(*I)->Sections[0]; - if (isa<SyntheticSection>(IS) || IS->Type == SHT_REL || - IS->Type == SHT_RELA) + + // Relocations are not using REL[A] section symbols. + if (IS->Type == SHT_REL || IS->Type == SHT_RELA) + continue; + + // Unlike other synthetic sections, mergeable output sections contain data + // copied from input sections, and there may be a relocation pointing to its + // contents if -r or -emit-reloc are given. + if (isa<SyntheticSection>(IS) && !(IS->Flags & SHF_MERGE)) continue; auto *Sym = - make<DefinedRegular>("", /*IsLocal=*/true, /*StOther=*/0, STT_SECTION, - /*Value=*/0, /*Size=*/0, IS, nullptr); + make<Defined>(IS->File, "", STB_LOCAL, /*StOther=*/0, STT_SECTION, + /*Value=*/0, /*Size=*/0, IS); InX::SymTab->addSymbol(Sym); } } @@ -524,7 +596,7 @@ template <class ELFT> void Writer<ELFT>::addSectionSymbols() { // // This function returns true if a section needs to be put into a // PT_GNU_RELRO segment. -bool elf::isRelroSection(const OutputSection *Sec) { +static bool isRelroSection(const OutputSection *Sec) { if (!Config->ZRelro) return false; @@ -575,20 +647,14 @@ bool elf::isRelroSection(const OutputSection *Sec) { if (Sec == InX::Dynamic->getParent()) return true; - // .bss.rel.ro is used for copy relocations for read-only symbols. - // Since the dynamic linker needs to process copy relocations, the - // section cannot be read-only, but once initialized, they shouldn't - // change. - if (Sec == InX::BssRelRo->getParent()) - return true; - // Sections with some special names are put into RELRO. This is a // bit unfortunate because section names shouldn't be significant in // ELF in spirit. But in reality many linker features depend on // magic section names. StringRef S = Sec->Name; - return S == ".data.rel.ro" || S == ".ctors" || S == ".dtors" || S == ".jcr" || - S == ".eh_frame" || S == ".openbsd.randomdata"; + return S == ".data.rel.ro" || S == ".bss.rel.ro" || S == ".ctors" || + S == ".dtors" || S == ".jcr" || S == ".eh_frame" || + S == ".openbsd.randomdata"; } // We compute a rank for each section. The rank indicates where the @@ -696,8 +762,8 @@ static unsigned getSectionRank(const OutputSection *Sec) { if (IsNoBits) Rank |= RF_BSS; - // // Some architectures have additional ordering restrictions for sections - // // within the same PT_LOAD. + // Some architectures have additional ordering restrictions for sections + // within the same PT_LOAD. if (Config->EMachine == EM_PPC64) { // PPC64 has a number of special SHT_PROGBITS+SHF_ALLOC+SHF_WRITE sections // that we would like to make sure appear is a specific order to maximize @@ -735,8 +801,8 @@ static unsigned getSectionRank(const OutputSection *Sec) { } static bool compareSections(const BaseCommand *ACmd, const BaseCommand *BCmd) { - const OutputSection *A = cast<OutputSectionCommand>(ACmd)->Sec; - const OutputSection *B = cast<OutputSectionCommand>(BCmd)->Sec; + const OutputSection *A = cast<OutputSection>(ACmd); + const OutputSection *B = cast<OutputSection>(BCmd); if (A->SortRank != B->SortRank) return A->SortRank < B->SortRank; if (!(A->SortRank & RF_NOT_ADDR_SET)) @@ -746,36 +812,12 @@ static bool compareSections(const BaseCommand *ACmd, const BaseCommand *BCmd) { } void PhdrEntry::add(OutputSection *Sec) { - Last = Sec; - if (!First) - First = Sec; + LastSec = Sec; + if (!FirstSec) + FirstSec = Sec; p_align = std::max(p_align, Sec->Alignment); if (p_type == PT_LOAD) - Sec->FirstInPtLoad = First; -} - -template <class ELFT> -static Symbol *addRegular(StringRef Name, SectionBase *Sec, uint64_t Value, - uint8_t StOther = STV_HIDDEN, - uint8_t Binding = STB_WEAK) { - // The linker generated symbols are added as STB_WEAK to allow user defined - // ones to override them. - return Symtab<ELFT>::X->addRegular(Name, StOther, STT_NOTYPE, Value, - /*Size=*/0, Binding, Sec, - /*File=*/nullptr); -} - -template <class ELFT> -static DefinedRegular * -addOptionalRegular(StringRef Name, SectionBase *Sec, uint64_t Val, - uint8_t StOther = STV_HIDDEN, uint8_t Binding = STB_GLOBAL) { - SymbolBody *S = Symtab<ELFT>::X->find(Name); - if (!S) - return nullptr; - if (S->isInCurrentDSO()) - return nullptr; - return cast<DefinedRegular>( - addRegular<ELFT>(Name, Sec, Val, StOther, Binding)->body()); + Sec->PtLoad = this; } // The beginning and the ending of .rel[a].plt section are marked @@ -785,177 +827,115 @@ addOptionalRegular(StringRef Name, SectionBase *Sec, uint64_t Val, // need these symbols, since IRELATIVE relocs are resolved through GOT // and PLT. For details, see http://www.airs.com/blog/archives/403. template <class ELFT> void Writer<ELFT>::addRelIpltSymbols() { - if (InX::DynSymTab) + if (!Config->Static) return; StringRef S = Config->IsRela ? "__rela_iplt_start" : "__rel_iplt_start"; - addOptionalRegular<ELFT>(S, In<ELFT>::RelaIplt, 0, STV_HIDDEN, STB_WEAK); + addOptionalRegular<ELFT>(S, InX::RelaIplt, 0, STV_HIDDEN, STB_WEAK); S = Config->IsRela ? "__rela_iplt_end" : "__rel_iplt_end"; - addOptionalRegular<ELFT>(S, In<ELFT>::RelaIplt, -1, STV_HIDDEN, STB_WEAK); + addOptionalRegular<ELFT>(S, InX::RelaIplt, -1, STV_HIDDEN, STB_WEAK); } -// The linker is expected to define some symbols depending on -// the linking result. This function defines such symbols. -template <class ELFT> void Writer<ELFT>::addReservedSymbols() { - if (Config->EMachine == EM_MIPS) { - // Define _gp for MIPS. st_value of _gp symbol will be updated by Writer - // so that it points to an absolute address which by default is relative - // to GOT. Default offset is 0x7ff0. - // See "Global Data Symbols" in Chapter 6 in the following document: - // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf - ElfSym::MipsGp = Symtab<ELFT>::X->addAbsolute("_gp", STV_HIDDEN, STB_LOCAL); - - // On MIPS O32 ABI, _gp_disp is a magic symbol designates offset between - // start of function and 'gp' pointer into GOT. - if (Symtab<ELFT>::X->find("_gp_disp")) - ElfSym::MipsGpDisp = - Symtab<ELFT>::X->addAbsolute("_gp_disp", STV_HIDDEN, STB_LOCAL); +template <class ELFT> +void Writer<ELFT>::forEachRelSec(std::function<void(InputSectionBase &)> Fn) { + // Scan all relocations. Each relocation goes through a series + // of tests to determine if it needs special treatment, such as + // creating GOT, PLT, copy relocations, etc. + // Note that relocations for non-alloc sections are directly + // processed by InputSection::relocateNonAlloc. + for (InputSectionBase *IS : InputSections) + if (IS->Live && isa<InputSection>(IS) && (IS->Flags & SHF_ALLOC)) + Fn(*IS); + for (EhInputSection *ES : InX::EhFrame->Sections) + Fn(*ES); +} - // The __gnu_local_gp is a magic symbol equal to the current value of 'gp' - // pointer. This symbol is used in the code generated by .cpload pseudo-op - // in case of using -mno-shared option. - // https://sourceware.org/ml/binutils/2004-12/msg00094.html - if (Symtab<ELFT>::X->find("__gnu_local_gp")) - ElfSym::MipsLocalGp = - Symtab<ELFT>::X->addAbsolute("__gnu_local_gp", STV_HIDDEN, STB_LOCAL); +// This function generates assignments for predefined symbols (e.g. _end or +// _etext) and inserts them into the commands sequence to be processed at the +// appropriate time. This ensures that the value is going to be correct by the +// time any references to these symbols are processed and is equivalent to +// defining these symbols explicitly in the linker script. +template <class ELFT> void Writer<ELFT>::setReservedSymbolSections() { + if (ElfSym::GlobalOffsetTable) { + // The _GLOBAL_OFFSET_TABLE_ symbol is defined by target convention to + // be at some offset from the base of the .got section, usually 0 or the end + // of the .got + InputSection *GotSection = InX::MipsGot ? cast<InputSection>(InX::MipsGot) + : cast<InputSection>(InX::Got); + ElfSym::GlobalOffsetTable->Section = GotSection; } - // The _GLOBAL_OFFSET_TABLE_ symbol is defined by target convention to - // be at some offset from the base of the .got section, usually 0 or the end - // of the .got - InputSection *GotSection = InX::MipsGot ? cast<InputSection>(InX::MipsGot) - : cast<InputSection>(InX::Got); - ElfSym::GlobalOffsetTable = addOptionalRegular<ELFT>( - "_GLOBAL_OFFSET_TABLE_", GotSection, Target->GotBaseSymOff); - - // __tls_get_addr is defined by the dynamic linker for dynamic ELFs. For - // static linking the linker is required to optimize away any references to - // __tls_get_addr, so it's not defined anywhere. Create a hidden definition - // to avoid the undefined symbol error. - if (!InX::DynSymTab) - Symtab<ELFT>::X->addIgnored("__tls_get_addr"); - - // __ehdr_start is the location of ELF file headers. Note that we define - // this symbol unconditionally even when using a linker script, which - // differs from the behavior implemented by GNU linker which only define - // this symbol if ELF headers are in the memory mapped segment. - // __executable_start is not documented, but the expectation of at - // least the android libc is that it points to the elf header too. - // __dso_handle symbol is passed to cxa_finalize as a marker to identify - // each DSO. The address of the symbol doesn't matter as long as they are - // different in different DSOs, so we chose the start address of the DSO. - for (const char *Name : - {"__ehdr_start", "__executable_start", "__dso_handle"}) - addOptionalRegular<ELFT>(Name, Out::ElfHeader, 0, STV_HIDDEN); - - // If linker script do layout we do not need to create any standart symbols. - if (Script->Opt.HasSections) - return; - - auto Add = [](StringRef S) { - return addOptionalRegular<ELFT>(S, Out::ElfHeader, 0, STV_DEFAULT); - }; - - ElfSym::Bss = Add("__bss_start"); - ElfSym::End1 = Add("end"); - ElfSym::End2 = Add("_end"); - ElfSym::Etext1 = Add("etext"); - ElfSym::Etext2 = Add("_etext"); - ElfSym::Edata1 = Add("edata"); - ElfSym::Edata2 = Add("_edata"); -} - -// Sort input sections by section name suffixes for -// __attribute__((init_priority(N))). -static void sortInitFini(OutputSectionCommand *Cmd) { - if (Cmd) - Cmd->sortInitFini(); -} + PhdrEntry *Last = nullptr; + PhdrEntry *LastRO = nullptr; -// Sort input sections by the special rule for .ctors and .dtors. -static void sortCtorsDtors(OutputSectionCommand *Cmd) { - if (Cmd) - Cmd->sortCtorsDtors(); -} + for (PhdrEntry *P : Phdrs) { + if (P->p_type != PT_LOAD) + continue; + Last = P; + if (!(P->p_flags & PF_W)) + LastRO = P; + } -// Sort input sections using the list provided by --symbol-ordering-file. -template <class ELFT> static void sortBySymbolsOrder() { - if (Config->SymbolOrderingFile.empty()) - return; + if (LastRO) { + // _etext is the first location after the last read-only loadable segment. + if (ElfSym::Etext1) + ElfSym::Etext1->Section = LastRO->LastSec; + if (ElfSym::Etext2) + ElfSym::Etext2->Section = LastRO->LastSec; + } - // Build a map from symbols to their priorities. Symbols that didn't - // appear in the symbol ordering file have the lowest priority 0. - // All explicitly mentioned symbols have negative (higher) priorities. - DenseMap<StringRef, int> SymbolOrder; - int Priority = -Config->SymbolOrderingFile.size(); - for (StringRef S : Config->SymbolOrderingFile) - SymbolOrder.insert({S, Priority++}); - - // Build a map from sections to their priorities. - DenseMap<SectionBase *, int> SectionOrder; - for (elf::ObjectFile<ELFT> *File : Symtab<ELFT>::X->getObjectFiles()) { - for (SymbolBody *Body : File->getSymbols()) { - auto *D = dyn_cast<DefinedRegular>(Body); - if (!D || !D->Section) - continue; - int &Priority = SectionOrder[D->Section]; - Priority = std::min(Priority, SymbolOrder.lookup(D->getName())); + if (Last) { + // _edata points to the end of the last mapped initialized section. + OutputSection *Edata = nullptr; + for (OutputSection *OS : OutputSections) { + if (OS->Type != SHT_NOBITS) + Edata = OS; + if (OS == Last->LastSec) + break; } - } - // Sort sections by priority. - for (BaseCommand *Base : Script->Opt.Commands) - if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base)) - Cmd->sort([&](InputSectionBase *S) { return SectionOrder.lookup(S); }); -} + if (ElfSym::Edata1) + ElfSym::Edata1->Section = Edata; + if (ElfSym::Edata2) + ElfSym::Edata2->Section = Edata; -template <class ELFT> -void Writer<ELFT>::forEachRelSec(std::function<void(InputSectionBase &)> Fn) { - for (InputSectionBase *IS : InputSections) { - if (!IS->Live) - continue; - // Scan all relocations. Each relocation goes through a series - // of tests to determine if it needs special treatment, such as - // creating GOT, PLT, copy relocations, etc. - // Note that relocations for non-alloc sections are directly - // processed by InputSection::relocateNonAlloc. - if (!(IS->Flags & SHF_ALLOC)) - continue; - if (isa<InputSection>(IS) || isa<EhInputSection>(IS)) - Fn(*IS); + // _end is the first location after the uninitialized data region. + if (ElfSym::End1) + ElfSym::End1->Section = Last->LastSec; + if (ElfSym::End2) + ElfSym::End2->Section = Last->LastSec; } - if (!Config->Relocatable) { - for (EhInputSection *ES : In<ELFT>::EhFrame->Sections) - Fn(*ES); - } -} + if (ElfSym::Bss) + ElfSym::Bss->Section = findSection(".bss"); -template <class ELFT> void Writer<ELFT>::createSections() { - for (InputSectionBase *IS : InputSections) - if (IS) - Factory.addInputSec(IS, getOutputSectionName(IS->Name)); - - Script->fabricateDefaultCommands(); - sortBySymbolsOrder<ELFT>(); - sortInitFini(findSectionCommand(".init_array")); - sortInitFini(findSectionCommand(".fini_array")); - sortCtorsDtors(findSectionCommand(".ctors")); - sortCtorsDtors(findSectionCommand(".dtors")); + // Setup MIPS _gp_disp/__gnu_local_gp symbols which should + // be equal to the _gp symbol's value. + if (ElfSym::MipsGp) { + // Find GP-relative section with the lowest address + // and use this address to calculate default _gp value. + for (OutputSection *OS : OutputSections) { + if (OS->Flags & SHF_MIPS_GPREL) { + ElfSym::MipsGp->Section = OS; + ElfSym::MipsGp->Value = 0x7ff0; + break; + } + } + } } // We want to find how similar two ranks are. // The more branches in getSectionRank that match, the more similar they are. // Since each branch corresponds to a bit flag, we can just use // countLeadingZeros. -static int getRankProximity(OutputSection *A, OutputSection *B) { +static int getRankProximityAux(OutputSection *A, OutputSection *B) { return countLeadingZeros(A->SortRank ^ B->SortRank); } static int getRankProximity(OutputSection *A, BaseCommand *B) { - if (auto *Cmd = dyn_cast<OutputSectionCommand>(B)) - if (Cmd->Sec) - return getRankProximity(A, Cmd->Sec); + if (auto *Sec = dyn_cast<OutputSection>(B)) + if (Sec->Live) + return getRankProximityAux(A, Sec); return -1; } @@ -974,7 +954,7 @@ static int getRankProximity(OutputSection *A, BaseCommand *B) { // rw_sec : { *(rw_sec) } // would mean that the RW PT_LOAD would become unaligned. static bool shouldSkip(BaseCommand *Cmd) { - if (isa<OutputSectionCommand>(Cmd)) + if (isa<OutputSection>(Cmd)) return false; if (auto *Assign = dyn_cast<SymbolAssignment>(Cmd)) return Assign->Name != "."; @@ -988,7 +968,7 @@ template <typename ELFT> static std::vector<BaseCommand *>::iterator findOrphanPos(std::vector<BaseCommand *>::iterator B, std::vector<BaseCommand *>::iterator E) { - OutputSection *Sec = cast<OutputSectionCommand>(*E)->Sec; + OutputSection *Sec = cast<OutputSection>(*E); // Find the first element that has as close a rank as possible. auto I = std::max_element(B, E, [=](BaseCommand *A, BaseCommand *B) { @@ -1000,44 +980,84 @@ findOrphanPos(std::vector<BaseCommand *>::iterator B, // Consider all existing sections with the same proximity. int Proximity = getRankProximity(Sec, *I); for (; I != E; ++I) { - auto *Cmd = dyn_cast<OutputSectionCommand>(*I); - if (!Cmd || !Cmd->Sec) + auto *CurSec = dyn_cast<OutputSection>(*I); + if (!CurSec || !CurSec->Live) continue; - if (getRankProximity(Sec, Cmd->Sec) != Proximity || - Sec->SortRank < Cmd->Sec->SortRank) + if (getRankProximity(Sec, CurSec) != Proximity || + Sec->SortRank < CurSec->SortRank) break; } - auto J = std::find_if( - llvm::make_reverse_iterator(I), llvm::make_reverse_iterator(B), - [](BaseCommand *Cmd) { return isa<OutputSectionCommand>(Cmd); }); + + auto IsLiveSection = [](BaseCommand *Cmd) { + auto *OS = dyn_cast<OutputSection>(Cmd); + return OS && OS->Live; + }; + + auto J = std::find_if(llvm::make_reverse_iterator(I), + llvm::make_reverse_iterator(B), IsLiveSection); I = J.base(); + + // As a special case, if the orphan section is the last section, put + // it at the very end, past any other commands. + // This matches bfd's behavior and is convenient when the linker script fully + // specifies the start of the file, but doesn't care about the end (the non + // alloc sections for example). + auto NextSec = std::find_if(I, E, IsLiveSection); + if (NextSec == E) + return E; + while (I != E && shouldSkip(*I)) ++I; return I; } +// If no layout was provided by linker script, we want to apply default +// sorting for special input sections and handle --symbol-ordering-file. +template <class ELFT> void Writer<ELFT>::sortInputSections() { + assert(!Script->HasSectionsCommand); + + // Sort input sections by priority using the list provided + // by --symbol-ordering-file. + DenseMap<SectionBase *, int> Order = buildSectionOrder(); + if (!Order.empty()) + for (BaseCommand *Base : Script->SectionCommands) + if (auto *Sec = dyn_cast<OutputSection>(Base)) + if (Sec->Live) + Sec->sort([&](InputSectionBase *S) { return Order.lookup(S); }); + + // Sort input sections by section name suffixes for + // __attribute__((init_priority(N))). + if (OutputSection *Sec = findSection(".init_array")) + Sec->sortInitFini(); + if (OutputSection *Sec = findSection(".fini_array")) + Sec->sortInitFini(); + + // Sort input sections by the special rule for .ctors and .dtors. + if (OutputSection *Sec = findSection(".ctors")) + Sec->sortCtorsDtors(); + if (OutputSection *Sec = findSection(".dtors")) + Sec->sortCtorsDtors(); +} + template <class ELFT> void Writer<ELFT>::sortSections() { - if (Script->Opt.HasSections) - Script->adjustSectionsBeforeSorting(); + Script->adjustSectionsBeforeSorting(); // Don't sort if using -r. It is not necessary and we want to preserve the // relative order for SHF_LINK_ORDER sections. if (Config->Relocatable) return; - for (BaseCommand *Base : Script->Opt.Commands) - if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base)) - if (OutputSection *Sec = Cmd->Sec) - Sec->SortRank = getSectionRank(Sec); - - if (!Script->Opt.HasSections) { - // We know that all the OutputSectionCommands are contiguous in - // this case. - auto E = Script->Opt.Commands.end(); - auto I = Script->Opt.Commands.begin(); - auto IsSection = [](BaseCommand *Base) { - return isa<OutputSectionCommand>(Base); - }; + for (BaseCommand *Base : Script->SectionCommands) + if (auto *Sec = dyn_cast<OutputSection>(Base)) + Sec->SortRank = getSectionRank(Sec); + + if (!Script->HasSectionsCommand) { + sortInputSections(); + + // We know that all the OutputSections are contiguous in this case. + auto E = Script->SectionCommands.end(); + auto I = Script->SectionCommands.begin(); + auto IsSection = [](BaseCommand *Base) { return isa<OutputSection>(Base); }; I = std::find_if(I, E, IsSection); E = std::find_if(llvm::make_reverse_iterator(E), llvm::make_reverse_iterator(I), IsSection) @@ -1077,7 +1097,7 @@ template <class ELFT> void Writer<ELFT>::sortSections() { // a PT_LOAD. // // There is some ambiguity as to where exactly a new entry should be - // inserted, because Opt.Commands contains not only output section + // inserted, because Commands contains not only output section // commands but also other types of commands such as symbol assignment // expressions. There's no correct answer here due to the lack of the // formal specification of the linker script. We use heuristics to @@ -1085,11 +1105,11 @@ template <class ELFT> void Writer<ELFT>::sortSections() { // after another commands. For the details, look at shouldSkip // function. - auto I = Script->Opt.Commands.begin(); - auto E = Script->Opt.Commands.end(); + auto I = Script->SectionCommands.begin(); + auto E = Script->SectionCommands.end(); auto NonScriptI = std::find_if(I, E, [](BaseCommand *Base) { - if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base)) - return Cmd->Sec && Cmd->Sec->SectionIndex == INT_MAX; + if (auto *Sec = dyn_cast<OutputSection>(Base)) + return Sec->Live && Sec->SectionIndex == INT_MAX; return false; }); @@ -1109,13 +1129,13 @@ template <class ELFT> void Writer<ELFT>::sortSections() { while (NonScriptI != E) { auto Pos = findOrphanPos<ELFT>(I, NonScriptI); - OutputSection *Orphan = cast<OutputSectionCommand>(*NonScriptI)->Sec; + OutputSection *Orphan = cast<OutputSection>(*NonScriptI); // As an optimization, find all sections with the same sort rank // and insert them with one rotate. unsigned Rank = Orphan->SortRank; auto End = std::find_if(NonScriptI + 1, E, [=](BaseCommand *Cmd) { - return cast<OutputSectionCommand>(Cmd)->Sec->SortRank != Rank; + return cast<OutputSection>(Cmd)->SortRank != Rank; }); std::rotate(Pos, NonScriptI, End); NonScriptI = End; @@ -1124,6 +1144,125 @@ template <class ELFT> void Writer<ELFT>::sortSections() { Script->adjustSectionsAfterSorting(); } +static bool compareByFilePosition(InputSection *A, InputSection *B) { + // Synthetic doesn't have link order dependecy, stable_sort will keep it last + if (A->kind() == InputSectionBase::Synthetic || + B->kind() == InputSectionBase::Synthetic) + return false; + InputSection *LA = A->getLinkOrderDep(); + InputSection *LB = B->getLinkOrderDep(); + OutputSection *AOut = LA->getParent(); + OutputSection *BOut = LB->getParent(); + if (AOut != BOut) + return AOut->SectionIndex < BOut->SectionIndex; + return LA->OutSecOff < LB->OutSecOff; +} + +// This function is used by the --merge-exidx-entries to detect duplicate +// .ARM.exidx sections. It is Arm only. +// +// The .ARM.exidx section is of the form: +// | PREL31 offset to function | Unwind instructions for function | +// where the unwind instructions are either a small number of unwind +// instructions inlined into the table entry, the special CANT_UNWIND value of +// 0x1 or a PREL31 offset into a .ARM.extab Section that contains unwind +// instructions. +// +// We return true if all the unwind instructions in the .ARM.exidx entries of +// Cur can be merged into the last entry of Prev. +static bool isDuplicateArmExidxSec(InputSection *Prev, InputSection *Cur) { + + // References to .ARM.Extab Sections have bit 31 clear and are not the + // special EXIDX_CANTUNWIND bit-pattern. + auto IsExtabRef = [](uint32_t Unwind) { + return (Unwind & 0x80000000) == 0 && Unwind != 0x1; + }; + + struct ExidxEntry { + ulittle32_t Fn; + ulittle32_t Unwind; + }; + + // Get the last table Entry from the previous .ARM.exidx section. + const ExidxEntry &PrevEntry = *reinterpret_cast<const ExidxEntry *>( + Prev->Data.data() + Prev->getSize() - sizeof(ExidxEntry)); + if (IsExtabRef(PrevEntry.Unwind)) + return false; + + // We consider the unwind instructions of an .ARM.exidx table entry + // a duplicate if the previous unwind instructions if: + // - Both are the special EXIDX_CANTUNWIND. + // - Both are the same inline unwind instructions. + // We do not attempt to follow and check links into .ARM.extab tables as + // consecutive identical entries are rare and the effort to check that they + // are identical is high. + + if (isa<SyntheticSection>(Cur)) + // Exidx sentinel section has implicit EXIDX_CANTUNWIND; + return PrevEntry.Unwind == 0x1; + + ArrayRef<const ExidxEntry> Entries( + reinterpret_cast<const ExidxEntry *>(Cur->Data.data()), + Cur->getSize() / sizeof(ExidxEntry)); + for (const ExidxEntry &Entry : Entries) + if (IsExtabRef(Entry.Unwind) || Entry.Unwind != PrevEntry.Unwind) + return false; + // All table entries in this .ARM.exidx Section can be merged into the + // previous Section. + return true; +} + +template <class ELFT> void Writer<ELFT>::resolveShfLinkOrder() { + for (OutputSection *Sec : OutputSections) { + if (!(Sec->Flags & SHF_LINK_ORDER)) + continue; + + // Link order may be distributed across several InputSectionDescriptions + // but sort must consider them all at once. + std::vector<InputSection **> ScriptSections; + std::vector<InputSection *> Sections; + for (BaseCommand *Base : Sec->SectionCommands) { + if (auto *ISD = dyn_cast<InputSectionDescription>(Base)) { + for (InputSection *&IS : ISD->Sections) { + ScriptSections.push_back(&IS); + Sections.push_back(IS); + } + } + } + std::stable_sort(Sections.begin(), Sections.end(), compareByFilePosition); + + if (Config->MergeArmExidx && !Config->Relocatable && + Config->EMachine == EM_ARM && Sec->Type == SHT_ARM_EXIDX) { + // The EHABI for the Arm Architecture permits consecutive identical + // table entries to be merged. We use a simple implementation that + // removes a .ARM.exidx Input Section if it can be merged into the + // previous one. This does not require any rewriting of InputSection + // contents but misses opportunities for fine grained deduplication where + // only a subset of the InputSection contents can be merged. + int Cur = 1; + int Prev = 0; + int N = Sections.size(); + while (Cur < N) { + if (isDuplicateArmExidxSec(Sections[Prev], Sections[Cur])) + Sections[Cur] = nullptr; + else + Prev = Cur; + ++Cur; + } + } + + for (int I = 0, N = Sections.size(); I < N; ++I) + *ScriptSections[I] = Sections[I]; + + // Remove the Sections we marked as duplicate earlier. + for (BaseCommand *Base : Sec->SectionCommands) + if (auto *ISD = dyn_cast<InputSectionDescription>(Base)) + ISD->Sections.erase( + std::remove(ISD->Sections.begin(), ISD->Sections.end(), nullptr), + ISD->Sections.end()); + } +} + static void applySynthetic(const std::vector<SyntheticSection *> &Sections, std::function<void(SyntheticSection *)> Fn) { for (SyntheticSection *SS : Sections) @@ -1131,10 +1270,18 @@ static void applySynthetic(const std::vector<SyntheticSection *> &Sections, Fn(SS); } -// We need to add input synthetic sections early in createSyntheticSections() -// to make them visible from linkescript side. But not all sections are always -// required to be in output. For example we don't need dynamic section content -// sometimes. This function filters out such unused sections from the output. +// In order to allow users to manipulate linker-synthesized sections, +// we had to add synthetic sections to the input section list early, +// even before we make decisions whether they are needed. This allows +// users to write scripts like this: ".mygot : { .got }". +// +// Doing it has an unintended side effects. If it turns out that we +// don't need a .got (for example) at all because there's no +// relocation that needs a .got, we don't want to emit .got. +// +// To deal with the above problem, this function is called after +// scanRelocations is called to remove synthetic sections that turn +// out to be empty. static void removeUnusedSyntheticSections() { // All input synthetic sections that can be empty are placed after // all regular ones. We iterate over them all and exit at first @@ -1146,55 +1293,73 @@ static void removeUnusedSyntheticSections() { OutputSection *OS = SS->getParent(); if (!SS->empty() || !OS) continue; - if ((SS == InX::Got || SS == InX::MipsGot) && ElfSym::GlobalOffsetTable) - continue; - OutputSectionCommand *Cmd = Script->getCmd(OS); - std::vector<BaseCommand *>::iterator Empty = Cmd->Commands.end(); - for (auto I = Cmd->Commands.begin(), E = Cmd->Commands.end(); I != E; ++I) { + std::vector<BaseCommand *>::iterator Empty = OS->SectionCommands.end(); + for (auto I = OS->SectionCommands.begin(), E = OS->SectionCommands.end(); + I != E; ++I) { BaseCommand *B = *I; if (auto *ISD = dyn_cast<InputSectionDescription>(B)) { - auto P = std::find(ISD->Sections.begin(), ISD->Sections.end(), SS); - if (P != ISD->Sections.end()) - ISD->Sections.erase(P); + llvm::erase_if(ISD->Sections, + [=](InputSection *IS) { return IS == SS; }); if (ISD->Sections.empty()) Empty = I; } } - if (Empty != Cmd->Commands.end()) - Cmd->Commands.erase(Empty); + if (Empty != OS->SectionCommands.end()) + OS->SectionCommands.erase(Empty); // If there are no other sections in the output section, remove it from the // output. - if (Cmd->Commands.empty()) { - // Also remove script commands matching the output section. - auto &Cmds = Script->Opt.Commands; - auto I = std::remove_if(Cmds.begin(), Cmds.end(), [&](BaseCommand *Cmd) { - if (auto *OSCmd = dyn_cast<OutputSectionCommand>(Cmd)) - return OSCmd->Sec == OS; - return false; - }); - Cmds.erase(I, Cmds.end()); - } + if (OS->SectionCommands.empty()) + OS->Live = false; } } +// Returns true if a symbol can be replaced at load-time by a symbol +// with the same name defined in other ELF executable or DSO. +static bool computeIsPreemptible(const Symbol &B) { + assert(!B.isLocal()); + // Only symbols that appear in dynsym can be preempted. + if (!B.includeInDynsym()) + return false; + + // Only default visibility symbols can be preempted. + if (B.Visibility != STV_DEFAULT) + return false; + + // At this point copy relocations have not been created yet, so any + // symbol that is not defined locally is preemptible. + if (!B.isDefined()) + return true; + + // If we have a dynamic list it specifies which local symbols are preemptible. + if (Config->HasDynamicList) + return false; + + if (!Config->Shared) + return false; + + // -Bsymbolic means that definitions are not preempted. + if (Config->Bsymbolic || (Config->BsymbolicFunctions && B.isFunc())) + return false; + return true; +} + // Create output section objects and add them to OutputSections. template <class ELFT> void Writer<ELFT>::finalizeSections() { - Out::DebugInfo = findSectionInScript(".debug_info"); - Out::PreinitArray = findSectionInScript(".preinit_array"); - Out::InitArray = findSectionInScript(".init_array"); - Out::FiniArray = findSectionInScript(".fini_array"); + Out::DebugInfo = findSection(".debug_info"); + Out::PreinitArray = findSection(".preinit_array"); + Out::InitArray = findSection(".init_array"); + Out::FiniArray = findSection(".fini_array"); // The linker needs to define SECNAME_start, SECNAME_end and SECNAME_stop // symbols for sections, so that the runtime can get the start and end // addresses of each section by section name. Add such symbols. if (!Config->Relocatable) { addStartEndSymbols(); - for (BaseCommand *Base : Script->Opt.Commands) - if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base)) - if (Cmd->Sec) - addStartStopSymbols(Cmd->Sec); + for (BaseCommand *Base : Script->SectionCommands) + if (auto *Sec = dyn_cast<OutputSection>(Base)) + addStartStopSymbols(Sec); } // Add _DYNAMIC symbol. Unlike GNU gold, our _DYNAMIC symbol has no type. @@ -1202,7 +1367,9 @@ template <class ELFT> void Writer<ELFT>::finalizeSections() { // Even the author of gold doesn't remember why gold behaves that way. // https://sourceware.org/ml/binutils/2002-03/msg00360.html if (InX::DynSymTab) - addRegular<ELFT>("_DYNAMIC", InX::Dynamic, 0); + Symtab->addRegular<ELFT>("_DYNAMIC", STV_HIDDEN, STT_NOTYPE, 0 /*Value*/, + /*Size=*/0, STB_WEAK, InX::Dynamic, + /*File=*/nullptr); // Define __rel[a]_iplt_{start,end} symbols if needed. addRelIpltSymbols(); @@ -1210,12 +1377,16 @@ template <class ELFT> void Writer<ELFT>::finalizeSections() { // This responsible for splitting up .eh_frame section into // pieces. The relocation scan uses those pieces, so this has to be // earlier. - applySynthetic({In<ELFT>::EhFrame}, + applySynthetic({InX::EhFrame}, [](SyntheticSection *SS) { SS->finalizeContents(); }); + for (Symbol *S : Symtab->getSymbols()) + S->IsPreemptible |= computeIsPreemptible(*S); + // Scan relocations. This must be done after every symbol is declared so that // we can correctly decide if a dynamic relocation is needed. - forEachRelSec(scanRelocations<ELFT>); + if (!Config->Relocatable) + forEachRelSec(scanRelocations<ELFT>); if (InX::Plt && !InX::Plt->empty()) InX::Plt->addSymbols(); @@ -1224,42 +1395,41 @@ template <class ELFT> void Writer<ELFT>::finalizeSections() { // Now that we have defined all possible global symbols including linker- // synthesized ones. Visit all symbols to give the finishing touches. - for (Symbol *S : Symtab<ELFT>::X->getSymbols()) { - SymbolBody *Body = S->body(); - - if (!includeInSymtab(*Body)) + for (Symbol *Sym : Symtab->getSymbols()) { + if (!includeInSymtab(*Sym)) continue; if (InX::SymTab) - InX::SymTab->addSymbol(Body); + InX::SymTab->addSymbol(Sym); - if (InX::DynSymTab && S->includeInDynsym()) { - InX::DynSymTab->addSymbol(Body); - if (auto *SS = dyn_cast<SharedSymbol>(Body)) - if (cast<SharedFile<ELFT>>(SS->File)->isNeeded()) + if (InX::DynSymTab && Sym->includeInDynsym()) { + InX::DynSymTab->addSymbol(Sym); + if (auto *SS = dyn_cast<SharedSymbol>(Sym)) + if (cast<SharedFile<ELFT>>(Sym->File)->IsNeeded) In<ELFT>::VerNeed->addSymbol(SS); } } // Do not proceed if there was an undefined symbol. - if (ErrorCount) + if (errorCount()) return; addPredefinedSections(); removeUnusedSyntheticSections(); sortSections(); + Script->removeEmptyCommands(); // Now that we have the final list, create a list of all the - // OutputSectionCommands for convenience. - for (BaseCommand *Base : Script->Opt.Commands) - if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base)) - OutputSectionCommands.push_back(Cmd); + // OutputSections for convenience. + for (BaseCommand *Base : Script->SectionCommands) + if (auto *Sec = dyn_cast<OutputSection>(Base)) + OutputSections.push_back(Sec); // Prefer command line supplied address over other constraints. - for (OutputSectionCommand *Cmd : OutputSectionCommands) { - auto I = Config->SectionStartMap.find(Cmd->Name); + for (OutputSection *Sec : OutputSections) { + auto I = Config->SectionStartMap.find(Sec->Name); if (I != Config->SectionStartMap.end()) - Cmd->AddrExpr = [=] { return I->second; }; + Sec->AddrExpr = [=] { return I->second; }; } // This is a bit of a hack. A value of 0 means undef, so we set it @@ -1268,8 +1438,7 @@ template <class ELFT> void Writer<ELFT>::finalizeSections() { Out::ElfHeader->SectionIndex = 1; unsigned I = 1; - for (OutputSectionCommand *Cmd : OutputSectionCommands) { - OutputSection *Sec = Cmd->Sec; + for (OutputSection *Sec : OutputSections) { Sec->SectionIndex = I++; Sec->ShName = InX::ShStrTab->addString(Sec->Name); } @@ -1283,64 +1452,73 @@ template <class ELFT> void Writer<ELFT>::finalizeSections() { Out::ProgramHeaders->Size = sizeof(Elf_Phdr) * Phdrs.size(); } + // Some symbols are defined in term of program headers. Now that we + // have the headers, we can find out which sections they point to. + setReservedSymbolSections(); + // Dynamic section must be the last one in this list and dynamic // symbol table section (DynSymTab) must be the first one. - applySynthetic({InX::DynSymTab, InX::Bss, InX::BssRelRo, - InX::GnuHashTab, In<ELFT>::HashTab, InX::SymTab, - InX::ShStrTab, InX::StrTab, In<ELFT>::VerDef, - InX::DynStrTab, InX::GdbIndex, InX::Got, - InX::MipsGot, InX::IgotPlt, InX::GotPlt, - In<ELFT>::RelaDyn, In<ELFT>::RelaIplt, In<ELFT>::RelaPlt, - InX::Plt, InX::Iplt, In<ELFT>::EhFrameHdr, - In<ELFT>::VerSym, In<ELFT>::VerNeed, InX::Dynamic}, - [](SyntheticSection *SS) { SS->finalizeContents(); }); + applySynthetic( + {InX::DynSymTab, InX::Bss, InX::BssRelRo, InX::GnuHashTab, + InX::HashTab, InX::SymTab, InX::ShStrTab, InX::StrTab, + In<ELFT>::VerDef, InX::DynStrTab, InX::Got, InX::MipsGot, + InX::IgotPlt, InX::GotPlt, InX::RelaDyn, InX::RelaIplt, + InX::RelaPlt, InX::Plt, InX::Iplt, InX::EhFrameHdr, + In<ELFT>::VerSym, In<ELFT>::VerNeed, InX::Dynamic}, + [](SyntheticSection *SS) { SS->finalizeContents(); }); + + if (!Script->HasSectionsCommand && !Config->Relocatable) + fixSectionAlignments(); + + // After link order processing .ARM.exidx sections can be deduplicated, which + // needs to be resolved before any other address dependent operation. + resolveShfLinkOrder(); - // Some architectures use small displacements for jump instructions. - // It is linker's responsibility to create thunks containing long - // jump instructions if jump targets are too far. Create thunks. - if (Target->NeedsThunks) { - // FIXME: only ARM Interworking and Mips LA25 Thunks are implemented, - // these - // do not require address information. To support range extension Thunks - // we need to assign addresses so that we can tell if jump instructions - // are out of range. This will need to turn into a loop that converges - // when no more Thunks are added + // Some architectures need to generate content that depends on the address + // of InputSections. For example some architectures use small displacements + // for jump instructions that is is the linker's responsibility for creating + // range extension thunks for. As the generation of the content may also + // alter InputSection addresses we must converge to a fixed point. + if (Target->NeedsThunks || Config->AndroidPackDynRelocs) { ThunkCreator TC; - Script->assignAddresses(); - if (TC.createThunks(OutputSectionCommands)) { - applySynthetic({InX::MipsGot}, - [](SyntheticSection *SS) { SS->updateAllocSize(); }); - if (TC.createThunks(OutputSectionCommands)) - fatal("All non-range thunks should be created in first call"); - } + AArch64Err843419Patcher A64P; + bool Changed; + do { + Script->assignAddresses(); + Changed = false; + if (Target->NeedsThunks) + Changed |= TC.createThunks(OutputSections); + if (Config->FixCortexA53Errata843419) { + if (Changed) + Script->assignAddresses(); + Changed |= A64P.createFixes(); + } + if (InX::MipsGot) + InX::MipsGot->updateAllocSize(); + Changed |= InX::RelaDyn->updateAllocSize(); + } while (Changed); } // Fill other section headers. The dynamic table is finalized // at the end because some tags like RELSZ depend on result // of finalizing other sections. - for (OutputSectionCommand *Cmd : OutputSectionCommands) - Cmd->finalize<ELFT>(); + for (OutputSection *Sec : OutputSections) + Sec->finalize<ELFT>(); // createThunks may have added local symbols to the static symbol table - applySynthetic({InX::SymTab, InX::ShStrTab, InX::StrTab}, + applySynthetic({InX::SymTab}, [](SyntheticSection *SS) { SS->postThunkContents(); }); } template <class ELFT> void Writer<ELFT>::addPredefinedSections() { // ARM ABI requires .ARM.exidx to be terminated by some piece of data. // We have the terminater synthetic section class. Add that at the end. - OutputSectionCommand *Cmd = findSectionCommand(".ARM.exidx"); - if (!Cmd || !Cmd->Sec || Config->Relocatable) + OutputSection *Cmd = findSection(".ARM.exidx"); + if (!Cmd || !Cmd->Live || Config->Relocatable) return; auto *Sentinel = make<ARMExidxSentinelSection>(); - Cmd->Sec->addSection(Sentinel); - // Add the sentinel to the last of these too. - auto ISD = std::find_if(Cmd->Commands.rbegin(), Cmd->Commands.rend(), - [](const BaseCommand *Base) { - return isa<InputSectionDescription>(Base); - }); - cast<InputSectionDescription>(*ISD)->Sections.push_back(Sentinel); + Cmd->addSection(Sentinel); } // The linker is expected to define SECNAME_start and SECNAME_end @@ -1364,7 +1542,7 @@ template <class ELFT> void Writer<ELFT>::addStartEndSymbols() { Define("__init_array_start", "__init_array_end", Out::InitArray); Define("__fini_array_start", "__fini_array_end", Out::FiniArray); - if (OutputSection *Sec = findSectionInScript(".ARM.exidx")) + if (OutputSection *Sec = findSection(".ARM.exidx")) Define("__exidx_start", "__exidx_end", Sec); } @@ -1382,22 +1560,6 @@ void Writer<ELFT>::addStartStopSymbols(OutputSection *Sec) { addOptionalRegular<ELFT>(Saver.save("__stop_" + S), Sec, -1, STV_DEFAULT); } -template <class ELFT> -OutputSectionCommand *Writer<ELFT>::findSectionCommand(StringRef Name) { - for (BaseCommand *Base : Script->Opt.Commands) - if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base)) - if (Cmd->Name == Name) - return Cmd; - return nullptr; -} - -template <class ELFT> -OutputSection *Writer<ELFT>::findSectionInScript(StringRef Name) { - if (OutputSectionCommand *Cmd = findSectionCommand(Name)) - return Cmd->Sec; - return nullptr; -} - static bool needsPtLoad(OutputSection *Sec) { if (!(Sec->Flags & SHF_ALLOC)) return false; @@ -1424,19 +1586,19 @@ static uint64_t computeFlags(uint64_t Flags) { // Decide which program headers to create and which sections to include in each // one. -template <class ELFT> std::vector<PhdrEntry> Writer<ELFT>::createPhdrs() { - std::vector<PhdrEntry> Ret; +template <class ELFT> std::vector<PhdrEntry *> Writer<ELFT>::createPhdrs() { + std::vector<PhdrEntry *> Ret; auto AddHdr = [&](unsigned Type, unsigned Flags) -> PhdrEntry * { - Ret.emplace_back(Type, Flags); - return &Ret.back(); + Ret.push_back(make<PhdrEntry>(Type, Flags)); + return Ret.back(); }; // The first phdr entry is PT_PHDR which describes the program header itself. AddHdr(PT_PHDR, PF_R)->add(Out::ProgramHeaders); // PT_INTERP must be the second entry if exists. - if (OutputSection *Sec = findSectionInScript(".interp")) - AddHdr(PT_INTERP, Sec->getPhdrFlags())->add(Sec); + if (OutputSection *Cmd = findSection(".interp")) + AddHdr(PT_INTERP, Cmd->getPhdrFlags())->add(Cmd); // Add the first PT_LOAD segment for regular output sections. uint64_t Flags = computeFlags(PF_R); @@ -1446,8 +1608,7 @@ template <class ELFT> std::vector<PhdrEntry> Writer<ELFT>::createPhdrs() { Load->add(Out::ElfHeader); Load->add(Out::ProgramHeaders); - for (OutputSectionCommand *Cmd : OutputSectionCommands) { - OutputSection *Sec = Cmd->Sec; + for (OutputSection *Sec : OutputSections) { if (!(Sec->Flags & SHF_ALLOC)) break; if (!needsPtLoad(Sec)) @@ -1459,7 +1620,7 @@ template <class ELFT> std::vector<PhdrEntry> Writer<ELFT>::createPhdrs() { // different flags or is loaded at a discontiguous address using AT linker // script command. uint64_t NewFlags = computeFlags(Sec->getPhdrFlags()); - if (Cmd->LMAExpr || Flags != NewFlags) { + if (Sec->LMAExpr || Flags != NewFlags) { Load = AddHdr(PT_LOAD, NewFlags); Flags = NewFlags; } @@ -1468,14 +1629,12 @@ template <class ELFT> std::vector<PhdrEntry> Writer<ELFT>::createPhdrs() { } // Add a TLS segment if any. - PhdrEntry TlsHdr(PT_TLS, PF_R); - for (OutputSectionCommand *Cmd : OutputSectionCommands) { - OutputSection *Sec = Cmd->Sec; + PhdrEntry *TlsHdr = make<PhdrEntry>(PT_TLS, PF_R); + for (OutputSection *Sec : OutputSections) if (Sec->Flags & SHF_TLS) - TlsHdr.add(Sec); - } - if (TlsHdr.First) - Ret.push_back(std::move(TlsHdr)); + TlsHdr->add(Sec); + if (TlsHdr->FirstSec) + Ret.push_back(TlsHdr); // Add an entry for .dynamic. if (InX::DynSymTab) @@ -1484,25 +1643,39 @@ template <class ELFT> std::vector<PhdrEntry> Writer<ELFT>::createPhdrs() { // PT_GNU_RELRO includes all sections that should be marked as // read-only by dynamic linker after proccessing relocations. - PhdrEntry RelRo(PT_GNU_RELRO, PF_R); - for (OutputSectionCommand *Cmd : OutputSectionCommands) { - OutputSection *Sec = Cmd->Sec; - if (needsPtLoad(Sec) && isRelroSection(Sec)) - RelRo.add(Sec); + // Current dynamic loaders only support one PT_GNU_RELRO PHDR, give + // an error message if more than one PT_GNU_RELRO PHDR is required. + PhdrEntry *RelRo = make<PhdrEntry>(PT_GNU_RELRO, PF_R); + bool InRelroPhdr = false; + bool IsRelroFinished = false; + for (OutputSection *Sec : OutputSections) { + if (!needsPtLoad(Sec)) + continue; + if (isRelroSection(Sec)) { + InRelroPhdr = true; + if (!IsRelroFinished) + RelRo->add(Sec); + else + error("section: " + Sec->Name + " is not contiguous with other relro" + + " sections"); + } else if (InRelroPhdr) { + InRelroPhdr = false; + IsRelroFinished = true; + } } - if (RelRo.First) - Ret.push_back(std::move(RelRo)); + if (RelRo->FirstSec) + Ret.push_back(RelRo); // PT_GNU_EH_FRAME is a special section pointing on .eh_frame_hdr. - if (!In<ELFT>::EhFrame->empty() && In<ELFT>::EhFrameHdr && - In<ELFT>::EhFrame->getParent() && In<ELFT>::EhFrameHdr->getParent()) - AddHdr(PT_GNU_EH_FRAME, In<ELFT>::EhFrameHdr->getParent()->getPhdrFlags()) - ->add(In<ELFT>::EhFrameHdr->getParent()); + if (!InX::EhFrame->empty() && InX::EhFrameHdr && InX::EhFrame->getParent() && + InX::EhFrameHdr->getParent()) + AddHdr(PT_GNU_EH_FRAME, InX::EhFrameHdr->getParent()->getPhdrFlags()) + ->add(InX::EhFrameHdr->getParent()); // PT_OPENBSD_RANDOMIZE is an OpenBSD-specific feature. That makes // the dynamic linker fill the segment with random data. - if (OutputSection *Sec = findSectionInScript(".openbsd.randomdata")) - AddHdr(PT_OPENBSD_RANDOMIZE, Sec->getPhdrFlags())->add(Sec); + if (OutputSection *Cmd = findSection(".openbsd.randomdata")) + AddHdr(PT_OPENBSD_RANDOMIZE, Cmd->getPhdrFlags())->add(Cmd); // PT_GNU_STACK is a special section to tell the loader to make the // pages for the stack non-executable. If you really want an executable @@ -1524,10 +1697,9 @@ template <class ELFT> std::vector<PhdrEntry> Writer<ELFT>::createPhdrs() { // Create one PT_NOTE per a group of contiguous .note sections. PhdrEntry *Note = nullptr; - for (OutputSectionCommand *Cmd : OutputSectionCommands) { - OutputSection *Sec = Cmd->Sec; + for (OutputSection *Sec : OutputSections) { if (Sec->Type == SHT_NOTE) { - if (!Note || Cmd->LMAExpr) + if (!Note || Sec->LMAExpr) Note = AddHdr(PT_NOTE, PF_R); Note->add(Sec); } else { @@ -1538,18 +1710,18 @@ template <class ELFT> std::vector<PhdrEntry> Writer<ELFT>::createPhdrs() { } template <class ELFT> -void Writer<ELFT>::addPtArmExid(std::vector<PhdrEntry> &Phdrs) { +void Writer<ELFT>::addPtArmExid(std::vector<PhdrEntry *> &Phdrs) { if (Config->EMachine != EM_ARM) return; - auto I = llvm::find_if(OutputSectionCommands, [](OutputSectionCommand *Cmd) { - return Cmd->Sec->Type == SHT_ARM_EXIDX; + auto I = llvm::find_if(OutputSections, [](OutputSection *Cmd) { + return Cmd->Type == SHT_ARM_EXIDX; }); - if (I == OutputSectionCommands.end()) + if (I == OutputSections.end()) return; // PT_ARM_EXIDX is the ARM EHABI equivalent of PT_GNU_EH_FRAME - PhdrEntry ARMExidx(PT_ARM_EXIDX, PF_R); - ARMExidx.add((*I)->Sec); + PhdrEntry *ARMExidx = make<PhdrEntry>(PT_ARM_EXIDX, PF_R); + ARMExidx->add(*I); Phdrs.push_back(ARMExidx); } @@ -1557,33 +1729,31 @@ void Writer<ELFT>::addPtArmExid(std::vector<PhdrEntry> &Phdrs) { // first section after PT_GNU_RELRO have to be page aligned so that the dynamic // linker can set the permissions. template <class ELFT> void Writer<ELFT>::fixSectionAlignments() { - auto PageAlign = [](OutputSection *Sec) { - OutputSectionCommand *Cmd = Script->getCmd(Sec); + auto PageAlign = [](OutputSection *Cmd) { if (Cmd && !Cmd->AddrExpr) Cmd->AddrExpr = [=] { return alignTo(Script->getDot(), Config->MaxPageSize); }; }; - for (const PhdrEntry &P : Phdrs) - if (P.p_type == PT_LOAD && P.First) - PageAlign(P.First); + for (const PhdrEntry *P : Phdrs) + if (P->p_type == PT_LOAD && P->FirstSec) + PageAlign(P->FirstSec); - for (const PhdrEntry &P : Phdrs) { - if (P.p_type != PT_GNU_RELRO) + for (const PhdrEntry *P : Phdrs) { + if (P->p_type != PT_GNU_RELRO) continue; - if (P.First) - PageAlign(P.First); + if (P->FirstSec) + PageAlign(P->FirstSec); // Find the first section after PT_GNU_RELRO. If it is in a PT_LOAD we // have to align it to a page. - auto End = OutputSectionCommands.end(); - auto I = - std::find(OutputSectionCommands.begin(), End, Script->getCmd(P.Last)); + auto End = OutputSections.end(); + auto I = std::find(OutputSections.begin(), End, P->LastSec); if (I == End || (I + 1) == End) continue; - OutputSection *Sec = (*(I + 1))->Sec; - if (needsPtLoad(Sec)) - PageAlign(Sec); + OutputSection *Cmd = (*(I + 1)); + if (needsPtLoad(Cmd)) + PageAlign(Cmd); } } @@ -1591,40 +1761,39 @@ template <class ELFT> void Writer<ELFT>::fixSectionAlignments() { // its new file offset. The file offset must be the same with its // virtual address (modulo the page size) so that the loader can load // executables without any address adjustment. -static uint64_t getFileAlignment(uint64_t Off, OutputSection *Sec) { - OutputSection *First = Sec->FirstInPtLoad; +static uint64_t getFileAlignment(uint64_t Off, OutputSection *Cmd) { // If the section is not in a PT_LOAD, we just have to align it. - if (!First) - return alignTo(Off, Sec->Alignment); + if (!Cmd->PtLoad) + return alignTo(Off, Cmd->Alignment); + OutputSection *First = Cmd->PtLoad->FirstSec; // The first section in a PT_LOAD has to have congruent offset and address // module the page size. - if (Sec == First) - return alignTo(Off, Config->MaxPageSize, Sec->Addr); + if (Cmd == First) + return alignTo(Off, std::max<uint64_t>(Cmd->Alignment, Config->MaxPageSize), + Cmd->Addr); // If two sections share the same PT_LOAD the file offset is calculated // using this formula: Off2 = Off1 + (VA2 - VA1). - return First->Offset + Sec->Addr - First->Addr; + return First->Offset + Cmd->Addr - First->Addr; } -static uint64_t setOffset(OutputSection *Sec, uint64_t Off) { - if (Sec->Type == SHT_NOBITS) { - Sec->Offset = Off; +static uint64_t setOffset(OutputSection *Cmd, uint64_t Off) { + if (Cmd->Type == SHT_NOBITS) { + Cmd->Offset = Off; return Off; } - Off = getFileAlignment(Off, Sec); - Sec->Offset = Off; - return Off + Sec->Size; + Off = getFileAlignment(Off, Cmd); + Cmd->Offset = Off; + return Off + Cmd->Size; } template <class ELFT> void Writer<ELFT>::assignFileOffsetsBinary() { uint64_t Off = 0; - for (OutputSectionCommand *Cmd : OutputSectionCommands) { - OutputSection *Sec = Cmd->Sec; + for (OutputSection *Sec : OutputSections) if (Sec->Flags & SHF_ALLOC) Off = setOffset(Sec, Off); - } FileSize = alignTo(Off, Config->Wordsize); } @@ -1634,46 +1803,58 @@ template <class ELFT> void Writer<ELFT>::assignFileOffsets() { Off = setOffset(Out::ElfHeader, Off); Off = setOffset(Out::ProgramHeaders, Off); - for (OutputSectionCommand *Cmd : OutputSectionCommands) - Off = setOffset(Cmd->Sec, Off); + PhdrEntry *LastRX = nullptr; + for (PhdrEntry *P : Phdrs) + if (P->p_type == PT_LOAD && (P->p_flags & PF_X)) + LastRX = P; + + for (OutputSection *Sec : OutputSections) { + Off = setOffset(Sec, Off); + if (Script->HasSectionsCommand) + continue; + // If this is a last section of the last executable segment and that + // segment is the last loadable segment, align the offset of the + // following section to avoid loading non-segments parts of the file. + if (LastRX && LastRX->LastSec == Sec) + Off = alignTo(Off, Target->PageSize); + } SectionHeaderOff = alignTo(Off, Config->Wordsize); - FileSize = - SectionHeaderOff + (OutputSectionCommands.size() + 1) * sizeof(Elf_Shdr); + FileSize = SectionHeaderOff + (OutputSections.size() + 1) * sizeof(Elf_Shdr); } // Finalize the program headers. We call this function after we assign // file offsets and VAs to all sections. template <class ELFT> void Writer<ELFT>::setPhdrs() { - for (PhdrEntry &P : Phdrs) { - OutputSection *First = P.First; - OutputSection *Last = P.Last; + for (PhdrEntry *P : Phdrs) { + OutputSection *First = P->FirstSec; + OutputSection *Last = P->LastSec; if (First) { - P.p_filesz = Last->Offset - First->Offset; + P->p_filesz = Last->Offset - First->Offset; if (Last->Type != SHT_NOBITS) - P.p_filesz += Last->Size; - P.p_memsz = Last->Addr + Last->Size - First->Addr; - P.p_offset = First->Offset; - P.p_vaddr = First->Addr; - if (!P.HasLMA) - P.p_paddr = First->getLMA(); + P->p_filesz += Last->Size; + P->p_memsz = Last->Addr + Last->Size - First->Addr; + P->p_offset = First->Offset; + P->p_vaddr = First->Addr; + if (!P->HasLMA) + P->p_paddr = First->getLMA(); } - if (P.p_type == PT_LOAD) - P.p_align = Config->MaxPageSize; - else if (P.p_type == PT_GNU_RELRO) { - P.p_align = 1; + if (P->p_type == PT_LOAD) + P->p_align = std::max<uint64_t>(P->p_align, Config->MaxPageSize); + else if (P->p_type == PT_GNU_RELRO) { + P->p_align = 1; // The glibc dynamic loader rounds the size down, so we need to round up // to protect the last page. This is a no-op on FreeBSD which always // rounds up. - P.p_memsz = alignTo(P.p_memsz, Target->PageSize); + P->p_memsz = alignTo(P->p_memsz, Target->PageSize); } // The TLS pointer goes after PT_TLS. At least glibc will align it, // so round up the size to make sure the offsets are correct. - if (P.p_type == PT_TLS) { - Out::TlsPhdr = &P; - if (P.p_memsz) - P.p_memsz = alignTo(P.p_memsz, P.p_align); + if (P->p_type == PT_TLS) { + Out::TlsPhdr = P; + if (P->p_memsz) + P->p_memsz = alignTo(P->p_memsz, P->p_align); } } } @@ -1682,27 +1863,29 @@ template <class ELFT> void Writer<ELFT>::setPhdrs() { // // 1. the '-e' entry command-line option; // 2. the ENTRY(symbol) command in a linker control script; -// 3. the value of the symbol start, if present; -// 4. the address of the first byte of the .text section, if present; -// 5. the address 0. +// 3. the value of the symbol _start, if present; +// 4. the number represented by the entry symbol, if it is a number; +// 5. the address of the first byte of the .text section, if present; +// 6. the address 0. template <class ELFT> uint64_t Writer<ELFT>::getEntryAddr() { - // Case 1, 2 or 3. As a special case, if the symbol is actually - // a number, we'll use that number as an address. - if (SymbolBody *B = Symtab<ELFT>::X->find(Config->Entry)) + // Case 1, 2 or 3 + if (Symbol *B = Symtab->find(Config->Entry)) return B->getVA(); + + // Case 4 uint64_t Addr; if (to_integer(Config->Entry, Addr)) return Addr; - // Case 4 - if (OutputSection *Sec = findSectionInScript(".text")) { + // Case 5 + if (OutputSection *Sec = findSection(".text")) { if (Config->WarnMissingEntry) warn("cannot find entry symbol " + Config->Entry + "; defaulting to 0x" + utohexstr(Sec->Addr)); return Sec->Addr; } - // Case 5 + // Case 6 if (Config->WarnMissingEntry) warn("cannot find entry symbol " + Config->Entry + "; not setting start address"); @@ -1717,64 +1900,6 @@ static uint16_t getELFType() { return ET_EXEC; } -// This function is called after we have assigned address and size -// to each section. This function fixes some predefined -// symbol values that depend on section address and size. -template <class ELFT> void Writer<ELFT>::fixPredefinedSymbols() { - // _etext is the first location after the last read-only loadable segment. - // _edata is the first location after the last read-write loadable segment. - // _end is the first location after the uninitialized data region. - PhdrEntry *Last = nullptr; - PhdrEntry *LastRO = nullptr; - PhdrEntry *LastRW = nullptr; - for (PhdrEntry &P : Phdrs) { - if (P.p_type != PT_LOAD) - continue; - Last = &P; - if (P.p_flags & PF_W) - LastRW = &P; - else - LastRO = &P; - } - - auto Set = [](DefinedRegular *S, OutputSection *Sec, uint64_t Value) { - if (S) { - S->Section = Sec; - S->Value = Value; - } - }; - - if (Last) { - Set(ElfSym::End1, Last->First, Last->p_memsz); - Set(ElfSym::End2, Last->First, Last->p_memsz); - } - if (LastRO) { - Set(ElfSym::Etext1, LastRO->First, LastRO->p_filesz); - Set(ElfSym::Etext2, LastRO->First, LastRO->p_filesz); - } - if (LastRW) { - Set(ElfSym::Edata1, LastRW->First, LastRW->p_filesz); - Set(ElfSym::Edata2, LastRW->First, LastRW->p_filesz); - } - - if (ElfSym::Bss) - ElfSym::Bss->Section = findSectionInScript(".bss"); - - // Setup MIPS _gp_disp/__gnu_local_gp symbols which should - // be equal to the _gp symbol's value. - if (Config->EMachine == EM_MIPS && !ElfSym::MipsGp->Value) { - // Find GP-relative section with the lowest address - // and use this address to calculate default _gp value. - for (const OutputSectionCommand *Cmd : OutputSectionCommands) { - OutputSection *OS = Cmd->Sec; - if (OS->Flags & SHF_MIPS_GPREL) { - ElfSym::MipsGp->Value = OS->Addr + 0x7ff0; - break; - } - } - } -} - template <class ELFT> void Writer<ELFT>::writeHeader() { uint8_t *Buf = Buffer->getBufferStart(); memcpy(Buf, "\177ELF", 4); @@ -1790,20 +1915,13 @@ template <class ELFT> void Writer<ELFT>::writeHeader() { EHdr->e_version = EV_CURRENT; EHdr->e_entry = getEntryAddr(); EHdr->e_shoff = SectionHeaderOff; + EHdr->e_flags = Config->EFlags; EHdr->e_ehsize = sizeof(Elf_Ehdr); EHdr->e_phnum = Phdrs.size(); EHdr->e_shentsize = sizeof(Elf_Shdr); - EHdr->e_shnum = OutputSectionCommands.size() + 1; + EHdr->e_shnum = OutputSections.size() + 1; EHdr->e_shstrndx = InX::ShStrTab->getParent()->SectionIndex; - if (Config->EMachine == EM_ARM) - // We don't currently use any features incompatible with EF_ARM_EABI_VER5, - // but we don't have any firm guarantees of conformance. Linux AArch64 - // kernels (as of 2016) require an EABI version to be set. - EHdr->e_flags = EF_ARM_EABI_VER5; - else if (Config->EMachine == EM_MIPS) - EHdr->e_flags = getMipsEFlags<ELFT>(); - if (!Config->Relocatable) { EHdr->e_phoff = sizeof(Elf_Ehdr); EHdr->e_phentsize = sizeof(Elf_Phdr); @@ -1811,22 +1929,22 @@ template <class ELFT> void Writer<ELFT>::writeHeader() { // Write the program header table. auto *HBuf = reinterpret_cast<Elf_Phdr *>(Buf + EHdr->e_phoff); - for (PhdrEntry &P : Phdrs) { - HBuf->p_type = P.p_type; - HBuf->p_flags = P.p_flags; - HBuf->p_offset = P.p_offset; - HBuf->p_vaddr = P.p_vaddr; - HBuf->p_paddr = P.p_paddr; - HBuf->p_filesz = P.p_filesz; - HBuf->p_memsz = P.p_memsz; - HBuf->p_align = P.p_align; + for (PhdrEntry *P : Phdrs) { + HBuf->p_type = P->p_type; + HBuf->p_flags = P->p_flags; + HBuf->p_offset = P->p_offset; + HBuf->p_vaddr = P->p_vaddr; + HBuf->p_paddr = P->p_paddr; + HBuf->p_filesz = P->p_filesz; + HBuf->p_memsz = P->p_memsz; + HBuf->p_align = P->p_align; ++HBuf; } // Write the section header table. Note that the first table entry is null. auto *SHdrs = reinterpret_cast<Elf_Shdr *>(Buf + EHdr->e_shoff); - for (OutputSectionCommand *Cmd : OutputSectionCommands) - Cmd->Sec->writeHeaderTo<ELFT>(++SHdrs); + for (OutputSection *Sec : OutputSections) + Sec->writeHeaderTo<ELFT>(++SHdrs); } // Open a result file. @@ -1837,23 +1955,58 @@ template <class ELFT> void Writer<ELFT>::openFile() { } unlinkAsync(Config->OutputFile); - ErrorOr<std::unique_ptr<FileOutputBuffer>> BufferOrErr = - FileOutputBuffer::create(Config->OutputFile, FileSize, - FileOutputBuffer::F_executable); + unsigned Flags = 0; + if (!Config->Relocatable) + Flags = FileOutputBuffer::F_executable; + Expected<std::unique_ptr<FileOutputBuffer>> BufferOrErr = + FileOutputBuffer::create(Config->OutputFile, FileSize, Flags); - if (auto EC = BufferOrErr.getError()) - error("failed to open " + Config->OutputFile + ": " + EC.message()); + if (!BufferOrErr) + error("failed to open " + Config->OutputFile + ": " + + llvm::toString(BufferOrErr.takeError())); else Buffer = std::move(*BufferOrErr); } template <class ELFT> void Writer<ELFT>::writeSectionsBinary() { uint8_t *Buf = Buffer->getBufferStart(); - for (OutputSectionCommand *Cmd : OutputSectionCommands) { - OutputSection *Sec = Cmd->Sec; + for (OutputSection *Sec : OutputSections) if (Sec->Flags & SHF_ALLOC) - Cmd->writeTo<ELFT>(Buf + Sec->Offset); - } + Sec->writeTo<ELFT>(Buf + Sec->Offset); +} + +static void fillTrap(uint8_t *I, uint8_t *End) { + for (; I + 4 <= End; I += 4) + memcpy(I, &Target->TrapInstr, 4); +} + +// Fill the last page of executable segments with trap instructions +// instead of leaving them as zero. Even though it is not required by any +// standard, it is in general a good thing to do for security reasons. +// +// We'll leave other pages in segments as-is because the rest will be +// overwritten by output sections. +template <class ELFT> void Writer<ELFT>::writeTrapInstr() { + if (Script->HasSectionsCommand) + return; + + // Fill the last page. + uint8_t *Buf = Buffer->getBufferStart(); + for (PhdrEntry *P : Phdrs) + if (P->p_type == PT_LOAD && (P->p_flags & PF_X)) + fillTrap(Buf + alignDown(P->p_offset + P->p_filesz, Target->PageSize), + Buf + alignTo(P->p_offset + P->p_filesz, Target->PageSize)); + + // Round up the file size of the last segment to the page boundary iff it is + // an executable segment to ensure that other tools don't accidentally + // trim the instruction padding (e.g. when stripping the file). + PhdrEntry *Last = nullptr; + for (PhdrEntry *P : Phdrs) + if (P->p_type == PT_LOAD) + Last = P; + + if (Last && (Last->p_flags & PF_X)) + Last->p_memsz = Last->p_filesz = alignTo(Last->p_filesz, Target->PageSize); } // Write section contents to a mmap'ed file. @@ -1862,39 +2015,32 @@ template <class ELFT> void Writer<ELFT>::writeSections() { // PPC64 needs to process relocations in the .opd section // before processing relocations in code-containing sections. - if (auto *OpdCmd = findSectionCommand(".opd")) { - Out::Opd = OpdCmd->Sec; + if (auto *OpdCmd = findSection(".opd")) { + Out::Opd = OpdCmd; Out::OpdBuf = Buf + Out::Opd->Offset; OpdCmd->template writeTo<ELFT>(Buf + Out::Opd->Offset); } - OutputSection *EhFrameHdr = - (In<ELFT>::EhFrameHdr && !In<ELFT>::EhFrameHdr->empty()) - ? In<ELFT>::EhFrameHdr->getParent() - : nullptr; + OutputSection *EhFrameHdr = nullptr; + if (InX::EhFrameHdr && !InX::EhFrameHdr->empty()) + EhFrameHdr = InX::EhFrameHdr->getParent(); // In -r or -emit-relocs mode, write the relocation sections first as in // ELf_Rel targets we might find out that we need to modify the relocated // section while doing it. - for (OutputSectionCommand *Cmd : OutputSectionCommands) { - OutputSection *Sec = Cmd->Sec; + for (OutputSection *Sec : OutputSections) if (Sec->Type == SHT_REL || Sec->Type == SHT_RELA) - Cmd->writeTo<ELFT>(Buf + Sec->Offset); - } + Sec->writeTo<ELFT>(Buf + Sec->Offset); - for (OutputSectionCommand *Cmd : OutputSectionCommands) { - OutputSection *Sec = Cmd->Sec; + for (OutputSection *Sec : OutputSections) if (Sec != Out::Opd && Sec != EhFrameHdr && Sec->Type != SHT_REL && Sec->Type != SHT_RELA) - Cmd->writeTo<ELFT>(Buf + Sec->Offset); - } + Sec->writeTo<ELFT>(Buf + Sec->Offset); // The .eh_frame_hdr depends on .eh_frame section contents, therefore // it should be written after .eh_frame is written. - if (EhFrameHdr) { - OutputSectionCommand *Cmd = Script->getCmd(EhFrameHdr); - Cmd->writeTo<ELFT>(Buf + EhFrameHdr->Offset); - } + if (EhFrameHdr) + EhFrameHdr->writeTo<ELFT>(Buf + EhFrameHdr->Offset); } template <class ELFT> void Writer<ELFT>::writeBuildId() { @@ -1911,3 +2057,8 @@ template void elf::writeResult<ELF32LE>(); template void elf::writeResult<ELF32BE>(); template void elf::writeResult<ELF64LE>(); template void elf::writeResult<ELF64BE>(); + +template void elf::addReservedSymbols<ELF32LE>(); +template void elf::addReservedSymbols<ELF32BE>(); +template void elf::addReservedSymbols<ELF64LE>(); +template void elf::addReservedSymbols<ELF64BE>(); |