diff options
Diffstat (limited to 'contrib/llvm/tools/lld/ELF/LinkerScript.cpp')
| -rw-r--r-- | contrib/llvm/tools/lld/ELF/LinkerScript.cpp | 2221 |
1 files changed, 733 insertions, 1488 deletions
diff --git a/contrib/llvm/tools/lld/ELF/LinkerScript.cpp b/contrib/llvm/tools/lld/ELF/LinkerScript.cpp index 3cc235386b88..1ced3e8e8d71 100644 --- a/contrib/llvm/tools/lld/ELF/LinkerScript.cpp +++ b/contrib/llvm/tools/lld/ELF/LinkerScript.cpp @@ -13,27 +13,24 @@ #include "LinkerScript.h" #include "Config.h" -#include "Driver.h" #include "InputSection.h" #include "Memory.h" #include "OutputSections.h" -#include "ScriptParser.h" #include "Strings.h" #include "SymbolTable.h" #include "Symbols.h" #include "SyntheticSections.h" #include "Target.h" +#include "Threads.h" #include "Writer.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" -#include "llvm/ADT/StringSwitch.h" #include "llvm/Support/Casting.h" +#include "llvm/Support/Compression.h" #include "llvm/Support/ELF.h" #include "llvm/Support/Endian.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FileSystem.h" -#include "llvm/Support/MathExtras.h" #include "llvm/Support/Path.h" #include <algorithm> #include <cassert> @@ -41,9 +38,7 @@ #include <cstdint> #include <iterator> #include <limits> -#include <memory> #include <string> -#include <tuple> #include <vector> using namespace llvm; @@ -53,77 +48,148 @@ using namespace llvm::support::endian; using namespace lld; using namespace lld::elf; -LinkerScriptBase *elf::ScriptBase; -ScriptConfiguration *elf::ScriptConfig; +LinkerScript *elf::Script; + +uint64_t ExprValue::getValue() const { + if (Sec) { + if (OutputSection *OS = Sec->getOutputSection()) + return alignTo(Sec->getOffset(Val) + OS->Addr, Alignment); + error("unable to evaluate expression: input section " + Sec->Name + + " has no output section assigned"); + } + return alignTo(Val, Alignment); +} + +uint64_t ExprValue::getSecAddr() const { + if (Sec) + return Sec->getOffset(0) + Sec->getOutputSection()->Addr; + return 0; +} template <class ELFT> static SymbolBody *addRegular(SymbolAssignment *Cmd) { + Symbol *Sym; uint8_t Visibility = Cmd->Hidden ? STV_HIDDEN : STV_DEFAULT; - Symbol *Sym = Symtab<ELFT>::X->addUndefined( - Cmd->Name, /*IsLocal=*/false, STB_GLOBAL, Visibility, - /*Type*/ 0, - /*CanOmitFromDynSym*/ false, /*File*/ nullptr); - - replaceBody<DefinedRegular<ELFT>>(Sym, Cmd->Name, /*IsLocal=*/false, - Visibility, STT_NOTYPE, 0, 0, nullptr, - nullptr); + std::tie(Sym, std::ignore) = Symtab<ELFT>::X->insert( + Cmd->Name, /*Type*/ 0, Visibility, /*CanOmitFromDynSym*/ false, + /*File*/ nullptr); + Sym->Binding = STB_GLOBAL; + ExprValue Value = Cmd->Expression(); + SectionBase *Sec = Value.isAbsolute() ? nullptr : Value.Sec; + + // We want to set symbol values early if we can. This allows us to use symbols + // as variables in linker scripts. Doing so allows us to write expressions + // like this: `alignment = 16; . = ALIGN(., alignment)` + uint64_t SymValue = Value.isAbsolute() ? Value.getValue() : 0; + replaceBody<DefinedRegular>(Sym, Cmd->Name, /*IsLocal=*/false, Visibility, + STT_NOTYPE, SymValue, 0, Sec, nullptr); return Sym->body(); } -template <class ELFT> static SymbolBody *addSynthetic(SymbolAssignment *Cmd) { - uint8_t Visibility = Cmd->Hidden ? STV_HIDDEN : STV_DEFAULT; - const OutputSectionBase *Sec = - ScriptConfig->HasSections ? nullptr : Cmd->Expression.Section(); - Symbol *Sym = Symtab<ELFT>::X->addUndefined( - Cmd->Name, /*IsLocal=*/false, STB_GLOBAL, Visibility, - /*Type*/ 0, - /*CanOmitFromDynSym*/ false, /*File*/ nullptr); - - replaceBody<DefinedSynthetic>(Sym, Cmd->Name, 0, Sec); - return Sym->body(); +OutputSectionCommand * +LinkerScript::createOutputSectionCommand(StringRef Name, StringRef Location) { + OutputSectionCommand *&CmdRef = NameToOutputSectionCommand[Name]; + OutputSectionCommand *Cmd; + if (CmdRef && CmdRef->Location.empty()) { + // There was a forward reference. + Cmd = CmdRef; + } else { + Cmd = make<OutputSectionCommand>(Name); + if (!CmdRef) + CmdRef = Cmd; + } + Cmd->Location = Location; + return Cmd; } -static bool isUnderSysroot(StringRef Path) { - if (Config->Sysroot == "") - return false; - for (; !Path.empty(); Path = sys::path::parent_path(Path)) - if (sys::fs::equivalent(Config->Sysroot, Path)) - return true; - return false; +OutputSectionCommand * +LinkerScript::getOrCreateOutputSectionCommand(StringRef Name) { + OutputSectionCommand *&CmdRef = NameToOutputSectionCommand[Name]; + if (!CmdRef) + CmdRef = make<OutputSectionCommand>(Name); + return CmdRef; +} + +void LinkerScript::setDot(Expr E, const Twine &Loc, bool InSec) { + uint64_t Val = E().getValue(); + if (Val < Dot) { + if (InSec) + error(Loc + ": unable to move location counter backward for: " + + CurOutSec->Name); + else + error(Loc + ": unable to move location counter backward"); + } + Dot = Val; + // Update to location counter means update to section size. + if (InSec) + CurOutSec->Size = Dot - CurOutSec->Addr; } -template <class ELFT> static void assignSymbol(SymbolAssignment *Cmd) { - // If there are sections, then let the value be assigned later in - // `assignAddresses`. - if (ScriptConfig->HasSections) +// Sets value of a symbol. Two kinds of symbols are processed: synthetic +// symbols, whose value is an offset from beginning of section and regular +// symbols whose value is absolute. +void LinkerScript::assignSymbol(SymbolAssignment *Cmd, bool InSec) { + if (Cmd->Name == ".") { + setDot(Cmd->Expression, Cmd->Location, InSec); return; + } - uint64_t Value = Cmd->Expression(0); - if (Cmd->Expression.IsAbsolute()) { - cast<DefinedRegular<ELFT>>(Cmd->Sym)->Value = Value; + if (!Cmd->Sym) + return; + + auto *Sym = cast<DefinedRegular>(Cmd->Sym); + ExprValue V = Cmd->Expression(); + if (V.isAbsolute()) { + Sym->Value = V.getValue(); } else { - const OutputSectionBase *Sec = Cmd->Expression.Section(); - if (Sec) - cast<DefinedSynthetic>(Cmd->Sym)->Value = Value - Sec->Addr; + Sym->Section = V.Sec; + if (Sym->Section->Flags & SHF_ALLOC) + Sym->Value = alignTo(V.Val, V.Alignment); + else + Sym->Value = V.getValue(); + } +} + +static SymbolBody *findSymbol(StringRef S) { + switch (Config->EKind) { + case ELF32LEKind: + return Symtab<ELF32LE>::X->find(S); + case ELF32BEKind: + return Symtab<ELF32BE>::X->find(S); + case ELF64LEKind: + return Symtab<ELF64LE>::X->find(S); + case ELF64BEKind: + return Symtab<ELF64BE>::X->find(S); + default: + llvm_unreachable("unknown Config->EKind"); } } -template <class ELFT> static void addSymbol(SymbolAssignment *Cmd) { +static SymbolBody *addRegularSymbol(SymbolAssignment *Cmd) { + switch (Config->EKind) { + case ELF32LEKind: + return addRegular<ELF32LE>(Cmd); + case ELF32BEKind: + return addRegular<ELF32BE>(Cmd); + case ELF64LEKind: + return addRegular<ELF64LE>(Cmd); + case ELF64BEKind: + return addRegular<ELF64BE>(Cmd); + default: + llvm_unreachable("unknown Config->EKind"); + } +} + +void LinkerScript::addSymbol(SymbolAssignment *Cmd) { if (Cmd->Name == ".") return; // If a symbol was in PROVIDE(), we need to define it only when // it is a referenced undefined symbol. - SymbolBody *B = Symtab<ELFT>::X->find(Cmd->Name); + SymbolBody *B = findSymbol(Cmd->Name); if (Cmd->Provide && (!B || B->isDefined())) return; - // Otherwise, create a new symbol if one does not exist or an - // undefined one does exist. - if (Cmd->Expression.IsAbsolute()) - Cmd->Sym = addRegular<ELFT>(Cmd); - else - Cmd->Sym = addSynthetic<ELFT>(Cmd); - assignSymbol<ELFT>(Cmd); + Cmd->Sym = addRegularSymbol(Cmd); } bool SymbolAssignment::classof(const BaseCommand *C) { @@ -134,6 +200,15 @@ bool OutputSectionCommand::classof(const BaseCommand *C) { return C->Kind == OutputSectionKind; } +// Fill [Buf, Buf + Size) with Filler. +// This is used for linker script "=fillexp" command. +static void fill(uint8_t *Buf, size_t Size, uint32_t Filler) { + size_t I = 0; + for (; I + 4 < Size; I += 4) + memcpy(Buf + I, &Filler, 4); + memcpy(Buf + I, &Filler, Size - I); +} + bool InputSectionDescription::classof(const BaseCommand *C) { return C->Kind == InputSectionKind; } @@ -146,17 +221,13 @@ bool BytesDataCommand::classof(const BaseCommand *C) { return C->Kind == BytesDataKind; } -template <class ELFT> LinkerScript<ELFT>::LinkerScript() = default; -template <class ELFT> LinkerScript<ELFT>::~LinkerScript() = default; - -template <class ELFT> static StringRef basename(InputSectionBase<ELFT> *S) { - if (S->getFile()) - return sys::path::filename(S->getFile()->getName()); +static StringRef basename(InputSectionBase *S) { + if (S->File) + return sys::path::filename(S->File->getName()); return ""; } -template <class ELFT> -bool LinkerScript<ELFT>::shouldKeep(InputSectionBase<ELFT> *S) { +bool LinkerScript::shouldKeep(InputSectionBase *S) { for (InputSectionDescription *ID : Opt.KeptSections) if (ID->FilePat.match(basename(S))) for (SectionPattern &P : ID->SectionPatterns) @@ -165,73 +236,82 @@ bool LinkerScript<ELFT>::shouldKeep(InputSectionBase<ELFT> *S) { return false; } -static bool comparePriority(InputSectionData *A, InputSectionData *B) { - return getPriority(A->Name) < getPriority(B->Name); -} - -static bool compareName(InputSectionData *A, InputSectionData *B) { - return A->Name < B->Name; -} - -static bool compareAlignment(InputSectionData *A, InputSectionData *B) { - // ">" is not a mistake. Larger alignments are placed before smaller - // alignments in order to reduce the amount of padding necessary. - // This is compatible with GNU. - return A->Alignment > B->Alignment; -} - -static std::function<bool(InputSectionData *, InputSectionData *)> +// A helper function for the SORT() command. +static std::function<bool(InputSectionBase *, InputSectionBase *)> getComparator(SortSectionPolicy K) { switch (K) { case SortSectionPolicy::Alignment: - return compareAlignment; + return [](InputSectionBase *A, InputSectionBase *B) { + // ">" is not a mistake. Sections with larger alignments are placed + // before sections with smaller alignments in order to reduce the + // amount of padding necessary. This is compatible with GNU. + return A->Alignment > B->Alignment; + }; case SortSectionPolicy::Name: - return compareName; + return [](InputSectionBase *A, InputSectionBase *B) { + return A->Name < B->Name; + }; case SortSectionPolicy::Priority: - return comparePriority; + return [](InputSectionBase *A, InputSectionBase *B) { + return getPriority(A->Name) < getPriority(B->Name); + }; default: llvm_unreachable("unknown sort policy"); } } -template <class ELFT> -static bool matchConstraints(ArrayRef<InputSectionBase<ELFT> *> Sections, +// A helper function for the SORT() command. +static bool matchConstraints(ArrayRef<InputSectionBase *> Sections, ConstraintKind Kind) { if (Kind == ConstraintKind::NoConstraint) return true; - bool IsRW = llvm::any_of(Sections, [=](InputSectionData *Sec2) { - auto *Sec = static_cast<InputSectionBase<ELFT> *>(Sec2); - return Sec->Flags & SHF_WRITE; + + bool IsRW = llvm::any_of(Sections, [](InputSectionBase *Sec) { + return static_cast<InputSectionBase *>(Sec)->Flags & SHF_WRITE; }); + return (IsRW && Kind == ConstraintKind::ReadWrite) || (!IsRW && Kind == ConstraintKind::ReadOnly); } -static void sortSections(InputSectionData **Begin, InputSectionData **End, +static void sortSections(InputSection **Begin, InputSection **End, SortSectionPolicy K) { if (K != SortSectionPolicy::Default && K != SortSectionPolicy::None) std::stable_sort(Begin, End, getComparator(K)); } // Compute and remember which sections the InputSectionDescription matches. -template <class ELFT> -void LinkerScript<ELFT>::computeInputSections(InputSectionDescription *I) { - // Collects all sections that satisfy constraints of I - // and attach them to I. - for (SectionPattern &Pat : I->SectionPatterns) { - size_t SizeBefore = I->Sections.size(); - - for (InputSectionBase<ELFT> *S : Symtab<ELFT>::X->Sections) { - if (!S->Live || S->Assigned) +std::vector<InputSection *> +LinkerScript::computeInputSections(const InputSectionDescription *Cmd) { + std::vector<InputSection *> Ret; + + // Collects all sections that satisfy constraints of Cmd. + for (const SectionPattern &Pat : Cmd->SectionPatterns) { + size_t SizeBefore = Ret.size(); + + for (InputSectionBase *Sec : InputSections) { + if (Sec->Assigned) continue; - StringRef Filename = basename(S); - if (!I->FilePat.match(Filename) || Pat.ExcludedFilePat.match(Filename)) + if (!Sec->Live) { + reportDiscarded(Sec); + continue; + } + + // For -emit-relocs we have to ignore entries like + // .rela.dyn : { *(.rela.data) } + // which are common because they are in the default bfd script. + if (Sec->Type == SHT_REL || Sec->Type == SHT_RELA) continue; - if (!Pat.SectionPat.match(S->Name)) + + StringRef Filename = basename(Sec); + if (!Cmd->FilePat.match(Filename) || + Pat.ExcludedFilePat.match(Filename) || + !Pat.SectionPat.match(Sec->Name)) continue; - I->Sections.push_back(S); - S->Assigned = true; + + Ret.push_back(cast<InputSection>(Sec)); + Sec->Assigned = true; } // Sort sections as instructed by SORT-family commands and --sort-section @@ -245,8 +325,8 @@ void LinkerScript<ELFT>::computeInputSections(InputSectionDescription *I) { // --sort-section is handled as an inner SORT command. // 3. If one SORT command is given, and if it is SORT_NONE, don't sort. // 4. If no SORT command is given, sort according to --sort-section. - InputSectionData **Begin = I->Sections.data() + SizeBefore; - InputSectionData **End = I->Sections.data() + I->Sections.size(); + InputSection **Begin = Ret.data() + SizeBefore; + InputSection **End = Ret.data() + Ret.size(); if (Pat.SortOuter != SortSectionPolicy::None) { if (Pat.SortInner == SortSectionPolicy::Default) sortSections(Begin, End, Config->SortSection); @@ -255,68 +335,58 @@ void LinkerScript<ELFT>::computeInputSections(InputSectionDescription *I) { sortSections(Begin, End, Pat.SortOuter); } } + return Ret; } -template <class ELFT> -void LinkerScript<ELFT>::discard(ArrayRef<InputSectionBase<ELFT> *> V) { - for (InputSectionBase<ELFT> *S : V) { +void LinkerScript::discard(ArrayRef<InputSectionBase *> V) { + for (InputSectionBase *S : V) { S->Live = false; - reportDiscarded(S); + if (S == InX::ShStrTab) + error("discarding .shstrtab section is not allowed"); + discard(S->DependentSections); } } -template <class ELFT> -std::vector<InputSectionBase<ELFT> *> -LinkerScript<ELFT>::createInputSectionList(OutputSectionCommand &OutCmd) { - std::vector<InputSectionBase<ELFT> *> Ret; +std::vector<InputSectionBase *> +LinkerScript::createInputSectionList(OutputSectionCommand &OutCmd) { + std::vector<InputSectionBase *> Ret; - for (const std::unique_ptr<BaseCommand> &Base : OutCmd.Commands) { - auto *Cmd = dyn_cast<InputSectionDescription>(Base.get()); + for (BaseCommand *Base : OutCmd.Commands) { + auto *Cmd = dyn_cast<InputSectionDescription>(Base); if (!Cmd) continue; - computeInputSections(Cmd); - for (InputSectionData *S : Cmd->Sections) - Ret.push_back(static_cast<InputSectionBase<ELFT> *>(S)); + + Cmd->Sections = computeInputSections(Cmd); + Ret.insert(Ret.end(), Cmd->Sections.begin(), Cmd->Sections.end()); } return Ret; } -template <class ELFT> -void LinkerScript<ELFT>::addSection(OutputSectionFactory<ELFT> &Factory, - InputSectionBase<ELFT> *Sec, - StringRef Name) { - OutputSectionBase *OutSec; - bool IsNew; - std::tie(OutSec, IsNew) = Factory.create(Sec, Name); - if (IsNew) - OutputSections->push_back(OutSec); - OutSec->addSection(Sec); -} - -template <class ELFT> -void LinkerScript<ELFT>::processCommands(OutputSectionFactory<ELFT> &Factory) { - for (unsigned I = 0; I < Opt.Commands.size(); ++I) { - auto Iter = Opt.Commands.begin() + I; - const std::unique_ptr<BaseCommand> &Base1 = *Iter; +void LinkerScript::processCommands(OutputSectionFactory &Factory) { + // A symbol can be assigned before any section is mentioned in the linker + // script. In an DSO, the symbol values are addresses, so the only important + // section values are: + // * SHN_UNDEF + // * SHN_ABS + // * Any value meaning a regular section. + // To handle that, create a dummy aether section that fills the void before + // the linker scripts switches to another section. It has an index of one + // which will map to whatever the first actual section is. + Aether = make<OutputSection>("", 0, SHF_ALLOC); + Aether->SectionIndex = 1; + CurOutSec = Aether; + Dot = 0; + for (size_t I = 0; I < Opt.Commands.size(); ++I) { // Handle symbol assignments outside of any output section. - if (auto *Cmd = dyn_cast<SymbolAssignment>(Base1.get())) { - addSymbol<ELFT>(Cmd); - continue; - } - - if (auto *Cmd = dyn_cast<AssertCommand>(Base1.get())) { - // If we don't have SECTIONS then output sections have already been - // created by Writer<ELFT>. The LinkerScript<ELFT>::assignAddresses - // will not be called, so ASSERT should be evaluated now. - if (!Opt.HasSections) - Cmd->Expression(0); + if (auto *Cmd = dyn_cast<SymbolAssignment>(Opt.Commands[I])) { + addSymbol(Cmd); continue; } - if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base1.get())) { - std::vector<InputSectionBase<ELFT> *> V = createInputSectionList(*Cmd); + if (auto *Cmd = dyn_cast<OutputSectionCommand>(Opt.Commands[I])) { + std::vector<InputSectionBase *> V = createInputSectionList(*Cmd); // The output section name `/DISCARD/' is special. // Any input section assigned to it is discarded. @@ -332,211 +402,270 @@ void LinkerScript<ELFT>::processCommands(OutputSectionFactory<ELFT> &Factory) { // // Because we'll iterate over Commands many more times, the easiest // way to "make it as if it wasn't present" is to just remove it. - if (!matchConstraints<ELFT>(V, Cmd->Constraint)) { - for (InputSectionBase<ELFT> *S : V) + if (!matchConstraints(V, Cmd->Constraint)) { + for (InputSectionBase *S : V) S->Assigned = false; - Opt.Commands.erase(Iter); + Opt.Commands.erase(Opt.Commands.begin() + I); --I; continue; } // A directive may contain symbol definitions like this: // ".foo : { ...; bar = .; }". Handle them. - for (const std::unique_ptr<BaseCommand> &Base : Cmd->Commands) - if (auto *OutCmd = dyn_cast<SymbolAssignment>(Base.get())) - addSymbol<ELFT>(OutCmd); + for (BaseCommand *Base : Cmd->Commands) + if (auto *OutCmd = dyn_cast<SymbolAssignment>(Base)) + addSymbol(OutCmd); // Handle subalign (e.g. ".foo : SUBALIGN(32) { ... }"). If subalign // is given, input sections are aligned to that value, whether the // given value is larger or smaller than the original section alignment. if (Cmd->SubalignExpr) { - uint32_t Subalign = Cmd->SubalignExpr(0); - for (InputSectionBase<ELFT> *S : V) + uint32_t Subalign = Cmd->SubalignExpr().getValue(); + for (InputSectionBase *S : V) S->Alignment = Subalign; } // Add input sections to an output section. - for (InputSectionBase<ELFT> *S : V) - addSection(Factory, S, Cmd->Name); + for (InputSectionBase *S : V) + Factory.addInputSec(S, Cmd->Name, Cmd->Sec); + if (OutputSection *Sec = Cmd->Sec) { + assert(Sec->SectionIndex == INT_MAX); + Sec->SectionIndex = I; + SecToCommand[Sec] = Cmd; + } } } + CurOutSec = nullptr; } -// Add sections that didn't match any sections command. -template <class ELFT> -void LinkerScript<ELFT>::addOrphanSections( - OutputSectionFactory<ELFT> &Factory) { - for (InputSectionBase<ELFT> *S : Symtab<ELFT>::X->Sections) - if (S->Live && !S->OutSec) - addSection(Factory, S, getOutputSectionName(S->Name)); -} +void LinkerScript::fabricateDefaultCommands() { + std::vector<BaseCommand *> Commands; -// Sets value of a section-defined symbol. Two kinds of -// symbols are processed: synthetic symbols, whose value -// is an offset from beginning of section and regular -// symbols whose value is absolute. -template <class ELFT> -static void assignSectionSymbol(SymbolAssignment *Cmd, - typename ELFT::uint Value) { - if (!Cmd->Sym) - return; + // Define start address + uint64_t StartAddr = Config->ImageBase + elf::getHeaderSize(); - if (auto *Body = dyn_cast<DefinedSynthetic>(Cmd->Sym)) { - Body->Section = Cmd->Expression.Section(); - Body->Value = Cmd->Expression(Value) - Body->Section->Addr; - return; + // The Sections with -T<section> have been sorted in order of ascending + // address. We must lower StartAddr if the lowest -T<section address> as + // calls to setDot() must be monotonically increasing. + for (auto& KV : Config->SectionStartMap) + StartAddr = std::min(StartAddr, KV.second); + + Commands.push_back( + make<SymbolAssignment>(".", [=] { return StartAddr; }, "")); + + // For each OutputSection that needs a VA fabricate an OutputSectionCommand + // with an InputSectionDescription describing the InputSections + for (OutputSection *Sec : *OutputSections) { + auto *OSCmd = createOutputSectionCommand(Sec->Name, "<internal>"); + OSCmd->Sec = Sec; + SecToCommand[Sec] = OSCmd; + + // Prefer user supplied address over additional alignment constraint + auto I = Config->SectionStartMap.find(Sec->Name); + if (I != Config->SectionStartMap.end()) + OSCmd->AddrExpr = [=] { return I->second; }; + + Commands.push_back(OSCmd); + if (Sec->Sections.size()) { + auto *ISD = make<InputSectionDescription>(""); + OSCmd->Commands.push_back(ISD); + for (InputSection *ISec : Sec->Sections) { + ISD->Sections.push_back(ISec); + ISec->Assigned = true; + } + } } - auto *Body = cast<DefinedRegular<ELFT>>(Cmd->Sym); - Body->Value = Cmd->Expression(Value); + // SECTIONS commands run before other non SECTIONS commands + Commands.insert(Commands.end(), Opt.Commands.begin(), Opt.Commands.end()); + Opt.Commands = std::move(Commands); } -template <class ELFT> static bool isTbss(OutputSectionBase *Sec) { - return (Sec->Flags & SHF_TLS) && Sec->Type == SHT_NOBITS; +// Add sections that didn't match any sections command. +void LinkerScript::addOrphanSections(OutputSectionFactory &Factory) { + for (InputSectionBase *S : InputSections) { + if (!S->Live || S->Parent) + continue; + StringRef Name = getOutputSectionName(S->Name); + auto I = std::find_if( + Opt.Commands.begin(), Opt.Commands.end(), [&](BaseCommand *Base) { + if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base)) + return Cmd->Name == Name; + return false; + }); + if (I == Opt.Commands.end()) { + Factory.addInputSec(S, Name); + } else { + auto *Cmd = cast<OutputSectionCommand>(*I); + Factory.addInputSec(S, Name, Cmd->Sec); + if (OutputSection *Sec = Cmd->Sec) { + SecToCommand[Sec] = Cmd; + unsigned Index = std::distance(Opt.Commands.begin(), I); + assert(Sec->SectionIndex == INT_MAX || Sec->SectionIndex == Index); + Sec->SectionIndex = Index; + } + auto *ISD = make<InputSectionDescription>(""); + ISD->Sections.push_back(cast<InputSection>(S)); + Cmd->Commands.push_back(ISD); + } + } } -template <class ELFT> void LinkerScript<ELFT>::output(InputSection<ELFT> *S) { - if (!AlreadyOutputIS.insert(S).second) - return; - bool IsTbss = isTbss<ELFT>(CurOutSec); +uint64_t LinkerScript::advance(uint64_t Size, unsigned Align) { + bool IsTbss = (CurOutSec->Flags & SHF_TLS) && CurOutSec->Type == SHT_NOBITS; + uint64_t Start = IsTbss ? Dot + ThreadBssOffset : Dot; + Start = alignTo(Start, Align); + uint64_t End = Start + Size; + + if (IsTbss) + ThreadBssOffset = End - Dot; + else + Dot = End; + return End; +} - uintX_t Pos = IsTbss ? Dot + ThreadBssOffset : Dot; - Pos = alignTo(Pos, S->Alignment); - S->OutSecOff = Pos - CurOutSec->Addr; - Pos += S->getSize(); +void LinkerScript::output(InputSection *S) { + uint64_t Pos = advance(S->getSize(), S->Alignment); + S->OutSecOff = Pos - S->getSize() - CurOutSec->Addr; // Update output section size after adding each section. This is so that // SIZEOF works correctly in the case below: // .foo { *(.aaa) a = SIZEOF(.foo); *(.bbb) } CurOutSec->Size = Pos - CurOutSec->Addr; - if (IsTbss) - ThreadBssOffset = Pos - Dot; - else - Dot = Pos; -} - -template <class ELFT> void LinkerScript<ELFT>::flush() { - if (!CurOutSec || !AlreadyOutputOS.insert(CurOutSec).second) - return; - if (auto *OutSec = dyn_cast<OutputSection<ELFT>>(CurOutSec)) { - for (InputSection<ELFT> *I : OutSec->Sections) - output(I); - } else { - Dot += CurOutSec->Size; + // If there is a memory region associated with this input section, then + // place the section in that region and update the region index. + if (CurMemRegion) { + CurMemRegion->Offset += CurOutSec->Size; + uint64_t CurSize = CurMemRegion->Offset - CurMemRegion->Origin; + if (CurSize > CurMemRegion->Length) { + uint64_t OverflowAmt = CurSize - CurMemRegion->Length; + error("section '" + CurOutSec->Name + "' will not fit in region '" + + CurMemRegion->Name + "': overflowed by " + Twine(OverflowAmt) + + " bytes"); + } } } -template <class ELFT> -void LinkerScript<ELFT>::switchTo(OutputSectionBase *Sec) { +void LinkerScript::switchTo(OutputSection *Sec) { if (CurOutSec == Sec) return; - if (AlreadyOutputOS.count(Sec)) - return; - flush(); CurOutSec = Sec; - - Dot = alignTo(Dot, CurOutSec->Addralign); - CurOutSec->Addr = isTbss<ELFT>(CurOutSec) ? Dot + ThreadBssOffset : Dot; + CurOutSec->Addr = advance(0, CurOutSec->Alignment); // If neither AT nor AT> is specified for an allocatable section, the linker // will set the LMA such that the difference between VMA and LMA for the // section is the same as the preceding output section in the same region // https://sourceware.org/binutils/docs-2.20/ld/Output-Section-LMA.html - CurOutSec->setLMAOffset(LMAOffset); + if (LMAOffset) + CurOutSec->LMAOffset = LMAOffset(); } -template <class ELFT> void LinkerScript<ELFT>::process(BaseCommand &Base) { - // This handles the assignments to symbol or to a location counter (.) - if (auto *AssignCmd = dyn_cast<SymbolAssignment>(&Base)) { - if (AssignCmd->Name == ".") { - // Update to location counter means update to section size. - uintX_t Val = AssignCmd->Expression(Dot); - if (Val < Dot) - error("unable to move location counter backward for: " + - CurOutSec->Name); - Dot = Val; - CurOutSec->Size = Dot - CurOutSec->Addr; - return; - } - assignSectionSymbol<ELFT>(AssignCmd, Dot); +void LinkerScript::process(BaseCommand &Base) { + // This handles the assignments to symbol or to the dot. + if (auto *Cmd = dyn_cast<SymbolAssignment>(&Base)) { + assignSymbol(Cmd, true); return; } // Handle BYTE(), SHORT(), LONG(), or QUAD(). - if (auto *DataCmd = dyn_cast<BytesDataCommand>(&Base)) { - DataCmd->Offset = Dot - CurOutSec->Addr; - Dot += DataCmd->Size; + if (auto *Cmd = dyn_cast<BytesDataCommand>(&Base)) { + Cmd->Offset = Dot - CurOutSec->Addr; + Dot += Cmd->Size; CurOutSec->Size = Dot - CurOutSec->Addr; return; } - if (auto *AssertCmd = dyn_cast<AssertCommand>(&Base)) { - AssertCmd->Expression(Dot); + // Handle ASSERT(). + if (auto *Cmd = dyn_cast<AssertCommand>(&Base)) { + Cmd->Expression(); return; } - // It handles single input section description command, - // calculates and assigns the offsets for each section and also + // Handle a single input section description command. + // It calculates and assigns the offsets for each section and also // updates the output section size. - auto &ICmd = cast<InputSectionDescription>(Base); - for (InputSectionData *ID : ICmd.Sections) { + auto &Cmd = cast<InputSectionDescription>(Base); + for (InputSection *Sec : Cmd.Sections) { // We tentatively added all synthetic sections at the beginning and removed // empty ones afterwards (because there is no way to know whether they were // going be empty or not other than actually running linker scripts.) // We need to ignore remains of empty sections. - if (auto *Sec = dyn_cast<SyntheticSection<ELFT>>(ID)) - if (Sec->empty()) + if (auto *S = dyn_cast<SyntheticSection>(Sec)) + if (S->empty()) continue; - auto *IB = static_cast<InputSectionBase<ELFT> *>(ID); - switchTo(IB->OutSec); - if (auto *I = dyn_cast<InputSection<ELFT>>(IB)) - output(I); - else - flush(); + if (!Sec->Live) + continue; + assert(CurOutSec == Sec->getParent()); + output(Sec); + } +} + +// This function searches for a memory region to place the given output +// section in. If found, a pointer to the appropriate memory region is +// returned. Otherwise, a nullptr is returned. +MemoryRegion *LinkerScript::findMemoryRegion(OutputSectionCommand *Cmd) { + // If a memory region name was specified in the output section command, + // then try to find that region first. + if (!Cmd->MemoryRegionName.empty()) { + auto It = Opt.MemoryRegions.find(Cmd->MemoryRegionName); + if (It != Opt.MemoryRegions.end()) + return &It->second; + error("memory region '" + Cmd->MemoryRegionName + "' not declared"); + return nullptr; } -} -template <class ELFT> -static std::vector<OutputSectionBase *> -findSections(StringRef Name, const std::vector<OutputSectionBase *> &Sections) { - std::vector<OutputSectionBase *> Ret; - for (OutputSectionBase *Sec : Sections) - if (Sec->getName() == Name) - Ret.push_back(Sec); - return Ret; + // If at least one memory region is defined, all sections must + // belong to some memory region. Otherwise, we don't need to do + // anything for memory regions. + if (Opt.MemoryRegions.empty()) + return nullptr; + + OutputSection *Sec = Cmd->Sec; + // See if a region can be found by matching section flags. + for (auto &Pair : Opt.MemoryRegions) { + MemoryRegion &M = Pair.second; + if ((M.Flags & Sec->Flags) && (M.NegFlags & Sec->Flags) == 0) + return &M; + } + + // Otherwise, no suitable region was found. + if (Sec->Flags & SHF_ALLOC) + error("no memory region specified for section '" + Sec->Name + "'"); + return nullptr; } // This function assigns offsets to input sections and an output section // for a single sections command (e.g. ".text { *(.text); }"). -template <class ELFT> -void LinkerScript<ELFT>::assignOffsets(OutputSectionCommand *Cmd) { - if (Cmd->LMAExpr) - LMAOffset = Cmd->LMAExpr(Dot) - Dot; - std::vector<OutputSectionBase *> Sections = - findSections<ELFT>(Cmd->Name, *OutputSections); - if (Sections.empty()) +void LinkerScript::assignOffsets(OutputSectionCommand *Cmd) { + OutputSection *Sec = Cmd->Sec; + if (!Sec) + return; + + if (Cmd->AddrExpr && (Sec->Flags & SHF_ALLOC)) + setDot(Cmd->AddrExpr, Cmd->Location, false); + + if (Cmd->LMAExpr) { + uint64_t D = Dot; + LMAOffset = [=] { return Cmd->LMAExpr().getValue() - D; }; + } + + CurMemRegion = Cmd->MemRegion; + if (CurMemRegion) + Dot = CurMemRegion->Offset; + switchTo(Sec); + + // We do not support custom layout for compressed debug sectons. + // At this point we already know their size and have compressed content. + if (CurOutSec->Flags & SHF_COMPRESSED) return; - switchTo(Sections[0]); - - // Find the last section output location. We will output orphan sections - // there so that end symbols point to the correct location. - auto E = std::find_if(Cmd->Commands.rbegin(), Cmd->Commands.rend(), - [](const std::unique_ptr<BaseCommand> &Cmd) { - return !isa<SymbolAssignment>(*Cmd); - }) - .base(); - for (auto I = Cmd->Commands.begin(); I != E; ++I) - process(**I); - for (OutputSectionBase *Base : Sections) - switchTo(Base); - flush(); - std::for_each(E, Cmd->Commands.end(), - [this](std::unique_ptr<BaseCommand> &B) { process(*B.get()); }); + + for (BaseCommand *C : Cmd->Commands) + process(*C); } -template <class ELFT> void LinkerScript<ELFT>::removeEmptyCommands() { +void LinkerScript::removeEmptyCommands() { // It is common practice to use very generic linker scripts. So for any // given run some of the output sections in the script will be empty. // We could create corresponding empty output sections, but that would @@ -544,52 +673,62 @@ template <class ELFT> void LinkerScript<ELFT>::removeEmptyCommands() { // We instead remove trivially empty sections. The bfd linker seems even // more aggressive at removing them. auto Pos = std::remove_if( - Opt.Commands.begin(), Opt.Commands.end(), - [&](const std::unique_ptr<BaseCommand> &Base) { - if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get())) - return findSections<ELFT>(Cmd->Name, *OutputSections).empty(); + Opt.Commands.begin(), Opt.Commands.end(), [&](BaseCommand *Base) { + if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base)) + return std::find(OutputSections->begin(), OutputSections->end(), + Cmd->Sec) == OutputSections->end(); return false; }); Opt.Commands.erase(Pos, Opt.Commands.end()); } static bool isAllSectionDescription(const OutputSectionCommand &Cmd) { - for (const std::unique_ptr<BaseCommand> &I : Cmd.Commands) - if (!isa<InputSectionDescription>(*I)) + for (BaseCommand *Base : Cmd.Commands) + if (!isa<InputSectionDescription>(*Base)) return false; return true; } -template <class ELFT> void LinkerScript<ELFT>::adjustSectionsBeforeSorting() { +void LinkerScript::adjustSectionsBeforeSorting() { // If the output section contains only symbol assignments, create a // corresponding output section. The bfd linker seems to only create them if // '.' is assigned to, but creating these section should not have any bad // consequeces and gives us a section to put the symbol in. - uintX_t Flags = SHF_ALLOC; - uint32_t Type = SHT_NOBITS; - for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) { - auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()); + uint64_t Flags = SHF_ALLOC; + + for (int I = 0, E = Opt.Commands.size(); I != E; ++I) { + auto *Cmd = dyn_cast<OutputSectionCommand>(Opt.Commands[I]); if (!Cmd) continue; - std::vector<OutputSectionBase *> Secs = - findSections<ELFT>(Cmd->Name, *OutputSections); - if (!Secs.empty()) { - Flags = Secs[0]->Flags; - Type = Secs[0]->Type; + if (OutputSection *Sec = Cmd->Sec) { + Flags = Sec->Flags; continue; } if (isAllSectionDescription(*Cmd)) continue; - auto *OutSec = make<OutputSection<ELFT>>(Cmd->Name, Type, Flags); + auto *OutSec = make<OutputSection>(Cmd->Name, SHT_PROGBITS, Flags); + OutSec->SectionIndex = I; OutputSections->push_back(OutSec); + Cmd->Sec = OutSec; + SecToCommand[OutSec] = Cmd; } } -template <class ELFT> void LinkerScript<ELFT>::adjustSectionsAfterSorting() { +void LinkerScript::adjustSectionsAfterSorting() { placeOrphanSections(); + // Try and find an appropriate memory region to assign offsets in. + for (BaseCommand *Base : Opt.Commands) { + if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base)) { + Cmd->MemRegion = findMemoryRegion(Cmd); + // Handle align (e.g. ".foo : ALIGN(16) { ... }"). + if (Cmd->AlignExpr) + Cmd->Sec->updateAlignment(Cmd->AlignExpr().getValue()); + } + } + // If output section command doesn't specify any segments, // and we haven't previously assigned any section to segment, // then we simply assign section to the very first load segment. @@ -605,10 +744,11 @@ template <class ELFT> void LinkerScript<ELFT>::adjustSectionsAfterSorting() { // Walk the commands and propagate the program headers to commands that don't // explicitly specify them. - for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) { - auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()); + for (BaseCommand *Base : Opt.Commands) { + auto *Cmd = dyn_cast<OutputSectionCommand>(Base); if (!Cmd) continue; + if (Cmd->Phdrs.empty()) Cmd->Phdrs = DefPhdrs; else @@ -632,19 +772,37 @@ template <class ELFT> void LinkerScript<ELFT>::adjustSectionsAfterSorting() { // /* The RW PT_LOAD starts here*/ // rw_sec : { *(rw_sec) } // would mean that the RW PT_LOAD would become unaligned. -static bool shouldSkip(const BaseCommand &Cmd) { +static bool shouldSkip(BaseCommand *Cmd) { if (isa<OutputSectionCommand>(Cmd)) return false; - const auto *Assign = dyn_cast<SymbolAssignment>(&Cmd); - if (!Assign) - return true; - return Assign->Name != "."; + if (auto *Assign = dyn_cast<SymbolAssignment>(Cmd)) + return Assign->Name != "."; + return true; } -// Orphan sections are sections present in the input files which are not -// explicitly placed into the output file by the linker script. This just -// places them in the order already decided in OutputSections. -template <class ELFT> void LinkerScript<ELFT>::placeOrphanSections() { +// Orphan sections are sections present in the input files which are +// not explicitly placed into the output file by the linker script. +// +// When the control reaches this function, Opt.Commands contains +// output section commands for non-orphan sections only. This function +// adds new elements for orphan sections so that all sections are +// explicitly handled by Opt.Commands. +// +// Writer<ELFT>::sortSections has already sorted output sections. +// What we need to do is to scan OutputSections vector and +// Opt.Commands in parallel to find orphan sections. If there is an +// output section that doesn't have a corresponding entry in +// Opt.Commands, we will insert a new entry to Opt.Commands. +// +// There is some ambiguity as to where exactly a new entry should be +// inserted, because Opt.Commands contains not only output section +// commands but also other types of commands such as symbol assignment +// expressions. There's no correct answer here due to the lack of the +// formal specification of the linker script. We use heuristics to +// determine whether a new output command should be added before or +// after another commands. For the details, look at shouldSkip +// function. +void LinkerScript::placeOrphanSections() { // The OutputSections are already in the correct order. // This loops creates or moves commands as needed so that they are in the // correct order. @@ -656,98 +814,190 @@ template <class ELFT> void LinkerScript<ELFT>::placeOrphanSections() { // after that. auto FirstSectionOrDotAssignment = std::find_if(Opt.Commands.begin(), Opt.Commands.end(), - [](const std::unique_ptr<BaseCommand> &Cmd) { - if (isa<OutputSectionCommand>(*Cmd)) - return true; - const auto *Assign = dyn_cast<SymbolAssignment>(Cmd.get()); - if (!Assign) - return false; - return Assign->Name == "."; - }); + [](BaseCommand *Cmd) { return !shouldSkip(Cmd); }); if (FirstSectionOrDotAssignment != Opt.Commands.end()) { CmdIndex = FirstSectionOrDotAssignment - Opt.Commands.begin(); if (isa<SymbolAssignment>(**FirstSectionOrDotAssignment)) ++CmdIndex; } - for (OutputSectionBase *Sec : *OutputSections) { - StringRef Name = Sec->getName(); + for (OutputSection *Sec : *OutputSections) { + StringRef Name = Sec->Name; // Find the last spot where we can insert a command and still get the // correct result. auto CmdIter = Opt.Commands.begin() + CmdIndex; auto E = Opt.Commands.end(); - while (CmdIter != E && shouldSkip(**CmdIter)) { + while (CmdIter != E && shouldSkip(*CmdIter)) { ++CmdIter; ++CmdIndex; } - auto Pos = - std::find_if(CmdIter, E, [&](const std::unique_ptr<BaseCommand> &Base) { - auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()); - return Cmd && Cmd->Name == Name; - }); - if (Pos == E) { - Opt.Commands.insert(CmdIter, - llvm::make_unique<OutputSectionCommand>(Name)); + // If there is no command corresponding to this output section, + // create one and put a InputSectionDescription in it so that both + // representations agree on which input sections to use. + OutputSectionCommand *Cmd = getCmd(Sec); + if (!Cmd) { + Cmd = createOutputSectionCommand(Name, "<internal>"); + Opt.Commands.insert(CmdIter, Cmd); ++CmdIndex; + + Cmd->Sec = Sec; + SecToCommand[Sec] = Cmd; + auto *ISD = make<InputSectionDescription>(""); + for (InputSection *IS : Sec->Sections) + ISD->Sections.push_back(IS); + Cmd->Commands.push_back(ISD); + continue; } // Continue from where we found it. - CmdIndex = (Pos - Opt.Commands.begin()) + 1; + while (*CmdIter != Cmd) { + ++CmdIter; + ++CmdIndex; + } + ++CmdIndex; + } +} + +void LinkerScript::processNonSectionCommands() { + for (BaseCommand *Base : Opt.Commands) { + if (auto *Cmd = dyn_cast<SymbolAssignment>(Base)) + assignSymbol(Cmd, false); + else if (auto *Cmd = dyn_cast<AssertCommand>(Base)) + Cmd->Expression(); + } +} + +// Do a last effort at synchronizing the linker script "AST" and the section +// list. This is needed to account for last minute changes, like adding a +// .ARM.exidx terminator and sorting SHF_LINK_ORDER sections. +// +// FIXME: We should instead create the "AST" earlier and the above changes would +// be done directly in the "AST". +// +// This can only handle new sections being added and sections being reordered. +void LinkerScript::synchronize() { + for (BaseCommand *Base : Opt.Commands) { + auto *Cmd = dyn_cast<OutputSectionCommand>(Base); + if (!Cmd) + continue; + ArrayRef<InputSection *> Sections = Cmd->Sec->Sections; + std::vector<InputSection **> ScriptSections; + DenseSet<InputSection *> ScriptSectionsSet; + for (BaseCommand *Base : Cmd->Commands) { + auto *ISD = dyn_cast<InputSectionDescription>(Base); + if (!ISD) + continue; + for (InputSection *&IS : ISD->Sections) { + if (IS->Live) { + ScriptSections.push_back(&IS); + ScriptSectionsSet.insert(IS); + } + } + } + std::vector<InputSection *> Missing; + for (InputSection *IS : Sections) + if (!ScriptSectionsSet.count(IS)) + Missing.push_back(IS); + if (!Missing.empty()) { + auto ISD = make<InputSectionDescription>(""); + ISD->Sections = Missing; + Cmd->Commands.push_back(ISD); + for (InputSection *&IS : ISD->Sections) + if (IS->Live) + ScriptSections.push_back(&IS); + } + assert(ScriptSections.size() == Sections.size()); + for (int I = 0, N = Sections.size(); I < N; ++I) + *ScriptSections[I] = Sections[I]; } } -template <class ELFT> -void LinkerScript<ELFT>::assignAddresses(std::vector<PhdrEntry> &Phdrs) { +static bool +allocateHeaders(std::vector<PhdrEntry> &Phdrs, + ArrayRef<OutputSectionCommand *> OutputSectionCommands, + uint64_t Min) { + auto FirstPTLoad = + std::find_if(Phdrs.begin(), Phdrs.end(), + [](const PhdrEntry &E) { return E.p_type == PT_LOAD; }); + if (FirstPTLoad == Phdrs.end()) + return false; + + uint64_t HeaderSize = getHeaderSize(); + if (HeaderSize <= Min || Script->hasPhdrsCommands()) { + Min = alignDown(Min - HeaderSize, Config->MaxPageSize); + Out::ElfHeader->Addr = Min; + Out::ProgramHeaders->Addr = Min + Out::ElfHeader->Size; + return true; + } + + assert(FirstPTLoad->First == Out::ElfHeader); + OutputSection *ActualFirst = nullptr; + for (OutputSectionCommand *Cmd : OutputSectionCommands) { + OutputSection *Sec = Cmd->Sec; + if (Sec->FirstInPtLoad == Out::ElfHeader) { + ActualFirst = Sec; + break; + } + } + if (ActualFirst) { + for (OutputSectionCommand *Cmd : OutputSectionCommands) { + OutputSection *Sec = Cmd->Sec; + if (Sec->FirstInPtLoad == Out::ElfHeader) + Sec->FirstInPtLoad = ActualFirst; + } + FirstPTLoad->First = ActualFirst; + } else { + Phdrs.erase(FirstPTLoad); + } + + auto PhdrI = std::find_if(Phdrs.begin(), Phdrs.end(), [](const PhdrEntry &E) { + return E.p_type == PT_PHDR; + }); + if (PhdrI != Phdrs.end()) + Phdrs.erase(PhdrI); + return false; +} + +void LinkerScript::assignAddresses( + std::vector<PhdrEntry> &Phdrs, + ArrayRef<OutputSectionCommand *> OutputSectionCommands) { // Assign addresses as instructed by linker script SECTIONS sub-commands. Dot = 0; + ErrorOnMissingSection = true; + switchTo(Aether); - for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) { - if (auto *Cmd = dyn_cast<SymbolAssignment>(Base.get())) { - if (Cmd->Name == ".") { - Dot = Cmd->Expression(Dot); - } else if (Cmd->Sym) { - assignSectionSymbol<ELFT>(Cmd, Dot); - } + for (BaseCommand *Base : Opt.Commands) { + if (auto *Cmd = dyn_cast<SymbolAssignment>(Base)) { + assignSymbol(Cmd, false); continue; } - if (auto *Cmd = dyn_cast<AssertCommand>(Base.get())) { - Cmd->Expression(Dot); + if (auto *Cmd = dyn_cast<AssertCommand>(Base)) { + Cmd->Expression(); continue; } - auto *Cmd = cast<OutputSectionCommand>(Base.get()); - if (Cmd->AddrExpr) - Dot = Cmd->AddrExpr(Dot); + auto *Cmd = cast<OutputSectionCommand>(Base); assignOffsets(Cmd); } - uintX_t MinVA = std::numeric_limits<uintX_t>::max(); - for (OutputSectionBase *Sec : *OutputSections) { + uint64_t MinVA = std::numeric_limits<uint64_t>::max(); + for (OutputSectionCommand *Cmd : OutputSectionCommands) { + OutputSection *Sec = Cmd->Sec; if (Sec->Flags & SHF_ALLOC) MinVA = std::min<uint64_t>(MinVA, Sec->Addr); else Sec->Addr = 0; } - uintX_t HeaderSize = getHeaderSize(); - // If the linker script doesn't have PHDRS, add ElfHeader and ProgramHeaders - // now that we know we have space. - if (HeaderSize <= MinVA && !hasPhdrsCommands()) - allocateHeaders<ELFT>(Phdrs, *OutputSections); - - // ELF and Program headers need to be right before the first section in - // memory. Set their addresses accordingly. - MinVA = alignDown(MinVA - HeaderSize, Config->MaxPageSize); - Out<ELFT>::ElfHeader->Addr = MinVA; - Out<ELFT>::ProgramHeaders->Addr = Out<ELFT>::ElfHeader->Size + MinVA; + allocateHeaders(Phdrs, OutputSectionCommands, MinVA); } // Creates program headers as instructed by PHDRS linker script command. -template <class ELFT> std::vector<PhdrEntry> LinkerScript<ELFT>::createPhdrs() { +std::vector<PhdrEntry> LinkerScript::createPhdrs() { std::vector<PhdrEntry> Ret; // Process PHDRS and FILEHDR keywords because they are not @@ -757,23 +1007,23 @@ template <class ELFT> std::vector<PhdrEntry> LinkerScript<ELFT>::createPhdrs() { PhdrEntry &Phdr = Ret.back(); if (Cmd.HasFilehdr) - Phdr.add(Out<ELFT>::ElfHeader); + Phdr.add(Out::ElfHeader); if (Cmd.HasPhdrs) - Phdr.add(Out<ELFT>::ProgramHeaders); + Phdr.add(Out::ProgramHeaders); if (Cmd.LMAExpr) { - Phdr.p_paddr = Cmd.LMAExpr(0); + Phdr.p_paddr = Cmd.LMAExpr().getValue(); Phdr.HasLMA = true; } } // Add output sections to program headers. - for (OutputSectionBase *Sec : *OutputSections) { + for (OutputSection *Sec : *OutputSections) { if (!(Sec->Flags & SHF_ALLOC)) break; // Assign headers specified by linker script - for (size_t Id : getPhdrIndices(Sec->getName())) { + for (size_t Id : getPhdrIndices(Sec)) { Ret[Id].add(Sec); if (Opt.PhdrsCommands[Id].Flags == UINT_MAX) Ret[Id].p_flags |= Sec->getPhdrFlags(); @@ -782,155 +1032,147 @@ template <class ELFT> std::vector<PhdrEntry> LinkerScript<ELFT>::createPhdrs() { return Ret; } -template <class ELFT> bool LinkerScript<ELFT>::ignoreInterpSection() { +bool LinkerScript::ignoreInterpSection() { // Ignore .interp section in case we have PHDRS specification // and PT_INTERP isn't listed. - return !Opt.PhdrsCommands.empty() && - llvm::find_if(Opt.PhdrsCommands, [](const PhdrsCommand &Cmd) { - return Cmd.Type == PT_INTERP; - }) == Opt.PhdrsCommands.end(); + if (Opt.PhdrsCommands.empty()) + return false; + for (PhdrsCommand &Cmd : Opt.PhdrsCommands) + if (Cmd.Type == PT_INTERP) + return false; + return true; +} + +OutputSectionCommand *LinkerScript::getCmd(OutputSection *Sec) const { + auto I = SecToCommand.find(Sec); + if (I == SecToCommand.end()) + return nullptr; + return I->second; } -template <class ELFT> uint32_t LinkerScript<ELFT>::getFiller(StringRef Name) { - for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) - if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get())) - if (Cmd->Name == Name) - return Cmd->Filler; +uint32_t OutputSectionCommand::getFiller() { + if (Filler) + return *Filler; + if (Sec->Flags & SHF_EXECINSTR) + return Target->TrapInstr; return 0; } -template <class ELFT> static void writeInt(uint8_t *Buf, uint64_t Data, uint64_t Size) { - const endianness E = ELFT::TargetEndianness; - - switch (Size) { - case 1: - *Buf = (uint8_t)Data; - break; - case 2: - write16<E>(Buf, Data); - break; - case 4: - write32<E>(Buf, Data); - break; - case 8: - write64<E>(Buf, Data); - break; - default: + if (Size == 1) + *Buf = Data; + else if (Size == 2) + write16(Buf, Data, Config->Endianness); + else if (Size == 4) + write32(Buf, Data, Config->Endianness); + else if (Size == 8) + write64(Buf, Data, Config->Endianness); + else llvm_unreachable("unsupported Size argument"); - } } -template <class ELFT> -void LinkerScript<ELFT>::writeDataBytes(StringRef Name, uint8_t *Buf) { - int I = getSectionIndex(Name); - if (I == INT_MAX) - return; - - auto *Cmd = dyn_cast<OutputSectionCommand>(Opt.Commands[I].get()); - for (const std::unique_ptr<BaseCommand> &Base : Cmd->Commands) - if (auto *Data = dyn_cast<BytesDataCommand>(Base.get())) - writeInt<ELFT>(Buf + Data->Offset, Data->Expression(0), Data->Size); -} +// Compress section contents if this section contains debug info. +template <class ELFT> void OutputSectionCommand::maybeCompress() { + typedef typename ELFT::Chdr Elf_Chdr; -template <class ELFT> bool LinkerScript<ELFT>::hasLMA(StringRef Name) { - for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) - if (auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get())) - if (Cmd->LMAExpr && Cmd->Name == Name) - return true; - return false; -} - -// Returns the index of the given section name in linker script -// SECTIONS commands. Sections are laid out as the same order as they -// were in the script. If a given name did not appear in the script, -// it returns INT_MAX, so that it will be laid out at end of file. -template <class ELFT> int LinkerScript<ELFT>::getSectionIndex(StringRef Name) { - for (int I = 0, E = Opt.Commands.size(); I != E; ++I) - if (auto *Cmd = dyn_cast<OutputSectionCommand>(Opt.Commands[I].get())) - if (Cmd->Name == Name) - return I; - return INT_MAX; -} - -template <class ELFT> bool LinkerScript<ELFT>::hasPhdrsCommands() { - return !Opt.PhdrsCommands.empty(); -} + // Compress only DWARF debug sections. + if (!Config->CompressDebugSections || (Sec->Flags & SHF_ALLOC) || + !Name.startswith(".debug_")) + return; -template <class ELFT> -const OutputSectionBase *LinkerScript<ELFT>::getOutputSection(const Twine &Loc, - StringRef Name) { - static OutputSectionBase FakeSec("", 0, 0); + // Create a section header. + Sec->ZDebugHeader.resize(sizeof(Elf_Chdr)); + auto *Hdr = reinterpret_cast<Elf_Chdr *>(Sec->ZDebugHeader.data()); + Hdr->ch_type = ELFCOMPRESS_ZLIB; + Hdr->ch_size = Sec->Size; + Hdr->ch_addralign = Sec->Alignment; + + // Write section contents to a temporary buffer and compress it. + std::vector<uint8_t> Buf(Sec->Size); + writeTo<ELFT>(Buf.data()); + if (Error E = zlib::compress(toStringRef(Buf), Sec->CompressedData)) + fatal("compress failed: " + llvm::toString(std::move(E))); + + // Update section headers. + Sec->Size = sizeof(Elf_Chdr) + Sec->CompressedData.size(); + Sec->Flags |= SHF_COMPRESSED; +} + +template <class ELFT> void OutputSectionCommand::writeTo(uint8_t *Buf) { + Sec->Loc = Buf; + + // We may have already rendered compressed content when using + // -compress-debug-sections option. Write it together with header. + if (!Sec->CompressedData.empty()) { + memcpy(Buf, Sec->ZDebugHeader.data(), Sec->ZDebugHeader.size()); + memcpy(Buf + Sec->ZDebugHeader.size(), Sec->CompressedData.data(), + Sec->CompressedData.size()); + return; + } - for (OutputSectionBase *Sec : *OutputSections) - if (Sec->getName() == Name) - return Sec; + if (Sec->Type == SHT_NOBITS) + return; - error(Loc + ": undefined section " + Name); - return &FakeSec; -} + // Write leading padding. + std::vector<InputSection *> Sections; + for (BaseCommand *Cmd : Commands) + if (auto *ISD = dyn_cast<InputSectionDescription>(Cmd)) + for (InputSection *IS : ISD->Sections) + if (IS->Live) + Sections.push_back(IS); + uint32_t Filler = getFiller(); + if (Filler) + fill(Buf, Sections.empty() ? Sec->Size : Sections[0]->OutSecOff, Filler); + + parallelForEachN(0, Sections.size(), [=](size_t I) { + InputSection *IS = Sections[I]; + IS->writeTo<ELFT>(Buf); + + // Fill gaps between sections. + if (Filler) { + uint8_t *Start = Buf + IS->OutSecOff + IS->getSize(); + uint8_t *End; + if (I + 1 == Sections.size()) + End = Buf + Sec->Size; + else + End = Buf + Sections[I + 1]->OutSecOff; + fill(Start, End - Start, Filler); + } + }); -// This function is essentially the same as getOutputSection(Name)->Size, -// but it won't print out an error message if a given section is not found. -// -// Linker script does not create an output section if its content is empty. -// We want to allow SIZEOF(.foo) where .foo is a section which happened to -// be empty. That is why this function is different from getOutputSection(). -template <class ELFT> -uint64_t LinkerScript<ELFT>::getOutputSectionSize(StringRef Name) { - for (OutputSectionBase *Sec : *OutputSections) - if (Sec->getName() == Name) - return Sec->Size; - return 0; + // Linker scripts may have BYTE()-family commands with which you + // can write arbitrary bytes to the output. Process them if any. + for (BaseCommand *Base : Commands) + if (auto *Data = dyn_cast<BytesDataCommand>(Base)) + writeInt(Buf + Data->Offset, Data->Expression().getValue(), Data->Size); } -template <class ELFT> uint64_t LinkerScript<ELFT>::getHeaderSize() { - return elf::getHeaderSize<ELFT>(); +bool LinkerScript::hasLMA(OutputSection *Sec) { + if (OutputSectionCommand *Cmd = getCmd(Sec)) + if (Cmd->LMAExpr) + return true; + return false; } -template <class ELFT> -uint64_t LinkerScript<ELFT>::getSymbolValue(const Twine &Loc, StringRef S) { - if (SymbolBody *B = Symtab<ELFT>::X->find(S)) - return B->getVA<ELFT>(); +ExprValue LinkerScript::getSymbolValue(const Twine &Loc, StringRef S) { + if (S == ".") + return {CurOutSec, Dot - CurOutSec->Addr}; + if (SymbolBody *B = findSymbol(S)) { + if (auto *D = dyn_cast<DefinedRegular>(B)) + return {D->Section, D->Value}; + if (auto *C = dyn_cast<DefinedCommon>(B)) + return {InX::Common, C->Offset}; + } error(Loc + ": symbol not found: " + S); return 0; } -template <class ELFT> bool LinkerScript<ELFT>::isDefined(StringRef S) { - return Symtab<ELFT>::X->find(S) != nullptr; -} - -template <class ELFT> bool LinkerScript<ELFT>::isAbsolute(StringRef S) { - SymbolBody *Sym = Symtab<ELFT>::X->find(S); - auto *DR = dyn_cast_or_null<DefinedRegular<ELFT>>(Sym); - return DR && !DR->Section; -} - -// Gets section symbol belongs to. Symbol "." doesn't belong to any -// specific section but isn't absolute at the same time, so we try -// to find suitable section for it as well. -template <class ELFT> -const OutputSectionBase *LinkerScript<ELFT>::getSymbolSection(StringRef S) { - SymbolBody *Sym = Symtab<ELFT>::X->find(S); - if (!Sym) { - if (OutputSections->empty()) - return nullptr; - return CurOutSec ? CurOutSec : (*OutputSections)[0]; - } - - return SymbolTableSection<ELFT>::getOutputSection(Sym); -} - -// Returns indices of ELF headers containing specific section, identified -// by Name. Each index is a zero based number of ELF header listed within -// PHDRS {} script block. -template <class ELFT> -std::vector<size_t> LinkerScript<ELFT>::getPhdrIndices(StringRef SectionName) { - for (const std::unique_ptr<BaseCommand> &Base : Opt.Commands) { - auto *Cmd = dyn_cast<OutputSectionCommand>(Base.get()); - if (!Cmd || Cmd->Name != SectionName) - continue; +bool LinkerScript::isDefined(StringRef S) { return findSymbol(S) != nullptr; } +// Returns indices of ELF headers containing specific section. Each index is a +// zero based number of ELF header listed within PHDRS {} script block. +std::vector<size_t> LinkerScript::getPhdrIndices(OutputSection *Sec) { + if (OutputSectionCommand *Cmd = getCmd(Sec)) { std::vector<size_t> Ret; for (StringRef PhdrName : Cmd->Phdrs) Ret.push_back(getPhdrIndex(Cmd->Location, PhdrName)); @@ -939,8 +1181,7 @@ std::vector<size_t> LinkerScript<ELFT>::getPhdrIndices(StringRef SectionName) { return {}; } -template <class ELFT> -size_t LinkerScript<ELFT>::getPhdrIndex(const Twine &Loc, StringRef PhdrName) { +size_t LinkerScript::getPhdrIndex(const Twine &Loc, StringRef PhdrName) { size_t I = 0; for (PhdrsCommand &Cmd : Opt.PhdrsCommands) { if (Cmd.Name == PhdrName) @@ -951,1008 +1192,12 @@ size_t LinkerScript<ELFT>::getPhdrIndex(const Twine &Loc, StringRef PhdrName) { return 0; } -class elf::ScriptParser final : public ScriptParserBase { - typedef void (ScriptParser::*Handler)(); - -public: - ScriptParser(MemoryBufferRef MB) - : ScriptParserBase(MB), - IsUnderSysroot(isUnderSysroot(MB.getBufferIdentifier())) {} - - void readLinkerScript(); - void readVersionScript(); - void readDynamicList(); - -private: - void addFile(StringRef Path); - - void readAsNeeded(); - void readEntry(); - void readExtern(); - void readGroup(); - void readInclude(); - void readOutput(); - void readOutputArch(); - void readOutputFormat(); - void readPhdrs(); - void readSearchDir(); - void readSections(); - void readVersion(); - void readVersionScriptCommand(); - - SymbolAssignment *readAssignment(StringRef Name); - BytesDataCommand *readBytesDataCommand(StringRef Tok); - uint32_t readFill(); - OutputSectionCommand *readOutputSectionDescription(StringRef OutSec); - uint32_t readOutputSectionFiller(StringRef Tok); - std::vector<StringRef> readOutputSectionPhdrs(); - InputSectionDescription *readInputSectionDescription(StringRef Tok); - StringMatcher readFilePatterns(); - std::vector<SectionPattern> readInputSectionsList(); - InputSectionDescription *readInputSectionRules(StringRef FilePattern); - unsigned readPhdrType(); - SortSectionPolicy readSortKind(); - SymbolAssignment *readProvideHidden(bool Provide, bool Hidden); - SymbolAssignment *readProvideOrAssignment(StringRef Tok); - void readSort(); - Expr readAssert(); - - Expr readExpr(); - Expr readExpr1(Expr Lhs, int MinPrec); - StringRef readParenLiteral(); - Expr readPrimary(); - Expr readTernary(Expr Cond); - Expr readParenExpr(); - - // For parsing version script. - std::vector<SymbolVersion> readVersionExtern(); - void readAnonymousDeclaration(); - void readVersionDeclaration(StringRef VerStr); - std::vector<SymbolVersion> readSymbols(); - void readLocals(); - - ScriptConfiguration &Opt = *ScriptConfig; - bool IsUnderSysroot; -}; - -void ScriptParser::readDynamicList() { - expect("{"); - readAnonymousDeclaration(); - if (!atEOF()) - setError("EOF expected, but got " + next()); -} - -void ScriptParser::readVersionScript() { - readVersionScriptCommand(); - if (!atEOF()) - setError("EOF expected, but got " + next()); -} - -void ScriptParser::readVersionScriptCommand() { - if (consume("{")) { - readAnonymousDeclaration(); - return; - } - - while (!atEOF() && !Error && peek() != "}") { - StringRef VerStr = next(); - if (VerStr == "{") { - setError("anonymous version definition is used in " - "combination with other version definitions"); - return; - } - expect("{"); - readVersionDeclaration(VerStr); - } -} - -void ScriptParser::readVersion() { - expect("{"); - readVersionScriptCommand(); - expect("}"); -} - -void ScriptParser::readLinkerScript() { - while (!atEOF()) { - StringRef Tok = next(); - if (Tok == ";") - continue; - - if (Tok == "ASSERT") { - Opt.Commands.emplace_back(new AssertCommand(readAssert())); - } else if (Tok == "ENTRY") { - readEntry(); - } else if (Tok == "EXTERN") { - readExtern(); - } else if (Tok == "GROUP" || Tok == "INPUT") { - readGroup(); - } else if (Tok == "INCLUDE") { - readInclude(); - } else if (Tok == "OUTPUT") { - readOutput(); - } else if (Tok == "OUTPUT_ARCH") { - readOutputArch(); - } else if (Tok == "OUTPUT_FORMAT") { - readOutputFormat(); - } else if (Tok == "PHDRS") { - readPhdrs(); - } else if (Tok == "SEARCH_DIR") { - readSearchDir(); - } else if (Tok == "SECTIONS") { - readSections(); - } else if (Tok == "VERSION") { - readVersion(); - } else if (SymbolAssignment *Cmd = readProvideOrAssignment(Tok)) { - Opt.Commands.emplace_back(Cmd); - } else { - setError("unknown directive: " + Tok); - } - } -} - -void ScriptParser::addFile(StringRef S) { - if (IsUnderSysroot && S.startswith("/")) { - SmallString<128> PathData; - StringRef Path = (Config->Sysroot + S).toStringRef(PathData); - if (sys::fs::exists(Path)) { - Driver->addFile(Saver.save(Path)); - return; - } - } - - if (sys::path::is_absolute(S)) { - Driver->addFile(S); - } else if (S.startswith("=")) { - if (Config->Sysroot.empty()) - Driver->addFile(S.substr(1)); - else - Driver->addFile(Saver.save(Config->Sysroot + "/" + S.substr(1))); - } else if (S.startswith("-l")) { - Driver->addLibrary(S.substr(2)); - } else if (sys::fs::exists(S)) { - Driver->addFile(S); - } else { - if (Optional<std::string> Path = findFromSearchPaths(S)) - Driver->addFile(Saver.save(*Path)); - else - setError("unable to find " + S); - } -} - -void ScriptParser::readAsNeeded() { - expect("("); - bool Orig = Config->AsNeeded; - Config->AsNeeded = true; - while (!Error && !consume(")")) - addFile(unquote(next())); - Config->AsNeeded = Orig; -} - -void ScriptParser::readEntry() { - // -e <symbol> takes predecence over ENTRY(<symbol>). - expect("("); - StringRef Tok = next(); - if (Config->Entry.empty()) - Config->Entry = Tok; - expect(")"); -} - -void ScriptParser::readExtern() { - expect("("); - while (!Error && !consume(")")) - Config->Undefined.push_back(next()); -} - -void ScriptParser::readGroup() { - expect("("); - while (!Error && !consume(")")) { - StringRef Tok = next(); - if (Tok == "AS_NEEDED") - readAsNeeded(); - else - addFile(unquote(Tok)); - } -} - -void ScriptParser::readInclude() { - StringRef Tok = unquote(next()); - - // https://sourceware.org/binutils/docs/ld/File-Commands.html: - // The file will be searched for in the current directory, and in any - // directory specified with the -L option. - if (sys::fs::exists(Tok)) { - if (Optional<MemoryBufferRef> MB = readFile(Tok)) - tokenize(*MB); - return; - } - if (Optional<std::string> Path = findFromSearchPaths(Tok)) { - if (Optional<MemoryBufferRef> MB = readFile(*Path)) - tokenize(*MB); - return; - } - setError("cannot open " + Tok); -} - -void ScriptParser::readOutput() { - // -o <file> takes predecence over OUTPUT(<file>). - expect("("); - StringRef Tok = next(); - if (Config->OutputFile.empty()) - Config->OutputFile = unquote(Tok); - expect(")"); -} - -void ScriptParser::readOutputArch() { - // Error checking only for now. - expect("("); - skip(); - expect(")"); -} - -void ScriptParser::readOutputFormat() { - // Error checking only for now. - expect("("); - skip(); - StringRef Tok = next(); - if (Tok == ")") - return; - if (Tok != ",") { - setError("unexpected token: " + Tok); - return; - } - skip(); - expect(","); - skip(); - expect(")"); -} - -void ScriptParser::readPhdrs() { - expect("{"); - while (!Error && !consume("}")) { - StringRef Tok = next(); - Opt.PhdrsCommands.push_back( - {Tok, PT_NULL, false, false, UINT_MAX, nullptr}); - PhdrsCommand &PhdrCmd = Opt.PhdrsCommands.back(); - - PhdrCmd.Type = readPhdrType(); - do { - Tok = next(); - if (Tok == ";") - break; - if (Tok == "FILEHDR") - PhdrCmd.HasFilehdr = true; - else if (Tok == "PHDRS") - PhdrCmd.HasPhdrs = true; - else if (Tok == "AT") - PhdrCmd.LMAExpr = readParenExpr(); - else if (Tok == "FLAGS") { - expect("("); - // Passing 0 for the value of dot is a bit of a hack. It means that - // we accept expressions like ".|1". - PhdrCmd.Flags = readExpr()(0); - expect(")"); - } else - setError("unexpected header attribute: " + Tok); - } while (!Error); - } -} - -void ScriptParser::readSearchDir() { - expect("("); - StringRef Tok = next(); - if (!Config->Nostdlib) - Config->SearchPaths.push_back(unquote(Tok)); - expect(")"); -} - -void ScriptParser::readSections() { - Opt.HasSections = true; - // -no-rosegment is used to avoid placing read only non-executable sections in - // their own segment. We do the same if SECTIONS command is present in linker - // script. See comment for computeFlags(). - Config->SingleRoRx = true; - - expect("{"); - while (!Error && !consume("}")) { - StringRef Tok = next(); - BaseCommand *Cmd = readProvideOrAssignment(Tok); - if (!Cmd) { - if (Tok == "ASSERT") - Cmd = new AssertCommand(readAssert()); - else - Cmd = readOutputSectionDescription(Tok); - } - Opt.Commands.emplace_back(Cmd); - } -} - -static int precedence(StringRef Op) { - return StringSwitch<int>(Op) - .Cases("*", "/", 5) - .Cases("+", "-", 4) - .Cases("<<", ">>", 3) - .Cases("<", "<=", ">", ">=", "==", "!=", 2) - .Cases("&", "|", 1) - .Default(-1); -} - -StringMatcher ScriptParser::readFilePatterns() { - std::vector<StringRef> V; - while (!Error && !consume(")")) - V.push_back(next()); - return StringMatcher(V); -} - -SortSectionPolicy ScriptParser::readSortKind() { - if (consume("SORT") || consume("SORT_BY_NAME")) - return SortSectionPolicy::Name; - if (consume("SORT_BY_ALIGNMENT")) - return SortSectionPolicy::Alignment; - if (consume("SORT_BY_INIT_PRIORITY")) - return SortSectionPolicy::Priority; - if (consume("SORT_NONE")) - return SortSectionPolicy::None; - return SortSectionPolicy::Default; -} - -// Method reads a list of sequence of excluded files and section globs given in -// a following form: ((EXCLUDE_FILE(file_pattern+))? section_pattern+)+ -// Example: *(.foo.1 EXCLUDE_FILE (*a.o) .foo.2 EXCLUDE_FILE (*b.o) .foo.3) -// The semantics of that is next: -// * Include .foo.1 from every file. -// * Include .foo.2 from every file but a.o -// * Include .foo.3 from every file but b.o -std::vector<SectionPattern> ScriptParser::readInputSectionsList() { - std::vector<SectionPattern> Ret; - while (!Error && peek() != ")") { - StringMatcher ExcludeFilePat; - if (consume("EXCLUDE_FILE")) { - expect("("); - ExcludeFilePat = readFilePatterns(); - } - - std::vector<StringRef> V; - while (!Error && peek() != ")" && peek() != "EXCLUDE_FILE") - V.push_back(next()); - - if (!V.empty()) - Ret.push_back({std::move(ExcludeFilePat), StringMatcher(V)}); - else - setError("section pattern is expected"); - } - return Ret; -} - -// Reads contents of "SECTIONS" directive. That directive contains a -// list of glob patterns for input sections. The grammar is as follows. -// -// <patterns> ::= <section-list> -// | <sort> "(" <section-list> ")" -// | <sort> "(" <sort> "(" <section-list> ")" ")" -// -// <sort> ::= "SORT" | "SORT_BY_NAME" | "SORT_BY_ALIGNMENT" -// | "SORT_BY_INIT_PRIORITY" | "SORT_NONE" -// -// <section-list> is parsed by readInputSectionsList(). -InputSectionDescription * -ScriptParser::readInputSectionRules(StringRef FilePattern) { - auto *Cmd = new InputSectionDescription(FilePattern); - expect("("); - while (!Error && !consume(")")) { - SortSectionPolicy Outer = readSortKind(); - SortSectionPolicy Inner = SortSectionPolicy::Default; - std::vector<SectionPattern> V; - if (Outer != SortSectionPolicy::Default) { - expect("("); - Inner = readSortKind(); - if (Inner != SortSectionPolicy::Default) { - expect("("); - V = readInputSectionsList(); - expect(")"); - } else { - V = readInputSectionsList(); - } - expect(")"); - } else { - V = readInputSectionsList(); - } - - for (SectionPattern &Pat : V) { - Pat.SortInner = Inner; - Pat.SortOuter = Outer; - } - - std::move(V.begin(), V.end(), std::back_inserter(Cmd->SectionPatterns)); - } - return Cmd; -} - -InputSectionDescription * -ScriptParser::readInputSectionDescription(StringRef Tok) { - // Input section wildcard can be surrounded by KEEP. - // https://sourceware.org/binutils/docs/ld/Input-Section-Keep.html#Input-Section-Keep - if (Tok == "KEEP") { - expect("("); - StringRef FilePattern = next(); - InputSectionDescription *Cmd = readInputSectionRules(FilePattern); - expect(")"); - Opt.KeptSections.push_back(Cmd); - return Cmd; - } - return readInputSectionRules(Tok); -} - -void ScriptParser::readSort() { - expect("("); - expect("CONSTRUCTORS"); - expect(")"); -} - -Expr ScriptParser::readAssert() { - expect("("); - Expr E = readExpr(); - expect(","); - StringRef Msg = unquote(next()); - expect(")"); - return [=](uint64_t Dot) { - uint64_t V = E(Dot); - if (!V) - error(Msg); - return V; - }; -} - -// Reads a FILL(expr) command. We handle the FILL command as an -// alias for =fillexp section attribute, which is different from -// what GNU linkers do. -// https://sourceware.org/binutils/docs/ld/Output-Section-Data.html -uint32_t ScriptParser::readFill() { - expect("("); - uint32_t V = readOutputSectionFiller(next()); - expect(")"); - expect(";"); - return V; -} - -OutputSectionCommand * -ScriptParser::readOutputSectionDescription(StringRef OutSec) { - OutputSectionCommand *Cmd = new OutputSectionCommand(OutSec); - Cmd->Location = getCurrentLocation(); - - // Read an address expression. - // https://sourceware.org/binutils/docs/ld/Output-Section-Address.html#Output-Section-Address - if (peek() != ":") - Cmd->AddrExpr = readExpr(); - - expect(":"); - - if (consume("AT")) - Cmd->LMAExpr = readParenExpr(); - if (consume("ALIGN")) - Cmd->AlignExpr = readParenExpr(); - if (consume("SUBALIGN")) - Cmd->SubalignExpr = readParenExpr(); - - // Parse constraints. - if (consume("ONLY_IF_RO")) - Cmd->Constraint = ConstraintKind::ReadOnly; - if (consume("ONLY_IF_RW")) - Cmd->Constraint = ConstraintKind::ReadWrite; - expect("{"); - - while (!Error && !consume("}")) { - StringRef Tok = next(); - if (SymbolAssignment *Assignment = readProvideOrAssignment(Tok)) { - Cmd->Commands.emplace_back(Assignment); - } else if (BytesDataCommand *Data = readBytesDataCommand(Tok)) { - Cmd->Commands.emplace_back(Data); - } else if (Tok == "ASSERT") { - Cmd->Commands.emplace_back(new AssertCommand(readAssert())); - expect(";"); - } else if (Tok == "FILL") { - Cmd->Filler = readFill(); - } else if (Tok == "SORT") { - readSort(); - } else if (peek() == "(") { - Cmd->Commands.emplace_back(readInputSectionDescription(Tok)); - } else { - setError("unknown command " + Tok); - } - } - Cmd->Phdrs = readOutputSectionPhdrs(); - - if (consume("=")) - Cmd->Filler = readOutputSectionFiller(next()); - else if (peek().startswith("=")) - Cmd->Filler = readOutputSectionFiller(next().drop_front()); - - return Cmd; -} - -// Read "=<number>" where <number> is an octal/decimal/hexadecimal number. -// https://sourceware.org/binutils/docs/ld/Output-Section-Fill.html -// -// ld.gold is not fully compatible with ld.bfd. ld.bfd handles -// hexstrings as blobs of arbitrary sizes, while ld.gold handles them -// as 32-bit big-endian values. We will do the same as ld.gold does -// because it's simpler than what ld.bfd does. -uint32_t ScriptParser::readOutputSectionFiller(StringRef Tok) { - uint32_t V; - if (!Tok.getAsInteger(0, V)) - return V; - setError("invalid filler expression: " + Tok); - return 0; -} - -SymbolAssignment *ScriptParser::readProvideHidden(bool Provide, bool Hidden) { - expect("("); - SymbolAssignment *Cmd = readAssignment(next()); - Cmd->Provide = Provide; - Cmd->Hidden = Hidden; - expect(")"); - expect(";"); - return Cmd; -} - -SymbolAssignment *ScriptParser::readProvideOrAssignment(StringRef Tok) { - SymbolAssignment *Cmd = nullptr; - if (peek() == "=" || peek() == "+=") { - Cmd = readAssignment(Tok); - expect(";"); - } else if (Tok == "PROVIDE") { - Cmd = readProvideHidden(true, false); - } else if (Tok == "HIDDEN") { - Cmd = readProvideHidden(false, true); - } else if (Tok == "PROVIDE_HIDDEN") { - Cmd = readProvideHidden(true, true); - } - return Cmd; -} - -static uint64_t getSymbolValue(const Twine &Loc, StringRef S, uint64_t Dot) { - if (S == ".") - return Dot; - return ScriptBase->getSymbolValue(Loc, S); -} - -static bool isAbsolute(StringRef S) { - if (S == ".") - return false; - return ScriptBase->isAbsolute(S); -} - -SymbolAssignment *ScriptParser::readAssignment(StringRef Name) { - StringRef Op = next(); - Expr E; - assert(Op == "=" || Op == "+="); - if (consume("ABSOLUTE")) { - // The RHS may be something like "ABSOLUTE(.) & 0xff". - // Call readExpr1 to read the whole expression. - E = readExpr1(readParenExpr(), 0); - E.IsAbsolute = [] { return true; }; - } else { - E = readExpr(); - } - if (Op == "+=") { - std::string Loc = getCurrentLocation(); - E = [=](uint64_t Dot) { - return getSymbolValue(Loc, Name, Dot) + E(Dot); - }; - } - return new SymbolAssignment(Name, E); -} - -// This is an operator-precedence parser to parse a linker -// script expression. -Expr ScriptParser::readExpr() { return readExpr1(readPrimary(), 0); } - -static Expr combine(StringRef Op, Expr L, Expr R) { - if (Op == "*") - return [=](uint64_t Dot) { return L(Dot) * R(Dot); }; - if (Op == "/") { - return [=](uint64_t Dot) -> uint64_t { - uint64_t RHS = R(Dot); - if (RHS == 0) { - error("division by zero"); - return 0; - } - return L(Dot) / RHS; - }; - } - if (Op == "+") - return {[=](uint64_t Dot) { return L(Dot) + R(Dot); }, - [=] { return L.IsAbsolute() && R.IsAbsolute(); }, - [=] { - const OutputSectionBase *S = L.Section(); - return S ? S : R.Section(); - }}; - if (Op == "-") - return [=](uint64_t Dot) { return L(Dot) - R(Dot); }; - if (Op == "<<") - return [=](uint64_t Dot) { return L(Dot) << R(Dot); }; - if (Op == ">>") - return [=](uint64_t Dot) { return L(Dot) >> R(Dot); }; - if (Op == "<") - return [=](uint64_t Dot) { return L(Dot) < R(Dot); }; - if (Op == ">") - return [=](uint64_t Dot) { return L(Dot) > R(Dot); }; - if (Op == ">=") - return [=](uint64_t Dot) { return L(Dot) >= R(Dot); }; - if (Op == "<=") - return [=](uint64_t Dot) { return L(Dot) <= R(Dot); }; - if (Op == "==") - return [=](uint64_t Dot) { return L(Dot) == R(Dot); }; - if (Op == "!=") - return [=](uint64_t Dot) { return L(Dot) != R(Dot); }; - if (Op == "&") - return [=](uint64_t Dot) { return L(Dot) & R(Dot); }; - if (Op == "|") - return [=](uint64_t Dot) { return L(Dot) | R(Dot); }; - llvm_unreachable("invalid operator"); -} - -// This is a part of the operator-precedence parser. This function -// assumes that the remaining token stream starts with an operator. -Expr ScriptParser::readExpr1(Expr Lhs, int MinPrec) { - while (!atEOF() && !Error) { - // Read an operator and an expression. - if (consume("?")) - return readTernary(Lhs); - StringRef Op1 = peek(); - if (precedence(Op1) < MinPrec) - break; - skip(); - Expr Rhs = readPrimary(); - - // Evaluate the remaining part of the expression first if the - // next operator has greater precedence than the previous one. - // For example, if we have read "+" and "3", and if the next - // operator is "*", then we'll evaluate 3 * ... part first. - while (!atEOF()) { - StringRef Op2 = peek(); - if (precedence(Op2) <= precedence(Op1)) - break; - Rhs = readExpr1(Rhs, precedence(Op2)); - } - - Lhs = combine(Op1, Lhs, Rhs); - } - return Lhs; -} - -uint64_t static getConstant(StringRef S) { - if (S == "COMMONPAGESIZE") - return Target->PageSize; - if (S == "MAXPAGESIZE") - return Config->MaxPageSize; - error("unknown constant: " + S); - return 0; -} - -// Parses Tok as an integer. Returns true if successful. -// It recognizes hexadecimal (prefixed with "0x" or suffixed with "H") -// and decimal numbers. Decimal numbers may have "K" (kilo) or -// "M" (mega) prefixes. -static bool readInteger(StringRef Tok, uint64_t &Result) { - // Negative number - if (Tok.startswith("-")) { - if (!readInteger(Tok.substr(1), Result)) - return false; - Result = -Result; - return true; - } - - // Hexadecimal - if (Tok.startswith_lower("0x")) - return !Tok.substr(2).getAsInteger(16, Result); - if (Tok.endswith_lower("H")) - return !Tok.drop_back().getAsInteger(16, Result); - - // Decimal - int Suffix = 1; - if (Tok.endswith_lower("K")) { - Suffix = 1024; - Tok = Tok.drop_back(); - } else if (Tok.endswith_lower("M")) { - Suffix = 1024 * 1024; - Tok = Tok.drop_back(); - } - if (Tok.getAsInteger(10, Result)) - return false; - Result *= Suffix; - return true; -} - -BytesDataCommand *ScriptParser::readBytesDataCommand(StringRef Tok) { - int Size = StringSwitch<unsigned>(Tok) - .Case("BYTE", 1) - .Case("SHORT", 2) - .Case("LONG", 4) - .Case("QUAD", 8) - .Default(-1); - if (Size == -1) - return nullptr; - - return new BytesDataCommand(readParenExpr(), Size); -} - -StringRef ScriptParser::readParenLiteral() { - expect("("); - StringRef Tok = next(); - expect(")"); - return Tok; -} - -Expr ScriptParser::readPrimary() { - if (peek() == "(") - return readParenExpr(); - - StringRef Tok = next(); - std::string Location = getCurrentLocation(); - - if (Tok == "~") { - Expr E = readPrimary(); - return [=](uint64_t Dot) { return ~E(Dot); }; - } - if (Tok == "-") { - Expr E = readPrimary(); - return [=](uint64_t Dot) { return -E(Dot); }; - } - - // Built-in functions are parsed here. - // https://sourceware.org/binutils/docs/ld/Builtin-Functions.html. - if (Tok == "ADDR") { - StringRef Name = readParenLiteral(); - return {[=](uint64_t Dot) { - return ScriptBase->getOutputSection(Location, Name)->Addr; - }, - [=] { return false; }, - [=] { return ScriptBase->getOutputSection(Location, Name); }}; - } - if (Tok == "LOADADDR") { - StringRef Name = readParenLiteral(); - return [=](uint64_t Dot) { - return ScriptBase->getOutputSection(Location, Name)->getLMA(); - }; - } - if (Tok == "ASSERT") - return readAssert(); - if (Tok == "ALIGN") { - expect("("); - Expr E = readExpr(); - if (consume(",")) { - Expr E2 = readExpr(); - expect(")"); - return [=](uint64_t Dot) { return alignTo(E(Dot), E2(Dot)); }; - } - expect(")"); - return [=](uint64_t Dot) { return alignTo(Dot, E(Dot)); }; - } - if (Tok == "CONSTANT") { - StringRef Name = readParenLiteral(); - return [=](uint64_t Dot) { return getConstant(Name); }; - } - if (Tok == "DEFINED") { - StringRef Name = readParenLiteral(); - return [=](uint64_t Dot) { return ScriptBase->isDefined(Name) ? 1 : 0; }; - } - if (Tok == "SEGMENT_START") { - expect("("); - skip(); - expect(","); - Expr E = readExpr(); - expect(")"); - return [=](uint64_t Dot) { return E(Dot); }; - } - if (Tok == "DATA_SEGMENT_ALIGN") { - expect("("); - Expr E = readExpr(); - expect(","); - readExpr(); - expect(")"); - return [=](uint64_t Dot) { return alignTo(Dot, E(Dot)); }; - } - if (Tok == "DATA_SEGMENT_END") { - expect("("); - expect("."); - expect(")"); - return [](uint64_t Dot) { return Dot; }; - } - // GNU linkers implements more complicated logic to handle - // DATA_SEGMENT_RELRO_END. We instead ignore the arguments and just align to - // the next page boundary for simplicity. - if (Tok == "DATA_SEGMENT_RELRO_END") { - expect("("); - readExpr(); - expect(","); - readExpr(); - expect(")"); - return [](uint64_t Dot) { return alignTo(Dot, Target->PageSize); }; - } - if (Tok == "SIZEOF") { - StringRef Name = readParenLiteral(); - return [=](uint64_t Dot) { return ScriptBase->getOutputSectionSize(Name); }; - } - if (Tok == "ALIGNOF") { - StringRef Name = readParenLiteral(); - return [=](uint64_t Dot) { - return ScriptBase->getOutputSection(Location, Name)->Addralign; - }; - } - if (Tok == "SIZEOF_HEADERS") - return [=](uint64_t Dot) { return ScriptBase->getHeaderSize(); }; - - // Tok is a literal number. - uint64_t V; - if (readInteger(Tok, V)) - return [=](uint64_t Dot) { return V; }; - - // Tok is a symbol name. - if (Tok != "." && !isValidCIdentifier(Tok)) - setError("malformed number: " + Tok); - return {[=](uint64_t Dot) { return getSymbolValue(Location, Tok, Dot); }, - [=] { return isAbsolute(Tok); }, - [=] { return ScriptBase->getSymbolSection(Tok); }}; -} - -Expr ScriptParser::readTernary(Expr Cond) { - Expr L = readExpr(); - expect(":"); - Expr R = readExpr(); - return [=](uint64_t Dot) { return Cond(Dot) ? L(Dot) : R(Dot); }; -} - -Expr ScriptParser::readParenExpr() { - expect("("); - Expr E = readExpr(); - expect(")"); - return E; -} - -std::vector<StringRef> ScriptParser::readOutputSectionPhdrs() { - std::vector<StringRef> Phdrs; - while (!Error && peek().startswith(":")) { - StringRef Tok = next(); - Phdrs.push_back((Tok.size() == 1) ? next() : Tok.substr(1)); - } - return Phdrs; -} - -// Read a program header type name. The next token must be a -// name of a program header type or a constant (e.g. "0x3"). -unsigned ScriptParser::readPhdrType() { - StringRef Tok = next(); - uint64_t Val; - if (readInteger(Tok, Val)) - return Val; - - unsigned Ret = StringSwitch<unsigned>(Tok) - .Case("PT_NULL", PT_NULL) - .Case("PT_LOAD", PT_LOAD) - .Case("PT_DYNAMIC", PT_DYNAMIC) - .Case("PT_INTERP", PT_INTERP) - .Case("PT_NOTE", PT_NOTE) - .Case("PT_SHLIB", PT_SHLIB) - .Case("PT_PHDR", PT_PHDR) - .Case("PT_TLS", PT_TLS) - .Case("PT_GNU_EH_FRAME", PT_GNU_EH_FRAME) - .Case("PT_GNU_STACK", PT_GNU_STACK) - .Case("PT_GNU_RELRO", PT_GNU_RELRO) - .Case("PT_OPENBSD_RANDOMIZE", PT_OPENBSD_RANDOMIZE) - .Case("PT_OPENBSD_WXNEEDED", PT_OPENBSD_WXNEEDED) - .Case("PT_OPENBSD_BOOTDATA", PT_OPENBSD_BOOTDATA) - .Default(-1); - - if (Ret == (unsigned)-1) { - setError("invalid program header type: " + Tok); - return PT_NULL; - } - return Ret; -} - -// Reads a list of symbols, e.g. "{ global: foo; bar; local: *; };". -void ScriptParser::readAnonymousDeclaration() { - // Read global symbols first. "global:" is default, so if there's - // no label, we assume global symbols. - if (consume("global:") || peek() != "local:") - Config->VersionScriptGlobals = readSymbols(); - - readLocals(); - expect("}"); - expect(";"); -} - -void ScriptParser::readLocals() { - if (!consume("local:")) - return; - std::vector<SymbolVersion> Locals = readSymbols(); - for (SymbolVersion V : Locals) { - if (V.Name == "*") { - Config->DefaultSymbolVersion = VER_NDX_LOCAL; - continue; - } - Config->VersionScriptLocals.push_back(V); - } -} - -// Reads a list of symbols, e.g. "VerStr { global: foo; bar; local: *; };". -void ScriptParser::readVersionDeclaration(StringRef VerStr) { - // Identifiers start at 2 because 0 and 1 are reserved - // for VER_NDX_LOCAL and VER_NDX_GLOBAL constants. - uint16_t VersionId = Config->VersionDefinitions.size() + 2; - Config->VersionDefinitions.push_back({VerStr, VersionId}); - - // Read global symbols. - if (consume("global:") || peek() != "local:") - Config->VersionDefinitions.back().Globals = readSymbols(); - - readLocals(); - expect("}"); - - // Each version may have a parent version. For example, "Ver2" - // defined as "Ver2 { global: foo; local: *; } Ver1;" has "Ver1" - // as a parent. This version hierarchy is, probably against your - // instinct, purely for hint; the runtime doesn't care about it - // at all. In LLD, we simply ignore it. - if (peek() != ";") - skip(); - expect(";"); -} - -// Reads a list of symbols for a versions cript. -std::vector<SymbolVersion> ScriptParser::readSymbols() { - std::vector<SymbolVersion> Ret; - for (;;) { - if (consume("extern")) { - for (SymbolVersion V : readVersionExtern()) - Ret.push_back(V); - continue; - } - - if (peek() == "}" || peek() == "local:" || Error) - break; - StringRef Tok = next(); - Ret.push_back({unquote(Tok), false, hasWildcard(Tok)}); - expect(";"); - } - return Ret; -} - -// Reads an "extern C++" directive, e.g., -// "extern "C++" { ns::*; "f(int, double)"; };" -std::vector<SymbolVersion> ScriptParser::readVersionExtern() { - StringRef Tok = next(); - bool IsCXX = Tok == "\"C++\""; - if (!IsCXX && Tok != "\"C\"") - setError("Unknown language"); - expect("{"); - - std::vector<SymbolVersion> Ret; - while (!Error && peek() != "}") { - StringRef Tok = next(); - bool HasWildcard = !Tok.startswith("\"") && hasWildcard(Tok); - Ret.push_back({unquote(Tok), IsCXX, HasWildcard}); - expect(";"); - } - - expect("}"); - expect(";"); - return Ret; -} - -void elf::readLinkerScript(MemoryBufferRef MB) { - ScriptParser(MB).readLinkerScript(); -} - -void elf::readVersionScript(MemoryBufferRef MB) { - ScriptParser(MB).readVersionScript(); -} - -void elf::readDynamicList(MemoryBufferRef MB) { - ScriptParser(MB).readDynamicList(); -} +template void OutputSectionCommand::writeTo<ELF32LE>(uint8_t *Buf); +template void OutputSectionCommand::writeTo<ELF32BE>(uint8_t *Buf); +template void OutputSectionCommand::writeTo<ELF64LE>(uint8_t *Buf); +template void OutputSectionCommand::writeTo<ELF64BE>(uint8_t *Buf); -template class elf::LinkerScript<ELF32LE>; -template class elf::LinkerScript<ELF32BE>; -template class elf::LinkerScript<ELF64LE>; -template class elf::LinkerScript<ELF64BE>; +template void OutputSectionCommand::maybeCompress<ELF32LE>(); +template void OutputSectionCommand::maybeCompress<ELF32BE>(); +template void OutputSectionCommand::maybeCompress<ELF64LE>(); +template void OutputSectionCommand::maybeCompress<ELF64BE>(); |
