diff options
Diffstat (limited to 'ELF/LinkerScript.cpp')
-rw-r--r-- | ELF/LinkerScript.cpp | 415 |
1 files changed, 232 insertions, 183 deletions
diff --git a/ELF/LinkerScript.cpp b/ELF/LinkerScript.cpp index 49e44d7804761..cebbd89168be5 100644 --- a/ELF/LinkerScript.cpp +++ b/ELF/LinkerScript.cpp @@ -43,29 +43,27 @@ using namespace llvm; using namespace llvm::ELF; using namespace llvm::object; using namespace llvm::support::endian; -using namespace lld; -using namespace lld::elf; -LinkerScript *elf::script; +namespace lld { +namespace elf { +LinkerScript *script; -static uint64_t getOutputSectionVA(SectionBase *inputSec, StringRef loc) { - if (OutputSection *os = inputSec->getOutputSection()) - return os->addr; - error(loc + ": unable to evaluate expression: input section " + - inputSec->name + " has no output section assigned"); - return 0; +static uint64_t getOutputSectionVA(SectionBase *sec) { + OutputSection *os = sec->getOutputSection(); + assert(os && "input section has no output section assigned"); + return os ? os->addr : 0; } uint64_t ExprValue::getValue() const { if (sec) - return alignTo(sec->getOffset(val) + getOutputSectionVA(sec, loc), + return alignTo(sec->getOffset(val) + getOutputSectionVA(sec), alignment); return alignTo(val, alignment); } uint64_t ExprValue::getSecAddr() const { if (sec) - return sec->getOffset(0) + getOutputSectionVA(sec, loc); + return sec->getOffset(0) + getOutputSectionVA(sec); return 0; } @@ -73,7 +71,7 @@ uint64_t ExprValue::getSectionOffset() const { // If the alignment is trivial, we don't have to compute the full // value to know the offset. This allows this function to succeed in // cases where the output section is not yet known. - if (alignment == 1 && (!sec || !sec->getOutputSection())) + if (alignment == 1 && !sec) return val; return getValue() - getSecAddr(); } @@ -157,8 +155,8 @@ static bool shouldDefineSym(SymbolAssignment *cmd) { return false; } -// This function is called from processSectionCommands, -// while we are fixing the output section layout. +// Called by processSymbolAssignments() to assign definitions to +// linker-script-defined symbols. 
void LinkerScript::addSymbol(SymbolAssignment *cmd) { if (!shouldDefineSym(cmd)) return; @@ -181,12 +179,12 @@ void LinkerScript::addSymbol(SymbolAssignment *cmd) { // write expressions like this: `alignment = 16; . = ALIGN(., alignment)`. uint64_t symValue = value.sec ? 0 : value.getValue(); - Defined New(nullptr, cmd->name, STB_GLOBAL, visibility, STT_NOTYPE, symValue, - 0, sec); + Defined newSym(nullptr, cmd->name, STB_GLOBAL, visibility, STT_NOTYPE, + symValue, 0, sec); Symbol *sym = symtab->insert(cmd->name); - sym->mergeProperties(New); - sym->replace(New); + sym->mergeProperties(newSym); + sym->replace(newSym); cmd->sym = cast<Defined>(sym); } @@ -197,19 +195,57 @@ static void declareSymbol(SymbolAssignment *cmd) { return; uint8_t visibility = cmd->hidden ? STV_HIDDEN : STV_DEFAULT; - Defined New(nullptr, cmd->name, STB_GLOBAL, visibility, STT_NOTYPE, 0, 0, - nullptr); + Defined newSym(nullptr, cmd->name, STB_GLOBAL, visibility, STT_NOTYPE, 0, 0, + nullptr); // We can't calculate final value right now. Symbol *sym = symtab->insert(cmd->name); - sym->mergeProperties(New); - sym->replace(New); + sym->mergeProperties(newSym); + sym->replace(newSym); cmd->sym = cast<Defined>(sym); cmd->provide = false; sym->scriptDefined = true; } +using SymbolAssignmentMap = + DenseMap<const Defined *, std::pair<SectionBase *, uint64_t>>; + +// Collect section/value pairs of linker-script-defined symbols. This is used to +// check whether symbol values converge. +static SymbolAssignmentMap +getSymbolAssignmentValues(const std::vector<BaseCommand *> &sectionCommands) { + SymbolAssignmentMap ret; + for (BaseCommand *base : sectionCommands) { + if (auto *cmd = dyn_cast<SymbolAssignment>(base)) { + if (cmd->sym) // sym is nullptr for dot. 
+ ret.try_emplace(cmd->sym, + std::make_pair(cmd->sym->section, cmd->sym->value)); + continue; + } + for (BaseCommand *sub_base : cast<OutputSection>(base)->sectionCommands) + if (auto *cmd = dyn_cast<SymbolAssignment>(sub_base)) + if (cmd->sym) + ret.try_emplace(cmd->sym, + std::make_pair(cmd->sym->section, cmd->sym->value)); + } + return ret; +} + +// Returns the lexicographical smallest (for determinism) Defined whose +// section/value has changed. +static const Defined * +getChangedSymbolAssignment(const SymbolAssignmentMap &oldValues) { + const Defined *changed = nullptr; + for (auto &it : oldValues) { + const Defined *sym = it.first; + if (std::make_pair(sym->section, sym->value) != it.second && + (!changed || sym->getName() < changed->getName())) + changed = sym; + } + return changed; +} + // This method is used to handle INSERT AFTER statement. Here we rebuild // the list of script commands to mix sections inserted into. void LinkerScript::processInsertCommands() { @@ -305,46 +341,44 @@ bool LinkerScript::shouldKeep(InputSectionBase *s) { } // A helper function for the SORT() command. -static std::function<bool(InputSectionBase *, InputSectionBase *)> -getComparator(SortSectionPolicy k) { - switch (k) { - case SortSectionPolicy::Alignment: - return [](InputSectionBase *a, InputSectionBase *b) { - // ">" is not a mistake. Sections with larger alignments are placed - // before sections with smaller alignments in order to reduce the - // amount of padding necessary. This is compatible with GNU. - return a->alignment > b->alignment; - }; - case SortSectionPolicy::Name: - return [](InputSectionBase *a, InputSectionBase *b) { - return a->name < b->name; - }; - case SortSectionPolicy::Priority: - return [](InputSectionBase *a, InputSectionBase *b) { - return getPriority(a->name) < getPriority(b->name); - }; - default: - llvm_unreachable("unknown sort policy"); - } -} - -// A helper function for the SORT() command. 
-static bool matchConstraints(ArrayRef<InputSection *> sections, +static bool matchConstraints(ArrayRef<InputSectionBase *> sections, ConstraintKind kind) { if (kind == ConstraintKind::NoConstraint) return true; bool isRW = llvm::any_of( - sections, [](InputSection *sec) { return sec->flags & SHF_WRITE; }); + sections, [](InputSectionBase *sec) { return sec->flags & SHF_WRITE; }); return (isRW && kind == ConstraintKind::ReadWrite) || (!isRW && kind == ConstraintKind::ReadOnly); } -static void sortSections(MutableArrayRef<InputSection *> vec, +static void sortSections(MutableArrayRef<InputSectionBase *> vec, SortSectionPolicy k) { - if (k != SortSectionPolicy::Default && k != SortSectionPolicy::None) - llvm::stable_sort(vec, getComparator(k)); + auto alignmentComparator = [](InputSectionBase *a, InputSectionBase *b) { + // ">" is not a mistake. Sections with larger alignments are placed + // before sections with smaller alignments in order to reduce the + // amount of padding necessary. This is compatible with GNU. + return a->alignment > b->alignment; + }; + auto nameComparator = [](InputSectionBase *a, InputSectionBase *b) { + return a->name < b->name; + }; + auto priorityComparator = [](InputSectionBase *a, InputSectionBase *b) { + return getPriority(a->name) < getPriority(b->name); + }; + + switch (k) { + case SortSectionPolicy::Default: + case SortSectionPolicy::None: + return; + case SortSectionPolicy::Alignment: + return llvm::stable_sort(vec, alignmentComparator); + case SortSectionPolicy::Name: + return llvm::stable_sort(vec, nameComparator); + case SortSectionPolicy::Priority: + return llvm::stable_sort(vec, priorityComparator); + } } // Sort sections as instructed by SORT-family commands and --sort-section @@ -358,7 +392,7 @@ static void sortSections(MutableArrayRef<InputSection *> vec, // --sort-section is handled as an inner SORT command. // 3. If one SORT command is given, and if it is SORT_NONE, don't sort. // 4. 
If no SORT command is given, sort according to --sort-section. -static void sortInputSections(MutableArrayRef<InputSection *> vec, +static void sortInputSections(MutableArrayRef<InputSectionBase *> vec, const SectionPattern &pat) { if (pat.sortOuter == SortSectionPolicy::None) return; @@ -371,16 +405,16 @@ static void sortInputSections(MutableArrayRef<InputSection *> vec, } // Compute and remember which sections the InputSectionDescription matches. -std::vector<InputSection *> +std::vector<InputSectionBase *> LinkerScript::computeInputSections(const InputSectionDescription *cmd) { - std::vector<InputSection *> ret; + std::vector<InputSectionBase *> ret; // Collects all sections that satisfy constraints of Cmd. for (const SectionPattern &pat : cmd->sectionPatterns) { size_t sizeBefore = ret.size(); for (InputSectionBase *sec : inputSections) { - if (!sec->isLive() || sec->assigned) + if (!sec->isLive() || sec->parent) continue; // For -emit-relocs we have to ignore entries like @@ -388,9 +422,9 @@ LinkerScript::computeInputSections(const InputSectionDescription *cmd) { // which are common because they are in the default bfd script. // We do not ignore SHT_REL[A] linker-synthesized sections here because // want to support scripts that do custom layout for them. - if (auto *isec = dyn_cast<InputSection>(sec)) - if (isec->getRelocatedSection()) - continue; + if (isa<InputSection>(sec) && + cast<InputSection>(sec)->getRelocatedSection()) + continue; std::string filename = getFilename(sec->file); if (!cmd->filePat.match(filename) || @@ -398,88 +432,60 @@ LinkerScript::computeInputSections(const InputSectionDescription *cmd) { !pat.sectionPat.match(sec->name)) continue; - // It is safe to assume that Sec is an InputSection - // because mergeable or EH input sections have already been - // handled and eliminated. 
- ret.push_back(cast<InputSection>(sec)); - sec->assigned = true; + ret.push_back(sec); } - sortInputSections(MutableArrayRef<InputSection *>(ret).slice(sizeBefore), - pat); + sortInputSections( + MutableArrayRef<InputSectionBase *>(ret).slice(sizeBefore), pat); } return ret; } -void LinkerScript::discard(ArrayRef<InputSection *> v) { - for (InputSection *s : v) { - if (s == in.shStrTab || s == mainPart->relaDyn || s == mainPart->relrDyn) - error("discarding " + s->name + " section is not allowed"); - - // You can discard .hash and .gnu.hash sections by linker scripts. Since - // they are synthesized sections, we need to handle them differently than - // other regular sections. - if (s == mainPart->gnuHashTab) - mainPart->gnuHashTab = nullptr; - if (s == mainPart->hashTab) - mainPart->hashTab = nullptr; - - s->assigned = false; - s->markDead(); - discard(s->dependentSections); - } +void LinkerScript::discard(InputSectionBase *s) { + if (s == in.shStrTab || s == mainPart->relaDyn || s == mainPart->relrDyn) + error("discarding " + s->name + " section is not allowed"); + + // You can discard .hash and .gnu.hash sections by linker scripts. Since + // they are synthesized sections, we need to handle them differently than + // other regular sections. 
+ if (s == mainPart->gnuHashTab) + mainPart->gnuHashTab = nullptr; + if (s == mainPart->hashTab) + mainPart->hashTab = nullptr; + + s->markDead(); + s->parent = nullptr; + for (InputSection *ds : s->dependentSections) + discard(ds); } -std::vector<InputSection *> +std::vector<InputSectionBase *> LinkerScript::createInputSectionList(OutputSection &outCmd) { - std::vector<InputSection *> ret; + std::vector<InputSectionBase *> ret; for (BaseCommand *base : outCmd.sectionCommands) { if (auto *cmd = dyn_cast<InputSectionDescription>(base)) { - cmd->sections = computeInputSections(cmd); - ret.insert(ret.end(), cmd->sections.begin(), cmd->sections.end()); + cmd->sectionBases = computeInputSections(cmd); + for (InputSectionBase *s : cmd->sectionBases) + s->parent = &outCmd; + ret.insert(ret.end(), cmd->sectionBases.begin(), cmd->sectionBases.end()); } } return ret; } +// Create output sections described by SECTIONS commands. void LinkerScript::processSectionCommands() { - // A symbol can be assigned before any section is mentioned in the linker - // script. In an DSO, the symbol values are addresses, so the only important - // section values are: - // * SHN_UNDEF - // * SHN_ABS - // * Any value meaning a regular section. - // To handle that, create a dummy aether section that fills the void before - // the linker scripts switches to another section. It has an index of one - // which will map to whatever the first actual section is. - aether = make<OutputSection>("", 0, SHF_ALLOC); - aether->sectionIndex = 1; - - // Ctx captures the local AddressState and makes it accessible deliberately. - // This is needed as there are some cases where we cannot just - // thread the current state through to a lambda function created by the - // script parser. - auto deleter = make_unique<AddressState>(); - ctx = deleter.get(); - ctx->outSec = aether; - size_t i = 0; - // Add input sections to output sections. 
for (BaseCommand *base : sectionCommands) { - // Handle symbol assignments outside of any output section. - if (auto *cmd = dyn_cast<SymbolAssignment>(base)) { - addSymbol(cmd); - continue; - } - if (auto *sec = dyn_cast<OutputSection>(base)) { - std::vector<InputSection *> v = createInputSectionList(*sec); + std::vector<InputSectionBase *> v = createInputSectionList(*sec); // The output section name `/DISCARD/' is special. // Any input section assigned to it is discarded. if (sec->name == "/DISCARD/") { - discard(v); + for (InputSectionBase *s : v) + discard(s); sec->sectionCommands.clear(); continue; } @@ -493,17 +499,11 @@ void LinkerScript::processSectionCommands() { // way to "make it as if it wasn't present" is to make it empty. if (!matchConstraints(v, sec->constraint)) { for (InputSectionBase *s : v) - s->assigned = false; + s->parent = nullptr; sec->sectionCommands.clear(); continue; } - // A directive may contain symbol definitions like this: - // ".foo : { ...; bar = .; }". Handle them. - for (BaseCommand *base : sec->sectionCommands) - if (auto *outCmd = dyn_cast<SymbolAssignment>(base)) - addSymbol(outCmd); - // Handle subalign (e.g. ".foo : SUBALIGN(32) { ... }"). If subalign // is given, input sections are aligned to that value, whether the // given value is larger or smaller than the original section alignment. @@ -513,17 +513,40 @@ void LinkerScript::processSectionCommands() { s->alignment = subalign; } - // Add input sections to an output section. - for (InputSection *s : v) - sec->addSection(s); + // Set the partition field the same way OutputSection::recordSection() + // does. Partitions cannot be used with the SECTIONS command, so this is + // always 1. 
+ sec->partition = 1; sec->sectionIndex = i++; - if (sec->noload) - sec->type = SHT_NOBITS; - if (sec->nonAlloc) - sec->flags &= ~(uint64_t)SHF_ALLOC; } } +} + +void LinkerScript::processSymbolAssignments() { + // Dot outside an output section still represents a relative address, whose + // sh_shndx should not be SHN_UNDEF or SHN_ABS. Create a dummy aether section + // that fills the void outside a section. It has an index of one, which is + // indistinguishable from any other regular section index. + aether = make<OutputSection>("", 0, SHF_ALLOC); + aether->sectionIndex = 1; + + // ctx captures the local AddressState and makes it accessible deliberately. + // This is needed as there are some cases where we cannot just thread the + // current state through to a lambda function created by the script parser. + AddressState state; + ctx = &state; + ctx->outSec = aether; + + for (BaseCommand *base : sectionCommands) { + if (auto *cmd = dyn_cast<SymbolAssignment>(base)) + addSymbol(cmd); + else + for (BaseCommand *sub_base : cast<OutputSection>(base)->sectionCommands) + if (auto *cmd = dyn_cast<SymbolAssignment>(sub_base)) + addSymbol(cmd); + } + ctx = nullptr; } @@ -539,7 +562,7 @@ static OutputSection *findByName(ArrayRef<BaseCommand *> vec, static OutputSection *createSection(InputSectionBase *isec, StringRef outsecName) { OutputSection *sec = script->createOutputSection(outsecName, "<internal>"); - sec->addSection(cast<InputSection>(isec)); + sec->recordSection(isec); return sec; } @@ -568,7 +591,7 @@ addInputSec(StringMap<TinyPtrVector<OutputSection *>> &map, OutputSection *out = sec->getRelocatedSection()->getOutputSection(); if (out->relocationSection) { - out->relocationSection->addSection(sec); + out->relocationSection->recordSection(sec); return nullptr; } @@ -576,12 +599,6 @@ addInputSec(StringMap<TinyPtrVector<OutputSection *>> &map, return out->relocationSection; } - // When control reaches here, mergeable sections have already been merged into - // 
synthetic sections. For relocatable case we want to create one output - // section per syntetic section so that they have a valid sh_entsize. - if (config->relocatable && (isec->flags & SHF_MERGE)) - return createSection(isec, outsecName); - // The ELF spec just says // ---------------------------------------------------------------- // In the first phase, input sections that match in name, type and @@ -628,7 +645,21 @@ addInputSec(StringMap<TinyPtrVector<OutputSection *>> &map, for (OutputSection *sec : v) { if (sec->partition != isec->partition) continue; - sec->addSection(cast<InputSection>(isec)); + + if (config->relocatable && (isec->flags & SHF_LINK_ORDER)) { + // Merging two SHF_LINK_ORDER sections with different sh_link fields will + // change their semantics, so we only merge them in -r links if they will + // end up being linked to the same output section. The casts are fine + // because everything in the map was created by the orphan placement code. + auto *firstIsec = cast<InputSectionBase>( + cast<InputSectionDescription>(sec->sectionCommands[0]) + ->sectionBases[0]); + if (firstIsec->getLinkOrderDep()->getOutputSection() != + isec->getLinkOrderDep()->getOutputSection()) + continue; + } + + sec->recordSection(isec); return nullptr; } @@ -642,25 +673,30 @@ void LinkerScript::addOrphanSections() { StringMap<TinyPtrVector<OutputSection *>> map; std::vector<OutputSection *> v; - auto add = [&](InputSectionBase *s) { - if (!s->isLive() || s->parent) - return; - - StringRef name = getOutputSectionName(s); - - if (config->orphanHandling == OrphanHandlingPolicy::Error) - error(toString(s) + " is being placed in '" + name + "'"); - else if (config->orphanHandling == OrphanHandlingPolicy::Warn) - warn(toString(s) + " is being placed in '" + name + "'"); - - if (OutputSection *sec = findByName(sectionCommands, name)) { - sec->addSection(cast<InputSection>(s)); - return; + std::function<void(InputSectionBase *)> add; + add = [&](InputSectionBase *s) { + if 
(s->isLive() && !s->parent) { + StringRef name = getOutputSectionName(s); + + if (config->orphanHandling == OrphanHandlingPolicy::Error) + error(toString(s) + " is being placed in '" + name + "'"); + else if (config->orphanHandling == OrphanHandlingPolicy::Warn) + warn(toString(s) + " is being placed in '" + name + "'"); + + if (OutputSection *sec = findByName(sectionCommands, name)) { + sec->recordSection(s); + } else { + if (OutputSection *os = addInputSec(map, s, name)) + v.push_back(os); + assert(isa<MergeInputSection>(s) || + s->getOutputSection()->sectionIndex == UINT32_MAX); + } } - if (OutputSection *os = addInputSec(map, s, name)) - v.push_back(os); - assert(s->getOutputSection()->sectionIndex == UINT32_MAX); + if (config->relocatable) + for (InputSectionBase *depSec : s->dependentSections) + if (depSec->flags & SHF_LINK_ORDER) + add(depSec); }; // For futher --emit-reloc handling code we need target output section @@ -668,6 +704,12 @@ void LinkerScript::addOrphanSections() { // to create target sections first. We do not want priority handling // for synthetic sections because them are special. for (InputSectionBase *isec : inputSections) { + // In -r links, SHF_LINK_ORDER sections are added while adding their parent + // sections because we need to know the parent's output section before we + // can select an output section for the SHF_LINK_ORDER section. 
+ if (config->relocatable && (isec->flags & SHF_LINK_ORDER)) + continue; + if (auto *sec = dyn_cast<InputSection>(isec)) if (InputSectionBase *rel = sec->getRelocatedSection()) if (auto *relIS = dyn_cast_or_null<InputSectionBase>(rel->parent)) @@ -772,6 +814,14 @@ void LinkerScript::assignOffsets(OutputSection *sec) { if ((sec->flags & SHF_ALLOC) && sec->addrExpr) setDot(sec->addrExpr, sec->location, false); + // If the address of the section has been moved forward by an explicit + // expression so that it now starts past the current curPos of the enclosing + // region, we need to expand the current region to account for the space + // between the previous section, if any, and the start of this section. + if (ctx->memRegion && ctx->memRegion->curPos < dot) + expandMemoryRegion(ctx->memRegion, dot - ctx->memRegion->curPos, + ctx->memRegion->name, sec->name); + switchTo(sec); if (sec->lmaExpr) @@ -972,17 +1022,13 @@ static uint64_t computeBase(uint64_t min, bool allocateHeaders) { return alignDown(min, config->maxPageSize); } -// Try to find an address for the file and program headers output sections, -// which were unconditionally added to the first PT_LOAD segment earlier. -// -// When using the default layout, we check if the headers fit below the first -// allocated section. When using a linker script, we also check if the headers -// are covered by the output section. This allows omitting the headers by not -// leaving enough space for them in the linker script; this pattern is common -// in embedded systems. +// When the SECTIONS command is used, try to find an address for the file and +// program headers output sections, which can be added to the first PT_LOAD +// segment when program headers are created. // -// If there isn't enough space for these sections, we'll remove them from the -// PT_LOAD segment, and we'll also remove the PT_PHDR segment. +// We check if the headers fit below the first allocated section. 
If there isn't +// enough space for these sections, we'll remove them from the PT_LOAD segment, +// and we'll also remove the PT_PHDR segment. void LinkerScript::allocateHeaders(std::vector<PhdrEntry *> &phdrs) { uint64_t min = std::numeric_limits<uint64_t>::max(); for (OutputSection *sec : outputSections) @@ -1028,32 +1074,30 @@ LinkerScript::AddressState::AddressState() { } } -static uint64_t getInitialDot() { - // By default linker scripts use an initial value of 0 for '.', - // but prefer -image-base if set. - if (script->hasSectionsCommand) - return config->imageBase ? *config->imageBase : 0; - - uint64_t startAddr = UINT64_MAX; - // The sections with -T<section> have been sorted in order of ascending - // address. We must lower startAddr if the lowest -T<section address> as - // calls to setDot() must be monotonically increasing. - for (auto &kv : config->sectionStartMap) - startAddr = std::min(startAddr, kv.second); - return std::min(startAddr, target->getImageBase() + elf::getHeaderSize()); -} - // Here we assign addresses as instructed by linker script SECTIONS // sub-commands. Doing that allows us to use final VA values, so here // we also handle rest commands like symbol assignments and ASSERTs. -void LinkerScript::assignAddresses() { - dot = getInitialDot(); +// Returns a symbol that has changed its section or value, or nullptr if no +// symbol has changed. +const Defined *LinkerScript::assignAddresses() { + if (script->hasSectionsCommand) { + // With a linker script, assignment of addresses to headers is covered by + // allocateHeaders(). + dot = config->imageBase.getValueOr(0); + } else { + // Assign addresses to headers right now. 
+ dot = target->getImageBase(); + Out::elfHeader->addr = dot; + Out::programHeaders->addr = dot + Out::elfHeader->size; + dot += getHeaderSize(); + } - auto deleter = make_unique<AddressState>(); + auto deleter = std::make_unique<AddressState>(); ctx = deleter.get(); errorOnMissingSection = true; switchTo(aether); + SymbolAssignmentMap oldValues = getSymbolAssignmentValues(sectionCommands); for (BaseCommand *base : sectionCommands) { if (auto *cmd = dyn_cast<SymbolAssignment>(base)) { cmd->addr = dot; @@ -1063,7 +1107,9 @@ void LinkerScript::assignAddresses() { } assignOffsets(cast<OutputSection>(base)); } + ctx = nullptr; + return getChangedSymbolAssignment(oldValues); } // Creates program headers as instructed by PHDRS linker script command. @@ -1156,3 +1202,6 @@ std::vector<size_t> LinkerScript::getPhdrIndices(OutputSection *cmd) { } return ret; } + +} // namespace elf +} // namespace lld |