diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2018-07-28 11:08:33 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2018-07-28 11:08:33 +0000 |
commit | 20d35e67e67f106f617c939725101223211659f0 (patch) | |
tree | 64eb963cbf5ba58765e0a6b64a440965d66a7a4d /ELF/ScriptParser.cpp | |
parent | ae1a339de31cf4065777531959a11e55a2e5fa00 (diff) |
Notes
Diffstat (limited to 'ELF/ScriptParser.cpp')
-rw-r--r-- | ELF/ScriptParser.cpp | 304 |
1 files changed, 221 insertions, 83 deletions
diff --git a/ELF/ScriptParser.cpp b/ELF/ScriptParser.cpp index 4263944981f2..ddb4a49a3e5e 100644 --- a/ELF/ScriptParser.cpp +++ b/ELF/ScriptParser.cpp @@ -63,6 +63,7 @@ private: void readExtern(); void readGroup(); void readInclude(); + void readInput(); void readMemory(); void readOutput(); void readOutputArch(); @@ -74,12 +75,14 @@ private: void readVersion(); void readVersionScriptCommand(); - SymbolAssignment *readAssignment(StringRef Name); + SymbolAssignment *readSymbolAssignment(StringRef Name); ByteCommand *readByteCommand(StringRef Tok); uint32_t readFill(); uint32_t parseFill(StringRef Tok); void readSectionAddressType(OutputSection *Cmd); + OutputSection *readOverlaySectionDescription(); OutputSection *readOutputSectionDescription(StringRef OutSec); + std::vector<BaseCommand *> readOverlay(); std::vector<StringRef> readOutputSectionPhdrs(); InputSectionDescription *readInputSectionDescription(StringRef Tok); StringMatcher readFilePatterns(); @@ -88,16 +91,16 @@ private: unsigned readPhdrType(); SortSectionPolicy readSortKind(); SymbolAssignment *readProvideHidden(bool Provide, bool Hidden); - SymbolAssignment *readProvideOrAssignment(StringRef Tok); + SymbolAssignment *readAssignment(StringRef Tok); void readSort(); - AssertCommand *readAssert(); - Expr readAssertExpr(); + Expr readAssert(); Expr readConstant(); Expr getPageSize(); uint64_t readMemoryAssignment(StringRef, StringRef, StringRef); std::pair<uint32_t, uint32_t> readMemoryAttributes(); + Expr combine(StringRef Op, Expr L, Expr R); Expr readExpr(); Expr readExpr1(Expr Lhs, int MinPrec); StringRef readParenLiteral(); @@ -157,17 +160,6 @@ static ExprValue sub(ExprValue A, ExprValue B) { return {A.Sec, false, A.getSectionOffset() - B.getValue(), A.Loc}; } -static ExprValue mul(ExprValue A, ExprValue B) { - return A.getValue() * B.getValue(); -} - -static ExprValue div(ExprValue A, ExprValue B) { - if (uint64_t BV = B.getValue()) - return A.getValue() / BV; - error("division by zero"); - return 0; -} - static ExprValue bitAnd(ExprValue A, ExprValue B) { moveAbsRight(A, B); return {A.Sec, A.ForceAbsolute, @@ -237,16 +229,16 @@ void ScriptParser::readLinkerScript() { if (Tok == ";") continue; - if (Tok == "ASSERT") { - Script->SectionCommands.push_back(readAssert()); - } else if (Tok == "ENTRY") { + if (Tok == "ENTRY") { readEntry(); } else if (Tok == "EXTERN") { readExtern(); - } else if (Tok == "GROUP" || Tok == "INPUT") { + } else if (Tok == "GROUP") { readGroup(); } else if (Tok == "INCLUDE") { readInclude(); + } else if (Tok == "INPUT") { + readInput(); } else if (Tok == "MEMORY") { readMemory(); } else if (Tok == "OUTPUT") { @@ -265,7 +257,7 @@ void ScriptParser::readLinkerScript() { readSections(); } else if (Tok == "VERSION") { readVersion(); - } else if (SymbolAssignment *Cmd = readProvideOrAssignment(Tok)) { + } else if (SymbolAssignment *Cmd = readAssignment(Tok)) { Script->SectionCommands.push_back(Cmd); } else { setError("unknown directive: " + Tok); @@ -336,13 +328,12 @@ void ScriptParser::readExtern() { } void ScriptParser::readGroup() { - expect("("); - while (!errorCount() && !consume(")")) { - if (consume("AS_NEEDED")) - readAsNeeded(); - else - addFile(unquote(next())); - } + bool Orig = InputFile::IsInGroup; + InputFile::IsInGroup = true; + readInput(); + InputFile::IsInGroup = Orig; + if (!Orig) + ++InputFile::NextGroupId; } void ScriptParser::readInclude() { @@ -353,7 +344,7 @@ void ScriptParser::readInclude() { return; } - if (Optional<std::string> Path = searchLinkerScript(Tok)) { + if (Optional<std::string> Path = searchScript(Tok)) { if (Optional<MemoryBufferRef> MB = readFile(*Path)) tokenize(*MB); return; @@ -361,6 +352,16 @@ void ScriptParser::readInclude() { setError("cannot find linker script " + Tok); } +void ScriptParser::readInput() { + expect("("); + while (!errorCount() && !consume(")")) { + if (consume("AS_NEEDED")) + readAsNeeded(); + else + addFile(unquote(next())); + } +} + void ScriptParser::readOutput() { // -o <file> takes predecence over OUTPUT(<file>). expect("("); @@ -437,6 +438,49 @@ void ScriptParser::readSearchDir() { expect(")"); } +// This reads an overlay description. Overlays are used to describe output +// sections that use the same virtual memory range and normally would trigger +// linker's sections sanity check failures. +// https://sourceware.org/binutils/docs/ld/Overlay-Description.html#Overlay-Description +std::vector<BaseCommand *> ScriptParser::readOverlay() { + // VA and LMA expressions are optional, though for simplicity of + // implementation we assume they are not. That is what OVERLAY was designed + // for first of all: to allow sections with overlapping VAs at different LMAs. + Expr AddrExpr = readExpr(); + expect(":"); + expect("AT"); + Expr LMAExpr = readParenExpr(); + expect("{"); + + std::vector<BaseCommand *> V; + OutputSection *Prev = nullptr; + while (!errorCount() && !consume("}")) { + // VA is the same for all sections. The LMAs are consecutive in memory + // starting from the base load address specified. + OutputSection *OS = readOverlaySectionDescription(); + OS->AddrExpr = AddrExpr; + if (Prev) + OS->LMAExpr = [=] { return Prev->getLMA() + Prev->Size; }; + else + OS->LMAExpr = LMAExpr; + V.push_back(OS); + Prev = OS; + } + + // According to the specification, at the end of the overlay, the location + // counter should be equal to the overlay base address plus size of the + // largest section seen in the overlay. + // Here we want to create the Dot assignment command to achieve that. + Expr MoveDot = [=] { + uint64_t Max = 0; + for (BaseCommand *Cmd : V) + Max = std::max(Max, cast<OutputSection>(Cmd)->Size); + return AddrExpr().getValue() + Max; + }; + V.push_back(make<SymbolAssignment>(".", MoveDot, getCurrentLocation())); + return V; +} + void ScriptParser::readSections() { Script->HasSectionsCommand = true; @@ -446,26 +490,48 @@ void ScriptParser::readSections() { Config->SingleRoRx = true; expect("{"); + std::vector<BaseCommand *> V; while (!errorCount() && !consume("}")) { StringRef Tok = next(); - BaseCommand *Cmd = readProvideOrAssignment(Tok); - if (!Cmd) { - if (Tok == "ASSERT") - Cmd = readAssert(); - else - Cmd = readOutputSectionDescription(Tok); + if (Tok == "OVERLAY") { + for (BaseCommand *Cmd : readOverlay()) + V.push_back(Cmd); + continue; } - Script->SectionCommands.push_back(Cmd); + + if (BaseCommand *Cmd = readAssignment(Tok)) + V.push_back(Cmd); + else + V.push_back(readOutputSectionDescription(Tok)); + } + + if (!atEOF() && consume("INSERT")) { + std::vector<BaseCommand *> *Dest = nullptr; + if (consume("AFTER")) + Dest = &Script->InsertAfterCommands[next()]; + else if (consume("BEFORE")) + Dest = &Script->InsertBeforeCommands[next()]; + else + setError("expected AFTER/BEFORE, but got '" + next() + "'"); + if (Dest) + Dest->insert(Dest->end(), V.begin(), V.end()); + return; } + + Script->SectionCommands.insert(Script->SectionCommands.end(), V.begin(), + V.end()); } static int precedence(StringRef Op) { return StringSwitch<int>(Op) - .Cases("*", "/", 5) - .Cases("+", "-", 4) - .Cases("<<", ">>", 3) - .Cases("<", "<=", ">", ">=", "==", "!=", 2) - .Cases("&", "|", 1) + .Cases("*", "/", "%", 8) + .Cases("+", "-", 7) + .Cases("<<", ">>", 6) + .Cases("<", "<=", ">", ">=", "==", "!=", 5) + .Case("&", 4) + .Case("|", 3) + .Case("&&", 2) + .Case("||", 1) .Default(-1); } @@ -588,11 +654,7 @@ void ScriptParser::readSort() { expect(")"); } -AssertCommand *ScriptParser::readAssert() { - return make<AssertCommand>(readAssertExpr()); -} - -Expr ScriptParser::readAssertExpr() { +Expr ScriptParser::readAssert() { expect("("); Expr E = readExpr(); expect(","); @@ -617,12 +679,14 @@ uint32_t ScriptParser::readFill() { return V; } -// Reads an expression and/or the special directive "(NOLOAD)" for an -// output section definition. +// Reads an expression and/or the special directive for an output +// section definition. Directive is one of following: "(NOLOAD)", +// "(COPY)", "(INFO)" or "(OVERLAY)". // // An output section name can be followed by an address expression -// and/or by "(NOLOAD)". This grammar is not LL(1) because "(" can be -// interpreted as either the beginning of some expression or "(NOLOAD)". +// and/or directive. This grammar is not LL(1) because "(" can be +// interpreted as either the beginning of some expression or beginning +// of directive. // // https://sourceware.org/binutils/docs/ld/Output-Section-Address.html // https://sourceware.org/binutils/docs/ld/Output-Section-Type.html @@ -633,6 +697,11 @@ void ScriptParser::readSectionAddressType(OutputSection *Cmd) { Cmd->Noload = true; return; } + if (consume("COPY") || consume("INFO") || consume("OVERLAY")) { + expect(")"); + Cmd->NonAlloc = true; + return; + } Cmd->AddrExpr = readExpr(); expect(")"); } else { @@ -657,10 +726,23 @@ static Expr checkAlignment(Expr E, std::string &Loc) { }; } +OutputSection *ScriptParser::readOverlaySectionDescription() { + OutputSection *Cmd = + Script->createOutputSection(next(), getCurrentLocation()); + Cmd->InOverlay = true; + expect("{"); + while (!errorCount() && !consume("}")) + Cmd->SectionCommands.push_back(readInputSectionRules(next())); + Cmd->Phdrs = readOutputSectionPhdrs(); + return Cmd; +} + OutputSection *ScriptParser::readOutputSectionDescription(StringRef OutSec) { OutputSection *Cmd = Script->createOutputSection(OutSec, getCurrentLocation()); + size_t SymbolsReferenced = Script->ReferencedSymbols.size(); + if (peek() != ":") readSectionAddressType(Cmd); expect(":"); @@ -684,13 +766,10 @@ OutputSection *ScriptParser::readOutputSectionDescription(StringRef OutSec) { StringRef Tok = next(); if (Tok == ";") { // Empty commands are allowed. Do nothing here. - } else if (SymbolAssignment *Assign = readProvideOrAssignment(Tok)) { + } else if (SymbolAssignment *Assign = readAssignment(Tok)) { Cmd->SectionCommands.push_back(Assign); } else if (ByteCommand *Data = readByteCommand(Tok)) { Cmd->SectionCommands.push_back(Data); - } else if (Tok == "ASSERT") { - Cmd->SectionCommands.push_back(readAssert()); - expect(";"); } else if (Tok == "CONSTRUCTORS") { // CONSTRUCTORS is a keyword to make the linker recognize C++ ctors/dtors // by name. This is for very old file formats such as ECOFF/XCOFF. @@ -709,6 +788,14 @@ OutputSection *ScriptParser::readOutputSectionDescription(StringRef OutSec) { if (consume(">")) Cmd->MemoryRegionName = next(); + if (consume("AT")) { + expect(">"); + Cmd->LMARegionName = next(); + } + + if (Cmd->LMAExpr && !Cmd->LMARegionName.empty()) + error("section can't have both LMA and a load region"); + Cmd->Phdrs = readOutputSectionPhdrs(); if (consume("=")) @@ -719,6 +806,8 @@ OutputSection *ScriptParser::readOutputSectionDescription(StringRef OutSec) { // Consume optional comma following output section command. consume(","); + if (Script->ReferencedSymbols.size() > SymbolsReferenced) + Cmd->ExpressionsUseSymbols = true; return Cmd; } @@ -741,30 +830,39 @@ uint32_t ScriptParser::parseFill(StringRef Tok) { SymbolAssignment *ScriptParser::readProvideHidden(bool Provide, bool Hidden) { expect("("); - SymbolAssignment *Cmd = readAssignment(next()); + SymbolAssignment *Cmd = readSymbolAssignment(next()); Cmd->Provide = Provide; Cmd->Hidden = Hidden; expect(")"); - expect(";"); return Cmd; } -SymbolAssignment *ScriptParser::readProvideOrAssignment(StringRef Tok) { +SymbolAssignment *ScriptParser::readAssignment(StringRef Tok) { + // Assert expression returns Dot, so this is equal to ".=." + if (Tok == "ASSERT") + return make<SymbolAssignment>(".", readAssert(), getCurrentLocation()); + + size_t OldPos = Pos; SymbolAssignment *Cmd = nullptr; - if (peek() == "=" || peek() == "+=") { - Cmd = readAssignment(Tok); - expect(";"); - } else if (Tok == "PROVIDE") { + if (peek() == "=" || peek() == "+=") + Cmd = readSymbolAssignment(Tok); + else if (Tok == "PROVIDE") Cmd = readProvideHidden(true, false); - } else if (Tok == "HIDDEN") { + else if (Tok == "HIDDEN") Cmd = readProvideHidden(false, true); - } else if (Tok == "PROVIDE_HIDDEN") { + else if (Tok == "PROVIDE_HIDDEN") Cmd = readProvideHidden(true, true); + + if (Cmd) { + Cmd->CommandString = + Tok.str() + " " + + llvm::join(Tokens.begin() + OldPos, Tokens.begin() + Pos, " "); + expect(";"); } return Cmd; } -SymbolAssignment *ScriptParser::readAssignment(StringRef Name) { +SymbolAssignment *ScriptParser::readSymbolAssignment(StringRef Name) { StringRef Op = next(); assert(Op == "=" || Op == "+="); Expr E = readExpr(); @@ -787,15 +885,31 @@ Expr ScriptParser::readExpr() { return E; } -static Expr combine(StringRef Op, Expr L, Expr R) { +Expr ScriptParser::combine(StringRef Op, Expr L, Expr R) { if (Op == "+") return [=] { return add(L(), R()); }; if (Op == "-") return [=] { return sub(L(), R()); }; if (Op == "*") - return [=] { return mul(L(), R()); }; - if (Op == "/") - return [=] { return div(L(), R()); }; + return [=] { return L().getValue() * R().getValue(); }; + if (Op == "/") { + std::string Loc = getCurrentLocation(); + return [=]() -> uint64_t { + if (uint64_t RV = R().getValue()) + return L().getValue() / RV; + error(Loc + ": division by zero"); + return 0; + }; + } + if (Op == "%") { + std::string Loc = getCurrentLocation(); + return [=]() -> uint64_t { + if (uint64_t RV = R().getValue()) + return L().getValue() % RV; + error(Loc + ": modulo by zero"); + return 0; + }; + } if (Op == "<<") return [=] { return L().getValue() << R().getValue(); }; if (Op == ">>") @@ -812,6 +926,10 @@ static Expr combine(StringRef Op, Expr L, Expr R) { return [=] { return L().getValue() == R().getValue(); }; if (Op == "!=") return [=] { return L().getValue() != R().getValue(); }; + if (Op == "||") + return [=] { return L().getValue() || R().getValue(); }; + if (Op == "&&") + return [=] { return L().getValue() && R().getValue(); }; if (Op == "&") return [=] { return bitAnd(L(), R()); }; if (Op == "|") @@ -865,20 +983,13 @@ Expr ScriptParser::readConstant() { if (S == "MAXPAGESIZE") return [] { return Config->MaxPageSize; }; setError("unknown constant: " + S); - return {}; + return [] { return 0; }; } // Parses Tok as an integer. It recognizes hexadecimal (prefixed with // "0x" or suffixed with "H") and decimal numbers. Decimal numbers may // have "K" (Ki) or "M" (Mi) suffixes. static Optional<uint64_t> parseInt(StringRef Tok) { - // Negative number - if (Tok.startswith("-")) { - if (Optional<uint64_t> Val = parseInt(Tok.substr(1))) - return -*Val; - return None; - } - // Hexadecimal uint64_t Val; if (Tok.startswith_lower("0x")) { @@ -917,12 +1028,21 @@ ByteCommand *ScriptParser::readByteCommand(StringRef Tok) { .Default(-1); if (Size == -1) return nullptr; - return make<ByteCommand>(readParenExpr(), Size); + + size_t OldPos = Pos; + Expr E = readParenExpr(); + std::string CommandString = + Tok.str() + " " + + llvm::join(Tokens.begin() + OldPos, Tokens.begin() + Pos, " "); + return make<ByteCommand>(E, Size, CommandString); } StringRef ScriptParser::readParenLiteral() { expect("("); + bool Orig = InExpr; + InExpr = false; StringRef Tok = next(); + InExpr = Orig; expect(")"); return Tok; } @@ -995,7 +1115,7 @@ Expr ScriptParser::readPrimary() { }; } if (Tok == "ASSERT") - return readAssertExpr(); + return readAssert(); if (Tok == "CONSTANT") return readConstant(); if (Tok == "DATA_SEGMENT_ALIGN") { @@ -1032,8 +1152,10 @@ Expr ScriptParser::readPrimary() { } if (Tok == "LENGTH") { StringRef Name = readParenLiteral(); - if (Script->MemoryRegions.count(Name) == 0) + if (Script->MemoryRegions.count(Name) == 0) { setError("memory region not defined: " + Name); + return [] { return 0; }; + } return [=] { return Script->MemoryRegions[Name]->Length; }; } if (Tok == "LOADADDR") { @@ -1044,10 +1166,22 @@ Expr ScriptParser::readPrimary() { return Cmd->getLMA(); }; } + if (Tok == "MAX" || Tok == "MIN") { + expect("("); + Expr A = readExpr(); + expect(","); + Expr B = readExpr(); + expect(")"); + if (Tok == "MIN") + return [=] { return std::min(A().getValue(), B().getValue()); }; + return [=] { return std::max(A().getValue(), B().getValue()); }; + } if (Tok == "ORIGIN") { StringRef Name = readParenLiteral(); - if (Script->MemoryRegions.count(Name) == 0) + if (Script->MemoryRegions.count(Name) == 0) { setError("memory region not defined: " + Name); + return [] { return 0; }; + } return [=] { return Script->MemoryRegions[Name]->Origin; }; } if (Tok == "SEGMENT_START") { @@ -1229,6 +1363,9 @@ ScriptParser::readSymbols() { // Reads an "extern C++" directive, e.g., // "extern "C++" { ns::*; "f(int, double)"; };" +// +// The last semicolon is optional. E.g. this is OK: +// "extern "C++" { ns::*; "f(int, double)" };" std::vector<SymbolVersion> ScriptParser::readVersionExtern() { StringRef Tok = next(); bool IsCXX = Tok == "\"C++\""; @@ -1241,6 +1378,8 @@ std::vector<SymbolVersion> ScriptParser::readVersionExtern() { StringRef Tok = next(); bool HasWildcard = !Tok.startswith("\"") && hasWildcard(Tok); Ret.push_back({unquote(Tok), IsCXX, HasWildcard}); + if (consume("}")) + return Ret; expect(";"); } @@ -1280,11 +1419,10 @@ void ScriptParser::readMemory() { uint64_t Length = readMemoryAssignment("LENGTH", "len", "l"); // Add the memory region to the region map. - if (Script->MemoryRegions.count(Name)) + MemoryRegion *MR = + make<MemoryRegion>(Name, Origin, Length, Flags, NegFlags); + if (!Script->MemoryRegions.insert({Name, MR}).second) setError("region '" + Name + "' already defined"); - MemoryRegion *MR = make<MemoryRegion>(); - *MR = {Name, Origin, Length, Flags, NegFlags}; - Script->MemoryRegions[Name] = MR; } } |