diff options
Diffstat (limited to 'ELF/ScriptParser.cpp')
-rw-r--r-- | ELF/ScriptParser.cpp | 1235 |
1 files changed, 1106 insertions, 129 deletions
diff --git a/ELF/ScriptParser.cpp b/ELF/ScriptParser.cpp index c740685a15a14..032ecd50f3e3f 100644 --- a/ELF/ScriptParser.cpp +++ b/ELF/ScriptParser.cpp @@ -7,194 +7,1171 @@ // //===----------------------------------------------------------------------===// // -// This file contains the base parser class for linker script and dynamic -// list. +// This file contains a recursive-descent parser for linker scripts. +// Parsed results are stored to Config and Script global objects. // //===----------------------------------------------------------------------===// #include "ScriptParser.h" -#include "Error.h" -#include "llvm/ADT/Twine.h" +#include "Config.h" +#include "Driver.h" +#include "InputSection.h" +#include "LinkerScript.h" +#include "Memory.h" +#include "OutputSections.h" +#include "ScriptLexer.h" +#include "Symbols.h" +#include "Target.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Path.h" +#include <cassert> +#include <limits> +#include <vector> using namespace llvm; +using namespace llvm::ELF; +using namespace llvm::support::endian; using namespace lld; using namespace lld::elf; -// Returns a whole line containing the current token. 
-StringRef ScriptParserBase::getLine() { - StringRef S = getCurrentMB().getBuffer(); - StringRef Tok = Tokens[Pos - 1]; +static bool isUnderSysroot(StringRef Path); - size_t Pos = S.rfind('\n', Tok.data() - S.data()); - if (Pos != StringRef::npos) - S = S.substr(Pos + 1); - return S.substr(0, S.find_first_of("\r\n")); +namespace { +class ScriptParser final : ScriptLexer { +public: + ScriptParser(MemoryBufferRef MB) + : ScriptLexer(MB), + IsUnderSysroot(isUnderSysroot(MB.getBufferIdentifier())) {} + + void readLinkerScript(); + void readVersionScript(); + void readDynamicList(); + +private: + void addFile(StringRef Path); + + void readAsNeeded(); + void readEntry(); + void readExtern(); + void readGroup(); + void readInclude(); + void readMemory(); + void readOutput(); + void readOutputArch(); + void readOutputFormat(); + void readPhdrs(); + void readSearchDir(); + void readSections(); + void readVersion(); + void readVersionScriptCommand(); + + SymbolAssignment *readAssignment(StringRef Name); + BytesDataCommand *readBytesDataCommand(StringRef Tok); + uint32_t readFill(); + uint32_t parseFill(StringRef Tok); + OutputSectionCommand *readOutputSectionDescription(StringRef OutSec); + std::vector<StringRef> readOutputSectionPhdrs(); + InputSectionDescription *readInputSectionDescription(StringRef Tok); + StringMatcher readFilePatterns(); + std::vector<SectionPattern> readInputSectionsList(); + InputSectionDescription *readInputSectionRules(StringRef FilePattern); + unsigned readPhdrType(); + SortSectionPolicy readSortKind(); + SymbolAssignment *readProvideHidden(bool Provide, bool Hidden); + SymbolAssignment *readProvideOrAssignment(StringRef Tok); + void readSort(); + AssertCommand *readAssert(); + Expr readAssertExpr(); + + uint64_t readMemoryAssignment(StringRef, StringRef, StringRef); + std::pair<uint32_t, uint32_t> readMemoryAttributes(); + + Expr readExpr(); + Expr readExpr1(Expr Lhs, int MinPrec); + StringRef readParenLiteral(); + Expr readPrimary(); + Expr 
readTernary(Expr Cond); + Expr readParenExpr(); + + // For parsing version script. + std::vector<SymbolVersion> readVersionExtern(); + void readAnonymousDeclaration(); + void readVersionDeclaration(StringRef VerStr); + + std::pair<std::vector<SymbolVersion>, std::vector<SymbolVersion>> + readSymbols(); + + bool IsUnderSysroot; +}; +} // namespace + +static bool isUnderSysroot(StringRef Path) { + if (Config->Sysroot == "") + return false; + for (; !Path.empty(); Path = sys::path::parent_path(Path)) + if (sys::fs::equivalent(Config->Sysroot, Path)) + return true; + return false; } -// Returns 1-based line number of the current token. -size_t ScriptParserBase::getLineNumber() { - StringRef S = getCurrentMB().getBuffer(); - StringRef Tok = Tokens[Pos - 1]; - return S.substr(0, Tok.data() - S.data()).count('\n') + 1; +// Some operations only support one non absolute value. Move the +// absolute one to the right hand side for convenience. +static void moveAbsRight(ExprValue &A, ExprValue &B) { + if (A.isAbsolute()) + std::swap(A, B); + if (!B.isAbsolute()) + error("At least one side of the expression must be absolute"); } -// Returns 0-based column number of the current token. -size_t ScriptParserBase::getColumnNumber() { - StringRef Tok = Tokens[Pos - 1]; - return Tok.data() - getLine().data(); +static ExprValue add(ExprValue A, ExprValue B) { + moveAbsRight(A, B); + return {A.Sec, A.ForceAbsolute, A.Val + B.getValue()}; } -std::string ScriptParserBase::getCurrentLocation() { - std::string Filename = getCurrentMB().getBufferIdentifier(); - if (!Pos) - return Filename; - return (Filename + ":" + Twine(getLineNumber())).str(); +static ExprValue sub(ExprValue A, ExprValue B) { + return {A.Sec, A.Val - B.getValue()}; } -ScriptParserBase::ScriptParserBase(MemoryBufferRef MB) { tokenize(MB); } +static ExprValue mul(ExprValue A, ExprValue B) { + return A.getValue() * B.getValue(); +} -// We don't want to record cascading errors. Keep only the first one. 
-void ScriptParserBase::setError(const Twine &Msg) { - if (Error) +static ExprValue div(ExprValue A, ExprValue B) { + if (uint64_t BV = B.getValue()) + return A.getValue() / BV; + error("division by zero"); + return 0; +} + +static ExprValue bitAnd(ExprValue A, ExprValue B) { + moveAbsRight(A, B); + return {A.Sec, A.ForceAbsolute, + (A.getValue() & B.getValue()) - A.getSecAddr()}; +} + +static ExprValue bitOr(ExprValue A, ExprValue B) { + moveAbsRight(A, B); + return {A.Sec, A.ForceAbsolute, + (A.getValue() | B.getValue()) - A.getSecAddr()}; +} + +void ScriptParser::readDynamicList() { + expect("{"); + readAnonymousDeclaration(); + if (!atEOF()) + setError("EOF expected, but got " + next()); +} + +void ScriptParser::readVersionScript() { + readVersionScriptCommand(); + if (!atEOF()) + setError("EOF expected, but got " + next()); +} + +void ScriptParser::readVersionScriptCommand() { + if (consume("{")) { + readAnonymousDeclaration(); return; - Error = true; + } + + while (!atEOF() && !Error && peek() != "}") { + StringRef VerStr = next(); + if (VerStr == "{") { + setError("anonymous version definition is used in " + "combination with other version definitions"); + return; + } + expect("{"); + readVersionDeclaration(VerStr); + } +} + +void ScriptParser::readVersion() { + expect("{"); + readVersionScriptCommand(); + expect("}"); +} - if (!Pos) { - error(getCurrentLocation() + ": " + Msg); +void ScriptParser::readLinkerScript() { + while (!atEOF()) { + StringRef Tok = next(); + if (Tok == ";") + continue; + + if (Tok == "ASSERT") { + Script->Opt.Commands.push_back(readAssert()); + } else if (Tok == "ENTRY") { + readEntry(); + } else if (Tok == "EXTERN") { + readExtern(); + } else if (Tok == "GROUP" || Tok == "INPUT") { + readGroup(); + } else if (Tok == "INCLUDE") { + readInclude(); + } else if (Tok == "MEMORY") { + readMemory(); + } else if (Tok == "OUTPUT") { + readOutput(); + } else if (Tok == "OUTPUT_ARCH") { + readOutputArch(); + } else if (Tok == "OUTPUT_FORMAT") 
{ + readOutputFormat(); + } else if (Tok == "PHDRS") { + readPhdrs(); + } else if (Tok == "SEARCH_DIR") { + readSearchDir(); + } else if (Tok == "SECTIONS") { + readSections(); + } else if (Tok == "VERSION") { + readVersion(); + } else if (SymbolAssignment *Cmd = readProvideOrAssignment(Tok)) { + Script->Opt.Commands.push_back(Cmd); + } else { + setError("unknown directive: " + Tok); + } + } +} + +void ScriptParser::addFile(StringRef S) { + if (IsUnderSysroot && S.startswith("/")) { + SmallString<128> PathData; + StringRef Path = (Config->Sysroot + S).toStringRef(PathData); + if (sys::fs::exists(Path)) { + Driver->addFile(Saver.save(Path), /*WithLOption=*/false); + return; + } + } + + if (sys::path::is_absolute(S)) { + Driver->addFile(S, /*WithLOption=*/false); + } else if (S.startswith("=")) { + if (Config->Sysroot.empty()) + Driver->addFile(S.substr(1), /*WithLOption=*/false); + else + Driver->addFile(Saver.save(Config->Sysroot + "/" + S.substr(1)), + /*WithLOption=*/false); + } else if (S.startswith("-l")) { + Driver->addLibrary(S.substr(2)); + } else if (sys::fs::exists(S)) { + Driver->addFile(S, /*WithLOption=*/false); + } else { + if (Optional<std::string> Path = findFromSearchPaths(S)) + Driver->addFile(Saver.save(*Path), /*WithLOption=*/true); + else + setError("unable to find " + S); + } +} + +void ScriptParser::readAsNeeded() { + expect("("); + bool Orig = Config->AsNeeded; + Config->AsNeeded = true; + while (!Error && !consume(")")) + addFile(unquote(next())); + Config->AsNeeded = Orig; +} + +void ScriptParser::readEntry() { + // -e <symbol> takes precedence over ENTRY(<symbol>). 
+ expect("("); + StringRef Tok = next(); + if (Config->Entry.empty()) + Config->Entry = Tok; + expect(")"); +} + +void ScriptParser::readExtern() { + expect("("); + while (!Error && !consume(")")) + Config->Undefined.push_back(next()); +} + +void ScriptParser::readGroup() { + expect("("); + while (!Error && !consume(")")) { + if (consume("AS_NEEDED")) + readAsNeeded(); + else + addFile(unquote(next())); + } +} + +void ScriptParser::readInclude() { + StringRef Tok = unquote(next()); + + // https://sourceware.org/binutils/docs/ld/File-Commands.html: + // The file will be searched for in the current directory, and in any + // directory specified with the -L option. + if (sys::fs::exists(Tok)) { + if (Optional<MemoryBufferRef> MB = readFile(Tok)) + tokenize(*MB); + return; + } + if (Optional<std::string> Path = findFromSearchPaths(Tok)) { + if (Optional<MemoryBufferRef> MB = readFile(*Path)) + tokenize(*MB); return; } + setError("cannot open " + Tok); +} + +void ScriptParser::readOutput() { + // -o <file> takes precedence over OUTPUT(<file>). + expect("("); + StringRef Tok = next(); + if (Config->OutputFile.empty()) + Config->OutputFile = unquote(Tok); + expect(")"); +} - std::string S = getCurrentLocation() + ": "; - error(S + Msg); - error(S + getLine()); - error(S + std::string(getColumnNumber(), ' ') + "^"); +void ScriptParser::readOutputArch() { + // OUTPUT_ARCH is ignored for now. + expect("("); + while (!Error && !consume(")")) + skip(); } -// Split S into linker script tokens. -void ScriptParserBase::tokenize(MemoryBufferRef MB) { - std::vector<StringRef> Vec; - MBs.push_back(MB); - StringRef S = MB.getBuffer(); - StringRef Begin = S; +void ScriptParser::readOutputFormat() { + // Error checking only for now. 
+ expect("("); + skip(); + if (consume(")")) + return; + expect(","); + skip(); + expect(","); + skip(); + expect(")"); +} - for (;;) { - S = skipSpace(S); - if (S.empty()) - break; +void ScriptParser::readPhdrs() { + expect("{"); + while (!Error && !consume("}")) { + Script->Opt.PhdrsCommands.push_back( + {next(), PT_NULL, false, false, UINT_MAX, nullptr}); - // Quoted token. Note that double-quote characters are parts of a token - // because, in a glob match context, only unquoted tokens are interpreted - // as glob patterns. Double-quoted tokens are literal patterns in that - // context. - if (S.startswith("\"")) { - size_t E = S.find("\"", 1); - if (E == StringRef::npos) { - StringRef Filename = MB.getBufferIdentifier(); - size_t Lineno = Begin.substr(0, S.data() - Begin.data()).count('\n'); - error(Filename + ":" + Twine(Lineno + 1) + ": unclosed quote"); - return; - } + PhdrsCommand &PhdrCmd = Script->Opt.PhdrsCommands.back(); + PhdrCmd.Type = readPhdrType(); - Vec.push_back(S.take_front(E + 1)); - S = S.substr(E + 1); - continue; + while (!Error && !consume(";")) { + if (consume("FILEHDR")) + PhdrCmd.HasFilehdr = true; + else if (consume("PHDRS")) + PhdrCmd.HasPhdrs = true; + else if (consume("AT")) + PhdrCmd.LMAExpr = readParenExpr(); + else if (consume("FLAGS")) + PhdrCmd.Flags = readParenExpr()().getValue(); + else + setError("unexpected header attribute: " + next()); } + } +} - // Unquoted token. This is more relaxed than tokens in C-like language, - // so that you can write "file-name.cpp" as one bare token, for example. 
- size_t Pos = S.find_first_not_of( - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" - "0123456789_.$/\\~=+[]*?-:!<>^"); +void ScriptParser::readSearchDir() { + expect("("); + StringRef Tok = next(); + if (!Config->Nostdlib) + Config->SearchPaths.push_back(unquote(Tok)); + expect(")"); +} + +void ScriptParser::readSections() { + Script->Opt.HasSections = true; + + // -no-rosegment is used to avoid placing read only non-executable sections in + // their own segment. We do the same if SECTIONS command is present in linker + // script. See comment for computeFlags(). + Config->SingleRoRx = true; - // A character that cannot start a word (which is usually a - // punctuation) forms a single character token. - if (Pos == 0) - Pos = 1; - Vec.push_back(S.substr(0, Pos)); - S = S.substr(Pos); + expect("{"); + while (!Error && !consume("}")) { + StringRef Tok = next(); + BaseCommand *Cmd = readProvideOrAssignment(Tok); + if (!Cmd) { + if (Tok == "ASSERT") + Cmd = readAssert(); + else + Cmd = readOutputSectionDescription(Tok); + } + Script->Opt.Commands.push_back(Cmd); } +} - Tokens.insert(Tokens.begin() + Pos, Vec.begin(), Vec.end()); +static int precedence(StringRef Op) { + return StringSwitch<int>(Op) + .Cases("*", "/", 5) + .Cases("+", "-", 4) + .Cases("<<", ">>", 3) + .Cases("<", "<=", ">", ">=", "==", "!=", 2) + .Cases("&", "|", 1) + .Default(-1); } -// Skip leading whitespace characters or comments. 
-StringRef ScriptParserBase::skipSpace(StringRef S) { - for (;;) { - if (S.startswith("/*")) { - size_t E = S.find("*/", 2); - if (E == StringRef::npos) { - error("unclosed comment in a linker script"); - return ""; +StringMatcher ScriptParser::readFilePatterns() { + std::vector<StringRef> V; + while (!Error && !consume(")")) + V.push_back(next()); + return StringMatcher(V); +} + +SortSectionPolicy ScriptParser::readSortKind() { + if (consume("SORT") || consume("SORT_BY_NAME")) + return SortSectionPolicy::Name; + if (consume("SORT_BY_ALIGNMENT")) + return SortSectionPolicy::Alignment; + if (consume("SORT_BY_INIT_PRIORITY")) + return SortSectionPolicy::Priority; + if (consume("SORT_NONE")) + return SortSectionPolicy::None; + return SortSectionPolicy::Default; +} + +// Reads SECTIONS command contents in the following form: +// +// <contents> ::= <elem>* +// <elem> ::= <exclude>? <glob-pattern> +// <exclude> ::= "EXCLUDE_FILE" "(" <glob-pattern>+ ")" +// +// For example, +// +// *(.foo EXCLUDE_FILE (a.o) .bar EXCLUDE_FILE (b.o) .baz) +// +// is parsed as ".foo", ".bar" with "a.o", and ".baz" with "b.o". +// The semantics of that is section .foo in any file, section .bar in +// any file but a.o, and section .baz in any file but b.o. +std::vector<SectionPattern> ScriptParser::readInputSectionsList() { + std::vector<SectionPattern> Ret; + while (!Error && peek() != ")") { + StringMatcher ExcludeFilePat; + if (consume("EXCLUDE_FILE")) { + expect("("); + ExcludeFilePat = readFilePatterns(); + } + + std::vector<StringRef> V; + while (!Error && peek() != ")" && peek() != "EXCLUDE_FILE") + V.push_back(next()); + + if (!V.empty()) + Ret.push_back({std::move(ExcludeFilePat), StringMatcher(V)}); + else + setError("section pattern is expected"); + } + return Ret; +} + +// Reads contents of "SECTIONS" directive. That directive contains a +// list of glob patterns for input sections. The grammar is as follows. 
+// +// <patterns> ::= <section-list> +// | <sort> "(" <section-list> ")" +// | <sort> "(" <sort> "(" <section-list> ")" ")" +// +// <sort> ::= "SORT" | "SORT_BY_NAME" | "SORT_BY_ALIGNMENT" +// | "SORT_BY_INIT_PRIORITY" | "SORT_NONE" +// +// <section-list> is parsed by readInputSectionsList(). +InputSectionDescription * +ScriptParser::readInputSectionRules(StringRef FilePattern) { + auto *Cmd = make<InputSectionDescription>(FilePattern); + expect("("); + + while (!Error && !consume(")")) { + SortSectionPolicy Outer = readSortKind(); + SortSectionPolicy Inner = SortSectionPolicy::Default; + std::vector<SectionPattern> V; + if (Outer != SortSectionPolicy::Default) { + expect("("); + Inner = readSortKind(); + if (Inner != SortSectionPolicy::Default) { + expect("("); + V = readInputSectionsList(); + expect(")"); + } else { + V = readInputSectionsList(); } - S = S.substr(E + 2); - continue; + expect(")"); + } else { + V = readInputSectionsList(); } - if (S.startswith("#")) { - size_t E = S.find('\n', 1); - if (E == StringRef::npos) - E = S.size() - 1; - S = S.substr(E + 1); - continue; + + for (SectionPattern &Pat : V) { + Pat.SortInner = Inner; + Pat.SortOuter = Outer; } - size_t Size = S.size(); - S = S.ltrim(); - if (S.size() == Size) - return S; + + std::move(V.begin(), V.end(), std::back_inserter(Cmd->SectionPatterns)); } + return Cmd; } -// An erroneous token is handled as if it were the last token before EOF. -bool ScriptParserBase::atEOF() { return Error || Tokens.size() == Pos; } +InputSectionDescription * +ScriptParser::readInputSectionDescription(StringRef Tok) { + // Input section wildcard can be surrounded by KEEP. 
+ // https://sourceware.org/binutils/docs/ld/Input-Section-Keep.html#Input-Section-Keep + if (Tok == "KEEP") { + expect("("); + StringRef FilePattern = next(); + InputSectionDescription *Cmd = readInputSectionRules(FilePattern); + expect(")"); + Script->Opt.KeptSections.push_back(Cmd); + return Cmd; + } + return readInputSectionRules(Tok); +} + +void ScriptParser::readSort() { + expect("("); + expect("CONSTRUCTORS"); + expect(")"); +} -StringRef ScriptParserBase::next() { - if (Error) - return ""; - if (atEOF()) { - setError("unexpected EOF"); - return ""; +AssertCommand *ScriptParser::readAssert() { + return make<AssertCommand>(readAssertExpr()); +} + +Expr ScriptParser::readAssertExpr() { + expect("("); + Expr E = readExpr(); + expect(","); + StringRef Msg = unquote(next()); + expect(")"); + + return [=] { + if (!E().getValue()) + error(Msg); + return Script->getDot(); + }; +} + +// Reads a FILL(expr) command. We handle the FILL command as an +// alias for =fillexp section attribute, which is different from +// what GNU linkers do. +// https://sourceware.org/binutils/docs/ld/Output-Section-Data.html +uint32_t ScriptParser::readFill() { + expect("("); + uint32_t V = parseFill(next()); + expect(")"); + return V; +} + +OutputSectionCommand * +ScriptParser::readOutputSectionDescription(StringRef OutSec) { + OutputSectionCommand *Cmd = make<OutputSectionCommand>(OutSec); + Cmd->Location = getCurrentLocation(); + + // Read an address expression. + // https://sourceware.org/binutils/docs/ld/Output-Section-Address.html + if (peek() != ":") + Cmd->AddrExpr = readExpr(); + + expect(":"); + + if (consume("AT")) + Cmd->LMAExpr = readParenExpr(); + if (consume("ALIGN")) + Cmd->AlignExpr = readParenExpr(); + if (consume("SUBALIGN")) + Cmd->SubalignExpr = readParenExpr(); + + // Parse constraints. 
+ if (consume("ONLY_IF_RO")) + Cmd->Constraint = ConstraintKind::ReadOnly; + if (consume("ONLY_IF_RW")) + Cmd->Constraint = ConstraintKind::ReadWrite; + expect("{"); + + while (!Error && !consume("}")) { + StringRef Tok = next(); + if (Tok == ";") { + // Empty commands are allowed. Do nothing here. + } else if (SymbolAssignment *Assign = readProvideOrAssignment(Tok)) { + Cmd->Commands.push_back(Assign); + } else if (BytesDataCommand *Data = readBytesDataCommand(Tok)) { + Cmd->Commands.push_back(Data); + } else if (Tok == "ASSERT") { + Cmd->Commands.push_back(readAssert()); + expect(";"); + } else if (Tok == "CONSTRUCTORS") { + // CONSTRUCTORS is a keyword to make the linker recognize C++ ctors/dtors + // by name. This is for very old file formats such as ECOFF/XCOFF. + // For ELF, we should ignore. + } else if (Tok == "FILL") { + Cmd->Filler = readFill(); + } else if (Tok == "SORT") { + readSort(); + } else if (peek() == "(") { + Cmd->Commands.push_back(readInputSectionDescription(Tok)); + } else { + setError("unknown command " + Tok); + } } - return Tokens[Pos++]; + + if (consume(">")) + Cmd->MemoryRegionName = next(); + + Cmd->Phdrs = readOutputSectionPhdrs(); + + if (consume("=")) + Cmd->Filler = parseFill(next()); + else if (peek().startswith("=")) + Cmd->Filler = parseFill(next().drop_front()); + + // Consume optional comma following output section command. + consume(","); + + return Cmd; } -StringRef ScriptParserBase::peek() { +// Parses a given string as an octal/decimal/hexadecimal number and +// returns it as a big-endian number. Used for `=<fillexp>`. +// https://sourceware.org/binutils/docs/ld/Output-Section-Fill.html +// +// When reading a hexstring, ld.bfd handles it as a blob of arbitrary +// size, while ld.gold always handles it as a 32-bit big-endian number. +// We are compatible with ld.gold because it's easier to implement. 
+uint32_t ScriptParser::parseFill(StringRef Tok) { + uint32_t V = 0; + if (Tok.getAsInteger(0, V)) + setError("invalid filler expression: " + Tok); + + uint32_t Buf; + write32be(&Buf, V); + return Buf; +} + +SymbolAssignment *ScriptParser::readProvideHidden(bool Provide, bool Hidden) { + expect("("); + SymbolAssignment *Cmd = readAssignment(next()); + Cmd->Provide = Provide; + Cmd->Hidden = Hidden; + expect(")"); + expect(";"); + return Cmd; +} + +SymbolAssignment *ScriptParser::readProvideOrAssignment(StringRef Tok) { + SymbolAssignment *Cmd = nullptr; + if (peek() == "=" || peek() == "+=") { + Cmd = readAssignment(Tok); + expect(";"); + } else if (Tok == "PROVIDE") { + Cmd = readProvideHidden(true, false); + } else if (Tok == "HIDDEN") { + Cmd = readProvideHidden(false, true); + } else if (Tok == "PROVIDE_HIDDEN") { + Cmd = readProvideHidden(true, true); + } + return Cmd; +} + +SymbolAssignment *ScriptParser::readAssignment(StringRef Name) { + StringRef Op = next(); + assert(Op == "=" || Op == "+="); + Expr E = readExpr(); + if (Op == "+=") { + std::string Loc = getCurrentLocation(); + E = [=] { return add(Script->getSymbolValue(Loc, Name), E()); }; + } + return make<SymbolAssignment>(Name, E, getCurrentLocation()); +} + +// This is an operator-precedence parser to parse a linker +// script expression. +Expr ScriptParser::readExpr() { + // Our lexer is context-aware. Set the in-expression bit so that + // they apply different tokenization rules. 
+ bool Orig = InExpr; + InExpr = true; + Expr E = readExpr1(readPrimary(), 0); + InExpr = Orig; + return E; +} + +static Expr combine(StringRef Op, Expr L, Expr R) { + if (Op == "+") + return [=] { return add(L(), R()); }; + if (Op == "-") + return [=] { return sub(L(), R()); }; + if (Op == "*") + return [=] { return mul(L(), R()); }; + if (Op == "/") + return [=] { return div(L(), R()); }; + if (Op == "<<") + return [=] { return L().getValue() << R().getValue(); }; + if (Op == ">>") + return [=] { return L().getValue() >> R().getValue(); }; + if (Op == "<") + return [=] { return L().getValue() < R().getValue(); }; + if (Op == ">") + return [=] { return L().getValue() > R().getValue(); }; + if (Op == ">=") + return [=] { return L().getValue() >= R().getValue(); }; + if (Op == "<=") + return [=] { return L().getValue() <= R().getValue(); }; + if (Op == "==") + return [=] { return L().getValue() == R().getValue(); }; + if (Op == "!=") + return [=] { return L().getValue() != R().getValue(); }; + if (Op == "&") + return [=] { return bitAnd(L(), R()); }; + if (Op == "|") + return [=] { return bitOr(L(), R()); }; + llvm_unreachable("invalid operator"); +} + +// This is a part of the operator-precedence parser. This function +// assumes that the remaining token stream starts with an operator. +Expr ScriptParser::readExpr1(Expr Lhs, int MinPrec) { + while (!atEOF() && !Error) { + // Read an operator and an expression. + if (consume("?")) + return readTernary(Lhs); + StringRef Op1 = peek(); + if (precedence(Op1) < MinPrec) + break; + skip(); + Expr Rhs = readPrimary(); + + // Evaluate the remaining part of the expression first if the + // next operator has greater precedence than the previous one. + // For example, if we have read "+" and "3", and if the next + // operator is "*", then we'll evaluate 3 * ... part first. 
+ while (!atEOF()) { + StringRef Op2 = peek(); + if (precedence(Op2) <= precedence(Op1)) + break; + Rhs = readExpr1(Rhs, precedence(Op2)); + } + + Lhs = combine(Op1, Lhs, Rhs); + } + return Lhs; +} + +uint64_t static getConstant(StringRef S) { + if (S == "COMMONPAGESIZE") + return Target->PageSize; + if (S == "MAXPAGESIZE") + return Config->MaxPageSize; + error("unknown constant: " + S); + return 0; +} + +// Parses Tok as an integer. It recognizes hexadecimal (prefixed with +// "0x" or suffixed with "H") and decimal numbers. Decimal numbers may +// have "K" (Ki) or "M" (Mi) suffixes. +static Optional<uint64_t> parseInt(StringRef Tok) { + // Negative number + if (Tok.startswith("-")) { + if (Optional<uint64_t> Val = parseInt(Tok.substr(1))) + return -*Val; + return None; + } + + // Hexadecimal + uint64_t Val; + if (Tok.startswith_lower("0x") && !Tok.substr(2).getAsInteger(16, Val)) + return Val; + if (Tok.endswith_lower("H") && !Tok.drop_back().getAsInteger(16, Val)) + return Val; + + // Decimal + if (Tok.endswith_lower("K")) { + if (Tok.drop_back().getAsInteger(10, Val)) + return None; + return Val * 1024; + } + if (Tok.endswith_lower("M")) { + if (Tok.drop_back().getAsInteger(10, Val)) + return None; + return Val * 1024 * 1024; + } + if (Tok.getAsInteger(10, Val)) + return None; + return Val; +} + +BytesDataCommand *ScriptParser::readBytesDataCommand(StringRef Tok) { + int Size = StringSwitch<int>(Tok) + .Case("BYTE", 1) + .Case("SHORT", 2) + .Case("LONG", 4) + .Case("QUAD", 8) + .Default(-1); + if (Size == -1) + return nullptr; + + return make<BytesDataCommand>(readParenExpr(), Size); +} + +StringRef ScriptParser::readParenLiteral() { + expect("("); StringRef Tok = next(); - if (Error) - return ""; - --Pos; + expect(")"); return Tok; } -bool ScriptParserBase::consume(StringRef Tok) { - if (peek() == Tok) { +Expr ScriptParser::readPrimary() { + if (peek() == "(") + return readParenExpr(); + + if (consume("~")) { + Expr E = readPrimary(); + return [=] { return 
~E().getValue(); }; + } + if (consume("-")) { + Expr E = readPrimary(); + return [=] { return -E().getValue(); }; + } + + StringRef Tok = next(); + std::string Location = getCurrentLocation(); + + // Built-in functions are parsed here. + // https://sourceware.org/binutils/docs/ld/Builtin-Functions.html. + if (Tok == "ABSOLUTE") { + Expr Inner = readParenExpr(); + return [=] { + ExprValue I = Inner(); + I.ForceAbsolute = true; + return I; + }; + } + if (Tok == "ADDR") { + StringRef Name = readParenLiteral(); + return [=]() -> ExprValue { + return {Script->getOutputSection(Location, Name), 0}; + }; + } + if (Tok == "ALIGN") { + expect("("); + Expr E = readExpr(); + if (consume(")")) + return [=] { return alignTo(Script->getDot(), E().getValue()); }; + expect(","); + Expr E2 = readExpr(); + expect(")"); + return [=] { return alignTo(E().getValue(), E2().getValue()); }; + } + if (Tok == "ALIGNOF") { + StringRef Name = readParenLiteral(); + return [=] { return Script->getOutputSection(Location, Name)->Alignment; }; + } + if (Tok == "ASSERT") + return readAssertExpr(); + if (Tok == "CONSTANT") { + StringRef Name = readParenLiteral(); + return [=] { return getConstant(Name); }; + } + if (Tok == "DATA_SEGMENT_ALIGN") { + expect("("); + Expr E = readExpr(); + expect(","); + readExpr(); + expect(")"); + return [=] { return alignTo(Script->getDot(), E().getValue()); }; + } + if (Tok == "DATA_SEGMENT_END") { + expect("("); + expect("."); + expect(")"); + return [] { return Script->getDot(); }; + } + if (Tok == "DATA_SEGMENT_RELRO_END") { + // GNU linkers implement more complicated logic to handle + // DATA_SEGMENT_RELRO_END. We instead ignore the arguments and + // just align to the next page boundary for simplicity. + expect("("); + readExpr(); + expect(","); + readExpr(); + expect(")"); + return [] { return alignTo(Script->getDot(), Target->PageSize); }; + } + if (Tok == "DEFINED") { + StringRef Name = readParenLiteral(); + return [=] { return Script->isDefined(Name) ? 
1 : 0; }; + } + if (Tok == "LOADADDR") { + StringRef Name = readParenLiteral(); + return [=] { return Script->getOutputSection(Location, Name)->getLMA(); }; + } + if (Tok == "SEGMENT_START") { + expect("("); skip(); - return true; + expect(","); + Expr E = readExpr(); + expect(")"); + return [=] { return E(); }; } - return false; + if (Tok == "SIZEOF") { + StringRef Name = readParenLiteral(); + return [=] { return Script->getOutputSectionSize(Name); }; + } + if (Tok == "SIZEOF_HEADERS") + return [=] { return elf::getHeaderSize(); }; + + // Tok is the dot. + if (Tok == ".") + return [=] { return Script->getSymbolValue(Location, Tok); }; + + // Tok is a literal number. + if (Optional<uint64_t> Val = parseInt(Tok)) + return [=] { return *Val; }; + + // Tok is a symbol name. + if (!isValidCIdentifier(Tok)) + setError("malformed number: " + Tok); + Script->Opt.ReferencedSymbols.push_back(Tok); + return [=] { return Script->getSymbolValue(Location, Tok); }; } -void ScriptParserBase::skip() { (void)next(); } +Expr ScriptParser::readTernary(Expr Cond) { + Expr L = readExpr(); + expect(":"); + Expr R = readExpr(); + return [=] { return Cond().getValue() ? L() : R(); }; +} -void ScriptParserBase::expect(StringRef Expect) { - if (Error) - return; +Expr ScriptParser::readParenExpr() { + expect("("); + Expr E = readExpr(); + expect(")"); + return E; +} + +std::vector<StringRef> ScriptParser::readOutputSectionPhdrs() { + std::vector<StringRef> Phdrs; + while (!Error && peek().startswith(":")) { + StringRef Tok = next(); + Phdrs.push_back((Tok.size() == 1) ? next() : Tok.substr(1)); + } + return Phdrs; +} + +// Read a program header type name. The next token must be a +// name of a program header type or a constant (e.g. "0x3"). 
+unsigned ScriptParser::readPhdrType() { StringRef Tok = next(); - if (Tok != Expect) - setError(Expect + " expected, but got " + Tok); + if (Optional<uint64_t> Val = parseInt(Tok)) + return *Val; + + unsigned Ret = StringSwitch<unsigned>(Tok) + .Case("PT_NULL", PT_NULL) + .Case("PT_LOAD", PT_LOAD) + .Case("PT_DYNAMIC", PT_DYNAMIC) + .Case("PT_INTERP", PT_INTERP) + .Case("PT_NOTE", PT_NOTE) + .Case("PT_SHLIB", PT_SHLIB) + .Case("PT_PHDR", PT_PHDR) + .Case("PT_TLS", PT_TLS) + .Case("PT_GNU_EH_FRAME", PT_GNU_EH_FRAME) + .Case("PT_GNU_STACK", PT_GNU_STACK) + .Case("PT_GNU_RELRO", PT_GNU_RELRO) + .Case("PT_OPENBSD_RANDOMIZE", PT_OPENBSD_RANDOMIZE) + .Case("PT_OPENBSD_WXNEEDED", PT_OPENBSD_WXNEEDED) + .Case("PT_OPENBSD_BOOTDATA", PT_OPENBSD_BOOTDATA) + .Default(-1); + + if (Ret == (unsigned)-1) { + setError("invalid program header type: " + Tok); + return PT_NULL; + } + return Ret; } -// Returns true if S encloses T. -static bool encloses(StringRef S, StringRef T) { - return S.bytes_begin() <= T.bytes_begin() && T.bytes_end() <= S.bytes_end(); +// Reads an anonymous version declaration. +void ScriptParser::readAnonymousDeclaration() { + std::vector<SymbolVersion> Locals; + std::vector<SymbolVersion> Globals; + std::tie(Locals, Globals) = readSymbols(); + + for (SymbolVersion V : Locals) { + if (V.Name == "*") + Config->DefaultSymbolVersion = VER_NDX_LOCAL; + else + Config->VersionScriptLocals.push_back(V); + } + + for (SymbolVersion V : Globals) + Config->VersionScriptGlobals.push_back(V); + + expect(";"); } -MemoryBufferRef ScriptParserBase::getCurrentMB() { - // Find input buffer containing the current token. - assert(!MBs.empty()); - if (!Pos) - return MBs[0]; +// Reads a non-anonymous version definition, +// e.g. "VerStr { global: foo; bar; local: *; };". +void ScriptParser::readVersionDeclaration(StringRef VerStr) { + // Read a symbol list. 
+ std::vector<SymbolVersion> Locals; + std::vector<SymbolVersion> Globals; + std::tie(Locals, Globals) = readSymbols(); + + for (SymbolVersion V : Locals) { + if (V.Name == "*") + Config->DefaultSymbolVersion = VER_NDX_LOCAL; + else + Config->VersionScriptLocals.push_back(V); + } + + // Create a new version definition and add that to the global symbols. + VersionDefinition Ver; + Ver.Name = VerStr; + Ver.Globals = Globals; + + // User-defined version number starts from 2 because 0 and 1 are + // reserved for VER_NDX_LOCAL and VER_NDX_GLOBAL, respectively. + Ver.Id = Config->VersionDefinitions.size() + 2; + Config->VersionDefinitions.push_back(Ver); + + // Each version may have a parent version. For example, "Ver2" + // defined as "Ver2 { global: foo; local: *; } Ver1;" has "Ver1" + // as a parent. This version hierarchy is, probably against your + // instinct, purely for hint; the runtime doesn't care about it + // at all. In LLD, we simply ignore it. + if (peek() != ";") + skip(); + expect(";"); +} + +// Reads a list of symbols, e.g. "{ global: foo; bar; local: *; };". 
+std::pair<std::vector<SymbolVersion>, std::vector<SymbolVersion>> +ScriptParser::readSymbols() { + std::vector<SymbolVersion> Locals; + std::vector<SymbolVersion> Globals; + std::vector<SymbolVersion> *V = &Globals; + + while (!Error) { + if (consume("}")) + break; + if (consumeLabel("local")) { + V = &Locals; + continue; + } + if (consumeLabel("global")) { + V = &Globals; + continue; + } + + if (consume("extern")) { + std::vector<SymbolVersion> Ext = readVersionExtern(); + V->insert(V->end(), Ext.begin(), Ext.end()); + } else { + StringRef Tok = next(); + V->push_back({unquote(Tok), false, hasWildcard(Tok)}); + } + expect(";"); + } + return {Locals, Globals}; +} + +// Reads an "extern C++" directive, e.g., +// "extern "C++" { ns::*; "f(int, double)"; };" +std::vector<SymbolVersion> ScriptParser::readVersionExtern() { + StringRef Tok = next(); + bool IsCXX = Tok == "\"C++\""; + if (!IsCXX && Tok != "\"C\"") + setError("Unknown language"); + expect("{"); + + std::vector<SymbolVersion> Ret; + while (!Error && peek() != "}") { + StringRef Tok = next(); + bool HasWildcard = !Tok.startswith("\"") && hasWildcard(Tok); + Ret.push_back({unquote(Tok), IsCXX, HasWildcard}); + expect(";"); + } + + expect("}"); + return Ret; +} + +uint64_t ScriptParser::readMemoryAssignment(StringRef S1, StringRef S2, + StringRef S3) { + if (!consume(S1) && !consume(S2) && !consume(S3)) { + setError("expected one of: " + S1 + ", " + S2 + ", or " + S3); + return 0; + } + expect("="); + return readExpr()().getValue(); +} + +// Parse the MEMORY command as specified in: +// https://sourceware.org/binutils/docs/ld/MEMORY.html +// +// MEMORY { name [(attr)] : ORIGIN = origin, LENGTH = len ... 
} +void ScriptParser::readMemory() { + expect("{"); + while (!Error && !consume("}")) { + StringRef Name = next(); + + uint32_t Flags = 0; + uint32_t NegFlags = 0; + if (consume("(")) { + std::tie(Flags, NegFlags) = readMemoryAttributes(); + expect(")"); + } + expect(":"); + + uint64_t Origin = readMemoryAssignment("ORIGIN", "org", "o"); + expect(","); + uint64_t Length = readMemoryAssignment("LENGTH", "len", "l"); + + // Add the memory region to the region map (if it doesn't already exist). + auto It = Script->Opt.MemoryRegions.find(Name); + if (It != Script->Opt.MemoryRegions.end()) + setError("region '" + Name + "' already defined"); + else + Script->Opt.MemoryRegions[Name] = {Name, Origin, Length, + Origin, Flags, NegFlags}; + } +} + +// This function parses the attributes used to match against section +// flags when placing output sections in a memory region. These flags +// are only used when an explicit memory region name is not used. +std::pair<uint32_t, uint32_t> ScriptParser::readMemoryAttributes() { + uint32_t Flags = 0; + uint32_t NegFlags = 0; + bool Invert = false; + + for (char C : next().lower()) { + uint32_t Flag = 0; + if (C == '!') + Invert = !Invert; + else if (C == 'w') + Flag = SHF_WRITE; + else if (C == 'x') + Flag = SHF_EXECINSTR; + else if (C == 'a') + Flag = SHF_ALLOC; + else if (C != 'r') + setError("invalid memory region attribute"); + + if (Invert) + NegFlags |= Flag; + else + Flags |= Flag; + } + return {Flags, NegFlags}; +} + +void elf::readLinkerScript(MemoryBufferRef MB) { + ScriptParser(MB).readLinkerScript(); +} + +void elf::readVersionScript(MemoryBufferRef MB) { + ScriptParser(MB).readVersionScript(); +} - for (MemoryBufferRef MB : MBs) - if (encloses(MB.getBuffer(), Tokens[Pos - 1])) - return MB; - llvm_unreachable("getCurrentMB: failed to find a token"); +void elf::readDynamicList(MemoryBufferRef MB) { + ScriptParser(MB).readDynamicList(); } |