diff options
Diffstat (limited to 'include/lld/ReaderWriter')
-rw-r--r-- | include/lld/ReaderWriter/AtomLayout.h | 39 | ||||
-rw-r--r-- | include/lld/ReaderWriter/CoreLinkingContext.h | 47 | ||||
-rw-r--r-- | include/lld/ReaderWriter/ELFLinkingContext.h | 362 | ||||
-rw-r--r-- | include/lld/ReaderWriter/ELFTargets.h | 38 | ||||
-rw-r--r-- | include/lld/ReaderWriter/LinkerScript.h | 1396 | ||||
-rw-r--r-- | include/lld/ReaderWriter/MachOLinkingContext.h | 369 | ||||
-rw-r--r-- | include/lld/ReaderWriter/PECOFFLinkingContext.h | 463 | ||||
-rw-r--r-- | include/lld/ReaderWriter/RelocationHelperFunctions.h | 57 | ||||
-rw-r--r-- | include/lld/ReaderWriter/YamlContext.h | 46 |
9 files changed, 2817 insertions, 0 deletions
diff --git a/include/lld/ReaderWriter/AtomLayout.h b/include/lld/ReaderWriter/AtomLayout.h new file mode 100644 index 0000000000000..ad4cd0607b88a --- /dev/null +++ b/include/lld/ReaderWriter/AtomLayout.h @@ -0,0 +1,39 @@ +//===- include/lld/ReaderWriter/AtomLayout.h ------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_ATOM_LAYOUT_H +#define LLD_READER_WRITER_ATOM_LAYOUT_H + +namespace lld { +class Atom; + +/// AtomLayouts are used by a writer to manage physical positions of atoms. +/// AtomLayout has two positions; one is file offset, and the other is the +/// address when loaded into memory. +/// +/// Construction of AtomLayouts is usually a multi-pass process. When an atom +/// is appended to a section, we don't know the starting address of the +/// section. Thus, we have no choice but to store the offset from the +/// beginning of the section as AtomLayout values. After all sections starting +/// address are fixed, AtomLayout is revisited to get the offsets updated by +/// adding the starting addresses of the section. 
+struct AtomLayout { + AtomLayout(const Atom *a, uint64_t fileOff, uint64_t virAddr) + : _atom(a), _fileOffset(fileOff), _virtualAddr(virAddr) {} + + AtomLayout() : _atom(nullptr), _fileOffset(0), _virtualAddr(0) {} + + const Atom *_atom; + uint64_t _fileOffset; + uint64_t _virtualAddr; +}; + +} + +#endif diff --git a/include/lld/ReaderWriter/CoreLinkingContext.h b/include/lld/ReaderWriter/CoreLinkingContext.h new file mode 100644 index 0000000000000..d597ca46ddc7c --- /dev/null +++ b/include/lld/ReaderWriter/CoreLinkingContext.h @@ -0,0 +1,47 @@ +//===- lld/ReaderWriter/CoreLinkingContext.h ------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_CORE_LINKER_CONTEXT_H +#define LLD_READER_WRITER_CORE_LINKER_CONTEXT_H + +#include "lld/Core/LinkingContext.h" +#include "lld/Core/Reader.h" +#include "lld/Core/Writer.h" +#include "llvm/Support/ErrorHandling.h" + +namespace lld { + +class CoreLinkingContext : public LinkingContext { +public: + CoreLinkingContext(); + + enum { + TEST_RELOC_CALL32 = 1, + TEST_RELOC_PCREL32 = 2, + TEST_RELOC_GOT_LOAD32 = 3, + TEST_RELOC_GOT_USE32 = 4, + TEST_RELOC_LEA32_WAS_GOT = 5, + }; + + bool validateImpl(raw_ostream &diagnostics) override; + void addPasses(PassManager &pm) override; + + void addPassNamed(StringRef name) { _passNames.push_back(name); } + +protected: + Writer &writer() const override; + +private: + std::unique_ptr<Writer> _writer; + std::vector<StringRef> _passNames; +}; + +} // end namespace lld + +#endif diff --git a/include/lld/ReaderWriter/ELFLinkingContext.h b/include/lld/ReaderWriter/ELFLinkingContext.h new file mode 100644 index 0000000000000..d1cd3d9f3d6b5 --- /dev/null +++ b/include/lld/ReaderWriter/ELFLinkingContext.h @@ -0,0 +1,362 @@ +//===- 
lld/ReaderWriter/ELFLinkingContext.h -------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_ELF_LINKER_CONTEXT_H +#define LLD_READER_WRITER_ELF_LINKER_CONTEXT_H + +#include "lld/Core/LinkingContext.h" +#include "lld/Core/Pass.h" +#include "lld/Core/PassManager.h" +#include "lld/Core/STDExtras.h" +#include "lld/Core/range.h" +#include "lld/Core/Reader.h" +#include "lld/Core/Writer.h" +#include "lld/ReaderWriter/LinkerScript.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Object/ELF.h" +#include "llvm/Support/ELF.h" +#include <map> +#include <memory> +#include <set> + +namespace lld { +class DefinedAtom; +class Reference; +class File; + +namespace elf { +template <typename ELFT> class TargetHandler; +} + +class TargetHandlerBase { +public: + virtual ~TargetHandlerBase() {} + virtual void registerRelocationNames(Registry &) = 0; + + virtual std::unique_ptr<Reader> getObjReader() = 0; + + virtual std::unique_ptr<Reader> getDSOReader() = 0; + + virtual std::unique_ptr<Writer> getWriter() = 0; +}; + +class ELFLinkingContext : public LinkingContext { +public: + /// \brief The type of ELF executable that the linker + /// creates. + enum class OutputMagic : uint8_t { + DEFAULT, // The default mode, no specific magic set + NMAGIC, // Disallow shared libraries and don't align sections + // PageAlign Data, Mark Text Segment/Data segment RW + OMAGIC // Disallow shared libraries and don't align sections, + // Mark Text Segment/Data segment RW + }; + + llvm::Triple getTriple() const { return _triple; } + + // Page size. 
+ virtual uint64_t getPageSize() const { + if (_maxPageSize) + return *_maxPageSize; + return 0x1000; + } + virtual void setMaxPageSize(uint64_t pagesize) { + _maxPageSize = pagesize; + } + OutputMagic getOutputMagic() const { return _outputMagic; } + uint16_t getOutputELFType() const { return _outputELFType; } + uint16_t getOutputMachine() const; + bool mergeCommonStrings() const { return _mergeCommonStrings; } + virtual uint64_t getBaseAddress() const { return _baseAddress; } + virtual void setBaseAddress(uint64_t address) { _baseAddress = address; } + + void notifySymbolTableCoalesce(const Atom *existingAtom, const Atom *newAtom, + bool &useNew) override; + + /// This controls if undefined atoms need to be created for undefines that are + /// present in a SharedLibrary. If this option is set, undefined atoms are + /// created for every undefined symbol that are present in the dynamic table + /// in the shared library + bool useShlibUndefines() const { return _useShlibUndefines; } + /// @} + + /// \brief Does this relocation belong in the dynamic relocation table? + /// + /// This table is evaluated at loadtime by the dynamic loader and is + /// referenced by the DT_RELA{,ENT,SZ} entries in the dynamic table. + /// Relocations that return true will be added to the dynamic relocation + /// table. + virtual bool isDynamicRelocation(const Reference &) const { return false; } + + /// \brief Is this a copy relocation? + /// + /// If this is a copy relocation, its target must be an ObjectAtom. We must + /// include in DT_NEEDED the name of the library where this object came from. + virtual bool isCopyRelocation(const Reference &) const { + return false; + } + + bool validateImpl(raw_ostream &diagnostics) override; + + /// \brief Does the linker allow dynamic libraries to be linked with? 
+ /// This is true when the output mode of the executable is set to be + /// having NMAGIC/OMAGIC + virtual bool allowLinkWithDynamicLibraries() const { + if (_outputMagic == OutputMagic::NMAGIC || + _outputMagic == OutputMagic::OMAGIC || _noAllowDynamicLibraries) + return false; + return true; + } + + /// \brief Use Elf_Rela format to output relocation tables. + virtual bool isRelaOutputFormat() const { return true; } + + /// \brief Does this relocation belong in the dynamic plt relocation table? + /// + /// This table holds all of the relocations used for delayed symbol binding. + /// It will be evaluated at load time if LD_BIND_NOW is set. It is referenced + /// by the DT_{JMPREL,PLTRELSZ} entries in the dynamic table. + /// Relocations that return true will be added to the dynamic plt relocation + /// table. + virtual bool isPLTRelocation(const Reference &) const { return false; } + + /// \brief The path to the dynamic interpreter + virtual StringRef getDefaultInterpreter() const { + return "/lib64/ld-linux-x86-64.so.2"; + } + + /// \brief The dynamic linker path set by the --dynamic-linker option + virtual StringRef getInterpreter() const { + if (_dynamicLinkerArg) + return _dynamicLinkerPath; + return getDefaultInterpreter(); + } + + /// \brief Does the output have dynamic sections. + virtual bool isDynamic() const; + + /// \brief Are we creating a shared library? 
+ virtual bool isDynamicLibrary() const { + return _outputELFType == llvm::ELF::ET_DYN; + } + + /// \brief Is the relocation a relative relocation + virtual bool isRelativeReloc(const Reference &r) const; + + template <typename ELFT> + lld::elf::TargetHandler<ELFT> &getTargetHandler() const { + assert(_targetHandler && "Got null TargetHandler!"); + return static_cast<lld::elf::TargetHandler<ELFT> &>(*_targetHandler.get()); + } + + TargetHandlerBase *targetHandler() const { return _targetHandler.get(); } + void addPasses(PassManager &pm) override; + + void setTriple(llvm::Triple trip) { _triple = trip; } + void setNoInhibitExec(bool v) { _noInhibitExec = v; } + void setExportDynamic(bool v) { _exportDynamic = v; } + void setIsStaticExecutable(bool v) { _isStaticExecutable = v; } + void setMergeCommonStrings(bool v) { _mergeCommonStrings = v; } + void setUseShlibUndefines(bool use) { _useShlibUndefines = use; } + void setOutputELFType(uint32_t type) { _outputELFType = type; } + + bool shouldExportDynamic() const { return _exportDynamic; } + + void createInternalFiles(std::vector<std::unique_ptr<File>> &) const override; + + void finalizeInputFiles() override; + + /// \brief Set the dynamic linker path + void setInterpreter(StringRef dynamicLinker) { + _dynamicLinkerArg = true; + _dynamicLinkerPath = dynamicLinker; + } + + /// \brief Set NMAGIC output kind when the linker specifies --nmagic + /// or -n in the command line + /// Set OMAGIC output kind when the linker specifies --omagic + /// or -N in the command line + virtual void setOutputMagic(OutputMagic magic) { _outputMagic = magic; } + + /// \brief Disallow dynamic libraries during linking + virtual void setNoAllowDynamicLibraries() { _noAllowDynamicLibraries = true; } + + /// Searches directories for a match on the input File + ErrorOr<StringRef> searchLibrary(StringRef libName) const; + + /// \brief Searches directories for a match on the input file. 
+ /// If \p fileName is an absolute path and \p isSysRooted is true, check + /// the file under sysroot directory. If \p fileName is a relative path + /// and is not in the current directory, search the file through library + /// search directories. + ErrorOr<StringRef> searchFile(StringRef fileName, bool isSysRooted) const; + + /// Get the entry symbol name + StringRef entrySymbolName() const override; + + /// \brief Set new initializer function + void setInitFunction(StringRef name) { _initFunction = name; } + + /// \brief Return an initializer function name. + /// Either default "_init" or configured by the -init command line option. + StringRef initFunction() const { return _initFunction; } + + /// \brief Set new finalizer function + void setFiniFunction(StringRef name) { _finiFunction = name; } + + /// \brief Return a finalizer function name. + /// Either default "_fini" or configured by the -fini command line option. + StringRef finiFunction() const { return _finiFunction; } + + /// Add an absolute symbol. Used for --defsym. + void addInitialAbsoluteSymbol(StringRef name, uint64_t addr) { + _absoluteSymbols[name] = addr; + } + + void setSharedObjectName(StringRef soname) { + _soname = soname; + } + + StringRef sharedObjectName() const { return _soname; } + + StringRef getSysroot() const { return _sysrootPath; } + + /// \brief Set path to the system root + void setSysroot(StringRef path) { + _sysrootPath = path; + } + + void addRpath(StringRef path) { + _rpathList.push_back(path); + } + + range<const StringRef *> getRpathList() const { + return _rpathList; + } + + void addRpathLink(StringRef path) { + _rpathLinkList.push_back(path); + } + + range<const StringRef *> getRpathLinkList() const { + return _rpathLinkList; + } + + const std::map<std::string, uint64_t> &getAbsoluteSymbols() const { + return _absoluteSymbols; + } + + /// \brief Helper function to allocate strings. 
+ StringRef allocateString(StringRef ref) const { + char *x = _allocator.Allocate<char>(ref.size() + 1); + memcpy(x, ref.data(), ref.size()); + x[ref.size()] = '\0'; + return x; + } + + // add search path to list. + virtual bool addSearchPath(StringRef ref) { + _inputSearchPaths.push_back(ref); + return true; + } + + // Retrieve search path list. + StringRefVector getSearchPaths() { return _inputSearchPaths; }; + + // By default, the linker would merge sections that are read only with + // segments that have read and execute permissions. When the user specifies a + // flag --rosegment, a separate segment needs to be created. + bool mergeRODataToTextSegment() const { return _mergeRODataToTextSegment; } + + void setCreateSeparateROSegment() { _mergeRODataToTextSegment = false; } + + bool isDynamicallyExportedSymbol(StringRef name) const { + return _dynamicallyExportedSymbols.count(name) != 0; + } + + /// \brief Demangle symbols. + std::string demangle(StringRef symbolName) const override; + bool demangleSymbols() const { return _demangle; } + void setDemangleSymbols(bool d) { _demangle = d; } + + /// \brief Align segments. + bool alignSegments() const { return _alignSegments; } + void setAlignSegments(bool align) { _alignSegments = align; } + + /// \brief Strip symbols. + bool stripSymbols() const { return _stripSymbols; } + void setStripSymbols(bool strip) { _stripSymbols = strip; } + + /// \brief Collect statistics. + bool collectStats() const { return _collectStats; } + void setCollectStats(bool s) { _collectStats = s; } + + // --wrap option. 
+ void addWrapForSymbol(StringRef sym) { _wrapCalls.insert(sym); } + + const llvm::StringSet<> &wrapCalls() const { return _wrapCalls; } + + void setUndefinesResolver(std::unique_ptr<File> resolver); + + script::Sema &linkerScriptSema() { return _linkerScriptSema; } + const script::Sema &linkerScriptSema() const { return _linkerScriptSema; } + +private: + ELFLinkingContext() = delete; + +protected: + ELFLinkingContext(llvm::Triple, std::unique_ptr<TargetHandlerBase>); + + Writer &writer() const override; + + /// Method to create an internal file for an undefined symbol + std::unique_ptr<File> createUndefinedSymbolFile() const override; + + uint16_t _outputELFType; // e.g. ET_EXEC + llvm::Triple _triple; + std::unique_ptr<TargetHandlerBase> _targetHandler; + uint64_t _baseAddress; + bool _isStaticExecutable; + bool _noInhibitExec; + bool _exportDynamic; + bool _mergeCommonStrings; + bool _useShlibUndefines; + bool _dynamicLinkerArg; + bool _noAllowDynamicLibraries; + bool _mergeRODataToTextSegment; + bool _demangle; + bool _stripSymbols; + bool _alignSegments; + bool _nostdlib; + bool _collectStats; + llvm::Optional<uint64_t> _maxPageSize; + + OutputMagic _outputMagic; + StringRefVector _inputSearchPaths; + std::unique_ptr<Writer> _writer; + StringRef _dynamicLinkerPath; + StringRef _initFunction; + StringRef _finiFunction; + StringRef _sysrootPath; + StringRef _soname; + StringRefVector _rpathList; + StringRefVector _rpathLinkList; + llvm::StringSet<> _wrapCalls; + std::map<std::string, uint64_t> _absoluteSymbols; + llvm::StringSet<> _dynamicallyExportedSymbols; + std::unique_ptr<File> _resolver; + + // The linker script semantic object, which owns all script ASTs, is stored + // in the current linking context via _linkerScriptSema. 
+ script::Sema _linkerScriptSema; +}; +} // end namespace lld + +#endif diff --git a/include/lld/ReaderWriter/ELFTargets.h b/include/lld/ReaderWriter/ELFTargets.h new file mode 100644 index 0000000000000..3d00339818e26 --- /dev/null +++ b/include/lld/ReaderWriter/ELFTargets.h @@ -0,0 +1,38 @@ +//===- lld/ReaderWriter/ELFTargets.h --------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_ELF_TARGETS_H +#define LLD_READER_WRITER_ELF_TARGETS_H + +#include "ELFLinkingContext.h" + +namespace lld { +namespace elf { + +#define LLVM_TARGET(TargetName) \ + class TargetName##LinkingContext final : public ELFLinkingContext { \ + public: \ + static std::unique_ptr<ELFLinkingContext> create(llvm::Triple); \ + }; + +// FIXME: #include "llvm/Config/Targets.def" +LLVM_TARGET(AArch64) +LLVM_TARGET(ARM) +LLVM_TARGET(Hexagon) +LLVM_TARGET(Mips) +LLVM_TARGET(X86) +LLVM_TARGET(Example) +LLVM_TARGET(X86_64) + +#undef LLVM_TARGET + +} // end namespace elf +} // end namespace lld + +#endif diff --git a/include/lld/ReaderWriter/LinkerScript.h b/include/lld/ReaderWriter/LinkerScript.h new file mode 100644 index 0000000000000..ae8d18d830c62 --- /dev/null +++ b/include/lld/ReaderWriter/LinkerScript.h @@ -0,0 +1,1396 @@ +//===- ReaderWriter/LinkerScript.h ----------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief Linker script parser. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_LINKER_SCRIPT_H +#define LLD_READER_WRITER_LINKER_SCRIPT_H + +#include "lld/Core/Error.h" +#include "lld/Core/LLVM.h" +#include "lld/Core/range.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/raw_ostream.h" +#include <memory> +#include <system_error> +#include <unordered_map> +#include <vector> + +namespace lld { +namespace script { +class Token { +public: + enum Kind { + unknown, + eof, + exclaim, + exclaimequal, + amp, + ampequal, + l_paren, + r_paren, + star, + starequal, + plus, + plusequal, + comma, + minus, + minusequal, + slash, + slashequal, + number, + colon, + semicolon, + less, + lessequal, + lessless, + lesslessequal, + equal, + equalequal, + greater, + greaterequal, + greatergreater, + greatergreaterequal, + question, + identifier, + libname, + kw_align, + kw_align_with_input, + kw_as_needed, + kw_at, + kw_discard, + kw_entry, + kw_exclude_file, + kw_extern, + kw_group, + kw_hidden, + kw_input, + kw_keep, + kw_length, + kw_memory, + kw_origin, + kw_provide, + kw_provide_hidden, + kw_only_if_ro, + kw_only_if_rw, + kw_output, + kw_output_arch, + kw_output_format, + kw_overlay, + kw_search_dir, + kw_sections, + kw_sort_by_alignment, + kw_sort_by_init_priority, + kw_sort_by_name, + kw_sort_none, + kw_subalign, + l_brace, + pipe, + pipeequal, + r_brace, + tilde + }; + + Token() : _kind(unknown) {} + Token(StringRef range, Kind kind) : _range(range), _kind(kind) {} + + void dump(raw_ostream &os) const; + + StringRef _range; + Kind _kind; +}; + +class Lexer { +public: + explicit Lexer(std::unique_ptr<MemoryBuffer> mb) : _buffer(mb->getBuffer()) { + 
_sourceManager.AddNewSourceBuffer(std::move(mb), llvm::SMLoc()); + } + + void lex(Token &tok); + + const llvm::SourceMgr &getSourceMgr() const { return _sourceManager; } + +private: + bool canStartNumber(char c) const; + bool canContinueNumber(char c) const; + bool canStartName(char c) const; + bool canContinueName(char c) const; + void skipWhitespace(); + + Token _current; + /// \brief The current buffer state. + StringRef _buffer; + // Lexer owns the input files. + llvm::SourceMgr _sourceManager; +}; + +/// All linker script commands derive from this class. High-level commands, +/// sections-commands and output-section-commands are all subclasses of it. +/// Examples: +/// +/// OUTPUT_FORMAT("elf64-x86-64") /* A linker script command */ +/// OUTPUT_ARCH(i386:x86-64) /* Another command */ +/// ENTRY(_start) /* Another command */ +/// +/// SECTIONS /* Another command */ +/// { +/// .interp : { /* A sections-command */ +/// *(.interp) /* An output-section-command */ +/// } +/// } +/// +class Command { +public: + enum class Kind { + Entry, + Extern, + Group, + Input, + InputSectionsCmd, + InputSectionName, + Memory, + Output, + OutputArch, + OutputFormat, + OutputSectionDescription, + Overlay, + SearchDir, + Sections, + SortedGroup, + SymbolAssignment, + }; + + Kind getKind() const { return _kind; } + inline llvm::BumpPtrAllocator &getAllocator() const; + + virtual void dump(raw_ostream &os) const = 0; + + virtual ~Command() {} + +protected: + Command(class Parser &ctx, Kind k) : _ctx(ctx), _kind(k) {} + +private: + Parser &_ctx; + Kind _kind; +}; + +class Output : public Command { +public: + Output(Parser &ctx, StringRef outputFileName) + : Command(ctx, Kind::Output), _outputFileName(outputFileName) {} + + static bool classof(const Command *c) { return c->getKind() == Kind::Output; } + + void dump(raw_ostream &os) const override { + os << "OUTPUT(" << _outputFileName << ")\n"; + } + + StringRef getOutputFileName() const { return _outputFileName; } + +private: + StringRef 
_outputFileName; +}; + +class OutputFormat : public Command { +public: + OutputFormat(Parser &ctx, const SmallVectorImpl<StringRef> &formats) + : Command(ctx, Kind::OutputFormat) { + size_t numFormats = formats.size(); + StringRef *formatsStart = getAllocator().Allocate<StringRef>(numFormats); + std::copy(std::begin(formats), std::end(formats), formatsStart); + _formats = llvm::makeArrayRef(formatsStart, numFormats); + } + + static bool classof(const Command *c) { + return c->getKind() == Kind::OutputFormat; + } + + void dump(raw_ostream &os) const override { + os << "OUTPUT_FORMAT("; + bool first = true; + for (StringRef format : _formats) { + if (!first) + os << ","; + first = false; + os << "\"" << format << "\""; + } + os << ")\n"; + } + + llvm::ArrayRef<StringRef> getFormats() { return _formats; } + +private: + llvm::ArrayRef<StringRef> _formats; +}; + +class OutputArch : public Command { +public: + OutputArch(Parser &ctx, StringRef arch) + : Command(ctx, Kind::OutputArch), _arch(arch) {} + + static bool classof(const Command *c) { + return c->getKind() == Kind::OutputArch; + } + + void dump(raw_ostream &os) const override { + os << "OUTPUT_ARCH(" << getArch() << ")\n"; + } + + StringRef getArch() const { return _arch; } + +private: + StringRef _arch; +}; + +struct Path { + StringRef _path; + bool _asNeeded; + bool _isDashlPrefix; + + Path() : _asNeeded(false), _isDashlPrefix(false) {} + Path(StringRef path, bool asNeeded = false, bool isLib = false) + : _path(path), _asNeeded(asNeeded), _isDashlPrefix(isLib) {} +}; + +template<Command::Kind K> +class PathList : public Command { +public: + PathList(Parser &ctx, StringRef name, const SmallVectorImpl<Path> &paths) + : Command(ctx, K), _name(name) { + size_t numPaths = paths.size(); + Path *pathsStart = getAllocator().template Allocate<Path>(numPaths); + std::copy(std::begin(paths), std::end(paths), pathsStart); + _paths = llvm::makeArrayRef(pathsStart, numPaths); + } + + static bool classof(const Command *c) { 
return c->getKind() == K; } + + void dump(raw_ostream &os) const override { + os << _name << "("; + bool first = true; + for (const Path &path : getPaths()) { + if (!first) + os << " "; + first = false; + if (path._asNeeded) + os << "AS_NEEDED("; + if (path._isDashlPrefix) + os << "-l"; + os << path._path; + if (path._asNeeded) + os << ")"; + } + os << ")\n"; + } + + llvm::ArrayRef<Path> getPaths() const { return _paths; } + +private: + StringRef _name; + llvm::ArrayRef<Path> _paths; +}; + +class Group : public PathList<Command::Kind::Group> { +public: + template <class RangeT> + Group(Parser &ctx, RangeT range) + : PathList(ctx, "GROUP", std::move(range)) {} +}; + +class Input : public PathList<Command::Kind::Input> { +public: + template <class RangeT> + Input(Parser &ctx, RangeT range) + : PathList(ctx, "INPUT", std::move(range)) {} +}; + +class Entry : public Command { +public: + Entry(Parser &ctx, StringRef entryName) + : Command(ctx, Kind::Entry), _entryName(entryName) {} + + static bool classof(const Command *c) { return c->getKind() == Kind::Entry; } + + void dump(raw_ostream &os) const override { + os << "ENTRY(" << _entryName << ")\n"; + } + + StringRef getEntryName() const { return _entryName; } + +private: + StringRef _entryName; +}; + +class SearchDir : public Command { +public: + SearchDir(Parser &ctx, StringRef searchPath) + : Command(ctx, Kind::SearchDir), _searchPath(searchPath) {} + + static bool classof(const Command *c) { + return c->getKind() == Kind::SearchDir; + } + + void dump(raw_ostream &os) const override { + os << "SEARCH_DIR(\"" << _searchPath << "\")\n"; + } + + StringRef getSearchPath() const { return _searchPath; } + +private: + StringRef _searchPath; +}; + +/// Superclass for expression nodes. Linker scripts accept C-like expressions in +/// many places, such as when defining the value of a symbol or the address of +/// an output section. 
+/// Example: +/// +/// SECTIONS { +/// my_symbol = 1 + 1 * 2; +/// | | ^~~~> Constant : Expression +/// | | ^~~~> Constant : Expression +/// | | ^~~~> BinOp : Expression +/// ^~~~> Constant : Expression +/// ^~~~> BinOp : Expression (the top-level Expression node) +/// } +/// +class Expression { +public: + // The symbol table does not need to own its string keys and the use of StringMap + // here is an overkill. + typedef llvm::StringMap<int64_t, llvm::BumpPtrAllocator> SymbolTableTy; + + enum class Kind { Constant, Symbol, FunctionCall, Unary, BinOp, + TernaryConditional }; + Kind getKind() const { return _kind; } + inline llvm::BumpPtrAllocator &getAllocator() const; + virtual void dump(raw_ostream &os) const = 0; + virtual ErrorOr<int64_t> evalExpr(SymbolTableTy &symbolTable) const = 0; + virtual ~Expression() {} + +protected: + Expression(class Parser &ctx, Kind k) : _ctx(ctx), _kind(k) {} + +private: + Parser &_ctx; + Kind _kind; +}; + +/// A constant value is stored as unsigned because it represents absolute +/// values. We represent negative numbers by composing the unary '-' operator +/// with a constant. 
+class Constant : public Expression { +public: + Constant(Parser &ctx, uint64_t num) + : Expression(ctx, Kind::Constant), _num(num) {} + void dump(raw_ostream &os) const override; + + static bool classof(const Expression *c) { + return c->getKind() == Kind::Constant; + } + + ErrorOr<int64_t> evalExpr(SymbolTableTy &symbolTable) const override; + +private: + uint64_t _num; +}; + +class Symbol : public Expression { +public: + Symbol(Parser &ctx, StringRef name) + : Expression(ctx, Kind::Symbol), _name(name) {} + void dump(raw_ostream &os) const override; + + static bool classof(const Expression *c) { + return c->getKind() == Kind::Symbol; + } + + ErrorOr<int64_t> evalExpr(SymbolTableTy &symbolTable) const override; + +private: + StringRef _name; +}; + +class FunctionCall : public Expression { +public: + FunctionCall(Parser &ctx, StringRef name, + const SmallVectorImpl<const Expression *> &args) + : Expression(ctx, Kind::FunctionCall), _name(name) { + size_t numArgs = args.size(); + const Expression **argsStart = + getAllocator().Allocate<const Expression *>(numArgs); + std::copy(std::begin(args), std::end(args), argsStart); + _args = llvm::makeArrayRef(argsStart, numArgs); + } + + void dump(raw_ostream &os) const override; + + static bool classof(const Expression *c) { + return c->getKind() == Kind::FunctionCall; + } + + ErrorOr<int64_t> evalExpr(SymbolTableTy &symbolTable) const override; + +private: + StringRef _name; + llvm::ArrayRef<const Expression *> _args; +}; + +class Unary : public Expression { +public: + enum Operation { + Minus, + Not + }; + + Unary(Parser &ctx, Operation op, const Expression *child) + : Expression(ctx, Kind::Unary), _op(op), _child(child) {} + void dump(raw_ostream &os) const override; + + static bool classof(const Expression *c) { + return c->getKind() == Kind::Unary; + } + + ErrorOr<int64_t> evalExpr(SymbolTableTy &symbolTable) const override; + +private: + Operation _op; + const Expression *_child; +}; + +class BinOp : public 
Expression { +public: + enum Operation { + And, + CompareDifferent, + CompareEqual, + CompareGreater, + CompareGreaterEqual, + CompareLess, + CompareLessEqual, + Div, + Mul, + Or, + Shl, + Shr, + Sub, + Sum + }; + + BinOp(Parser &ctx, const Expression *lhs, Operation op, const Expression *rhs) + : Expression(ctx, Kind::BinOp), _op(op), _lhs(lhs), _rhs(rhs) {} + + void dump(raw_ostream &os) const override; + + static bool classof(const Expression *c) { + return c->getKind() == Kind::BinOp; + } + + ErrorOr<int64_t> evalExpr(SymbolTableTy &symbolTable) const override; + +private: + Operation _op; + const Expression *_lhs; + const Expression *_rhs; +}; + +/// Operands of the ternary operator can be any expression, similar to the other +/// operations, including another ternary operator. To disambiguate the parse +/// tree, note that ternary conditionals have precedence 13 and, unlike +/// other operators, associate right-to-left. For example: +/// +/// i = i > 3 ? i < 5 ? 1 : 2 : 0; +/// +/// will have the following parse tree: +/// +/// i = ((i > 3) ? ((i < 5) ? 1 : 2) : 0); +/// +/// The '>' binds tighter because it has precedence 6. When faced with two "?" +/// ternary operators back-to-back, the parser prioritizes the rightmost one. 
+/// +class TernaryConditional : public Expression { +public: + TernaryConditional(Parser &ctx, const Expression *conditional, + const Expression *trueExpr, const Expression *falseExpr) + : Expression(ctx, Kind::TernaryConditional), _conditional(conditional), + _trueExpr(trueExpr), _falseExpr(falseExpr) {} + + void dump(raw_ostream &os) const override; + + static bool classof(const Expression *c) { + return c->getKind() == Kind::TernaryConditional; + } + + ErrorOr<int64_t> evalExpr(SymbolTableTy &symbolTable) const override; + +private: + const Expression *_conditional; + const Expression *_trueExpr; + const Expression *_falseExpr; +}; + +/// Symbol assignments of the form "symbolname = <expression>" may occur either +/// as sections-commands or as output-section-commands. +/// Example: +/// +/// SECTIONS { +/// mysymbol = . /* SymbolAssignment as a sections-command */ +/// .data : { +/// othersymbol = . /* SymbolAssignment as an output-section-command */ +/// } +///} +/// +class SymbolAssignment : public Command { +public: + enum AssignmentKind { Simple, Sum, Sub, Mul, Div, Shl, Shr, And, Or }; + enum AssignmentVisibility { Default, Hidden, Provide, ProvideHidden }; + + SymbolAssignment(Parser &ctx, StringRef name, const Expression *expr, + AssignmentKind kind, AssignmentVisibility visibility) + : Command(ctx, Kind::SymbolAssignment), _expression(expr), _symbol(name), + _assignmentKind(Simple), _assignmentVisibility(visibility) {} + + static bool classof(const Command *c) { + return c->getKind() == Kind::SymbolAssignment; + } + + void dump(raw_ostream &os) const override; + const Expression *expr() const { return _expression; } + StringRef symbol() const { return _symbol; } + AssignmentKind assignmentKind() const { return _assignmentKind; } + AssignmentVisibility assignmentVisibility() const { + return _assignmentVisibility; + } + +private: + const Expression *_expression; + StringRef _symbol; + AssignmentKind _assignmentKind; + AssignmentVisibility 
_assignmentVisibility; +}; + +/// Encodes how to sort file names or section names that are expanded from +/// wildcard operators. This typically occurs in constructs such as +/// SECTIONS { .data : SORT_BY_NAME(*)(*) }}, where the order of the expanded +/// names is important to determine which sections go first. +enum class WildcardSortMode { + NA, + ByAlignment, + ByAlignmentAndName, + ByInitPriority, + ByName, + ByNameAndAlignment, + None +}; + +/// Represents either a single input section name or a group of sorted input +/// section names. They specify which sections to map to a given output section. +/// Example: +/// +/// SECTIONS { +/// .x: { *(.text) } +/// /* ^~~~^ InputSectionName : InputSection */ +/// .y: { *(SORT(.text*)) } +/// /* ^~~~~~~~~~~^ InputSectionSortedGroup : InputSection */ +/// } +class InputSection : public Command { +public: + static bool classof(const Command *c) { + return c->getKind() == Kind::InputSectionName || + c->getKind() == Kind::SortedGroup; + } + +protected: + InputSection(Parser &ctx, Kind k) : Command(ctx, k) {} +}; + +class InputSectionName : public InputSection { +public: + InputSectionName(Parser &ctx, StringRef name, bool excludeFile) + : InputSection(ctx, Kind::InputSectionName), _name(name), + _excludeFile(excludeFile) {} + + void dump(raw_ostream &os) const override; + + static bool classof(const Command *c) { + return c->getKind() == Kind::InputSectionName; + } + bool hasExcludeFile() const { return _excludeFile; } + StringRef name() const { return _name; } + +private: + StringRef _name; + bool _excludeFile; +}; + +class InputSectionSortedGroup : public InputSection { +public: + typedef llvm::ArrayRef<const InputSection *>::const_iterator const_iterator; + + InputSectionSortedGroup(Parser &ctx, WildcardSortMode sort, + const SmallVectorImpl<const InputSection *> §ions) + : InputSection(ctx, Kind::SortedGroup), _sortMode(sort) { + size_t numSections = sections.size(); + const InputSection **sectionsStart = + 
getAllocator().Allocate<const InputSection *>(numSections); + std::copy(std::begin(sections), std::end(sections), sectionsStart); + _sections = llvm::makeArrayRef(sectionsStart, numSections); + } + + void dump(raw_ostream &os) const override; + WildcardSortMode sortMode() const { return _sortMode; } + + static bool classof(const Command *c) { + return c->getKind() == Kind::SortedGroup; + } + + const_iterator begin() const { return _sections.begin(); } + const_iterator end() const { return _sections.end(); } + +private: + WildcardSortMode _sortMode; + llvm::ArrayRef<const InputSection *> _sections; +}; + +/// An output-section-command that maps a series of sections inside a given +/// file-archive pair to an output section. +/// Example: +/// +/// SECTIONS { +/// .x: { *(.text) } +/// /* ^~~~~~~^ InputSectionsCmd */ +/// .y: { w:z(SORT(.text*)) } +/// /* ^~~~~~~~~~~~~~~~^ InputSectionsCmd */ +/// } +class InputSectionsCmd : public Command { +public: + typedef llvm::ArrayRef<const InputSection *>::const_iterator const_iterator; + typedef std::vector<const InputSection *> VectorTy; + + InputSectionsCmd(Parser &ctx, StringRef memberName, StringRef archiveName, + bool keep, WildcardSortMode fileSortMode, + WildcardSortMode archiveSortMode, + const SmallVectorImpl<const InputSection *> §ions) + : Command(ctx, Kind::InputSectionsCmd), _memberName(memberName), + _archiveName(archiveName), _keep(keep), _fileSortMode(fileSortMode), + _archiveSortMode(archiveSortMode) { + size_t numSections = sections.size(); + const InputSection **sectionsStart = + getAllocator().Allocate<const InputSection *>(numSections); + std::copy(std::begin(sections), std::end(sections), sectionsStart); + _sections = llvm::makeArrayRef(sectionsStart, numSections); + } + + void dump(raw_ostream &os) const override; + + static bool classof(const Command *c) { + return c->getKind() == Kind::InputSectionsCmd; + } + + StringRef memberName() const { return _memberName; } + StringRef archiveName() const { 
return _archiveName; } + const_iterator begin() const { return _sections.begin(); } + const_iterator end() const { return _sections.end(); } + WildcardSortMode archiveSortMode() const { return _archiveSortMode; } + WildcardSortMode fileSortMode() const { return _fileSortMode; } + +private: + StringRef _memberName; + StringRef _archiveName; + bool _keep; + WildcardSortMode _fileSortMode; + WildcardSortMode _archiveSortMode; + llvm::ArrayRef<const InputSection *> _sections; +}; + +/// A sections-command to specify which input sections and symbols compose a +/// given output section. +/// Example: +/// +/// SECTIONS { +/// .x: { *(.text) ; symbol = .; } +/// /*^~~~~~~~~~~~~~~~~~~~~~~~~~~~~^ OutputSectionDescription */ +/// .y: { w:z(SORT(.text*)) } +/// /*^~~~~~~~~~~~~~~~~~~~~~~~^ OutputSectionDescription */ +/// .a 0x10000 : ONLY_IF_RW { *(.data*) ; *:libc.a(SORT(*)); } +/// /*^~~~~~~~~~~~~ OutputSectionDescription ~~~~~~~~~~~~~~~~~^ */ +/// } +class OutputSectionDescription : public Command { +public: + enum Constraint { C_None, C_OnlyIfRO, C_OnlyIfRW }; + + typedef llvm::ArrayRef<const Command *>::const_iterator const_iterator; + + OutputSectionDescription( + Parser &ctx, StringRef sectionName, const Expression *address, + const Expression *align, const Expression *subAlign, const Expression *at, + const Expression *fillExpr, StringRef fillStream, bool alignWithInput, + bool discard, Constraint constraint, + const SmallVectorImpl<const Command *> &outputSectionCommands) + : Command(ctx, Kind::OutputSectionDescription), _sectionName(sectionName), + _address(address), _align(align), _subAlign(subAlign), _at(at), + _fillExpr(fillExpr), _fillStream(fillStream), + _alignWithInput(alignWithInput), _discard(discard), + _constraint(constraint) { + size_t numCommands = outputSectionCommands.size(); + const Command **commandsStart = + getAllocator().Allocate<const Command *>(numCommands); + std::copy(std::begin(outputSectionCommands), + std::end(outputSectionCommands), 
commandsStart); + _outputSectionCommands = llvm::makeArrayRef(commandsStart, numCommands); + } + + static bool classof(const Command *c) { + return c->getKind() == Kind::OutputSectionDescription; + } + + void dump(raw_ostream &os) const override; + + const_iterator begin() const { return _outputSectionCommands.begin(); } + const_iterator end() const { return _outputSectionCommands.end(); } + StringRef name() const { return _sectionName; } + +private: + StringRef _sectionName; + const Expression *_address; + const Expression *_align; + const Expression *_subAlign; + const Expression *_at; + const Expression *_fillExpr; + StringRef _fillStream; + bool _alignWithInput; + bool _discard; + Constraint _constraint; + llvm::ArrayRef<const Command *> _outputSectionCommands; +}; + +/// Represents an Overlay structure as documented in +/// https://sourceware.org/binutils/docs/ld/Overlay-Description.html#Overlay-Description +class Overlay : public Command { +public: + Overlay(Parser &ctx) : Command(ctx, Kind::Overlay) {} + + static bool classof(const Command *c) { + return c->getKind() == Kind::Overlay; + } + + void dump(raw_ostream &os) const override { os << "Overlay description\n"; } +}; + +/// Represents all the contents of the SECTIONS {} construct. 
+class Sections : public Command { +public: + typedef llvm::ArrayRef<const Command *>::const_iterator const_iterator; + + Sections(Parser &ctx, + const SmallVectorImpl<const Command *> §ionsCommands) + : Command(ctx, Kind::Sections) { + size_t numCommands = sectionsCommands.size(); + const Command **commandsStart = + getAllocator().Allocate<const Command *>(numCommands); + std::copy(std::begin(sectionsCommands), std::end(sectionsCommands), + commandsStart); + _sectionsCommands = llvm::makeArrayRef(commandsStart, numCommands); + } + + static bool classof(const Command *c) { + return c->getKind() == Kind::Sections; + } + + void dump(raw_ostream &os) const override; + const_iterator begin() const { return _sectionsCommands.begin(); } + const_iterator end() const { return _sectionsCommands.end(); } + +private: + llvm::ArrayRef<const Command *> _sectionsCommands; +}; + +/// Represents a single memory block definition in a MEMORY {} command. +class MemoryBlock { +public: + MemoryBlock(StringRef name, StringRef attr, + const Expression *origin, const Expression *length) + : _name(name), _attr(attr), _origin(origin), _length(length) {} + + void dump(raw_ostream &os) const; + +private: + StringRef _name; + StringRef _attr; + const Expression *_origin; + const Expression *_length; +}; + +/// Represents all the contents of the MEMORY {} command. +class Memory : public Command { +public: + Memory(Parser &ctx, + const SmallVectorImpl<const MemoryBlock *> &blocks) + : Command(ctx, Kind::Memory) { + size_t numBlocks = blocks.size(); + const MemoryBlock **blocksStart = + getAllocator().Allocate<const MemoryBlock *>(numBlocks); + std::copy(std::begin(blocks), std::end(blocks), blocksStart); + _blocks = llvm::makeArrayRef(blocksStart, numBlocks); + } + + static bool classof(const Command *c) { + return c->getKind() == Kind::Memory; + } + + void dump(raw_ostream &os) const override; + +private: + llvm::ArrayRef<const MemoryBlock *> _blocks; +}; + +/// Represents an extern command. 
+class Extern : public Command { +public: + typedef llvm::ArrayRef<StringRef>::const_iterator const_iterator; + + Extern(Parser &ctx, + const SmallVectorImpl<StringRef> &symbols) + : Command(ctx, Kind::Extern) { + size_t numSymbols = symbols.size(); + StringRef *symbolsStart = + getAllocator().Allocate<StringRef>(numSymbols); + std::copy(std::begin(symbols), std::end(symbols), symbolsStart); + _symbols = llvm::makeArrayRef(symbolsStart, numSymbols); + } + + static bool classof(const Command *c) { + return c->getKind() == Kind::Extern; + } + + void dump(raw_ostream &os) const override; + const_iterator begin() const { return _symbols.begin(); } + const_iterator end() const { return _symbols.end(); } + +private: + llvm::ArrayRef<StringRef> _symbols; +}; + +/// Stores the parse tree of a linker script. +class LinkerScript { +public: + void dump(raw_ostream &os) const { + for (const Command *c : _commands) { + c->dump(os); + if (isa<SymbolAssignment>(c)) + os << "\n"; + } + } + + std::vector<const Command *> _commands; +}; + +/// Recognizes syntactic constructs of a linker script using a predictive +/// parser/recursive descent implementation. +/// +/// Based on the linker script documentation available at +/// https://sourceware.org/binutils/docs/ld/Scripts.html +class Parser { +public: + explicit Parser(std::unique_ptr<MemoryBuffer> mb) + : _lex(std::move(mb)), _peekAvailable(false) {} + + /// Let's not allow copying of Parser class because it would be expensive + /// to update all the AST pointers to a new buffer. + Parser(const Parser &instance) = delete; + + /// Lex and parse the current memory buffer to create a linker script AST. + std::error_code parse(); + + /// Returns a reference to the top level node of the linker script AST. + LinkerScript *get() { return &_script; } + + /// Returns a reference to the underlying allocator. 
+ llvm::BumpPtrAllocator &getAllocator() { return _alloc; } + +private: + /// Advances to the next token, either asking the Lexer to lex the next token + /// or obtaining it from the look ahead buffer. + void consumeToken() { + // First check if the look ahead buffer cached the next token + if (_peekAvailable) { + _tok = _bufferedToken; + _peekAvailable = false; + return; + } + _lex.lex(_tok); + } + + /// Returns the token that succeeds the current one without consuming the + /// current token. This operation will lex an additional token and store it in + /// a private buffer. + const Token &peek() { + if (_peekAvailable) + return _bufferedToken; + + _lex.lex(_bufferedToken); + _peekAvailable = true; + return _bufferedToken; + } + + void error(const Token &tok, Twine msg) { + _lex.getSourceMgr().PrintMessage( + llvm::SMLoc::getFromPointer(tok._range.data()), + llvm::SourceMgr::DK_Error, msg); + } + + bool expectAndConsume(Token::Kind kind, Twine msg) { + if (_tok._kind != kind) { + error(_tok, msg); + return false; + } + consumeToken(); + return true; + } + + bool isNextToken(Token::Kind kind) { return (_tok._kind == kind); } + + // Recursive descent parsing member functions + // All of these functions consumes tokens and return an AST object, + // represented by the Command superclass. However, note that not all AST + // objects derive from Command. For nodes of C-like expressions, used in + // linker scripts, the superclass is Expression. For nodes that represent + // input sections that map to an output section, the superclass is + // InputSection. 
+ // + // Example mapping common constructs to AST nodes: + // + // SECTIONS { /* Parsed to Sections class */ + // my_symbol = 1 + 1; /* Parsed to SymbolAssignment class */ + // /* ^~~> Parsed to Expression class */ + // .data : { *(.data) } /* Parsed to OutputSectionDescription class */ + // /* ^~~> Parsed to InputSectionName class */ + // /* ^~~~~> Parsed to InputSectionsCmd class */ + // } + + // ==== Expression parsing member functions ==== + + /// Parse "identifier(param [, param]...)" + /// + /// Example: + /// + /// SECTIONS { + /// my_symbol = 0x1000 | ALIGN(other_symbol); + /// /* ^~~~> parseFunctionCall() + /// } + const Expression *parseFunctionCall(); + + /// Ensures that the current token is an expression operand. If it is not, + /// issues an error to the user and returns false. + bool expectExprOperand(); + + /// Parse operands of an expression, such as function calls, identifiers, + /// literal numbers or unary operators. + /// + /// Example: + /// + /// SECTIONS { + /// my_symbol = 0x1000 | ALIGN(other_symbol); + /// ^~~~> parseExprTerminal() + /// } + const Expression *parseExprOperand(); + + // As a reference to the precedence of C operators, consult + // http://en.cppreference.com/w/c/language/operator_precedence + + /// Parse either a single expression operand and returns or parse an entire + /// expression if its top-level node has a lower or equal precedence than the + /// indicated. + const Expression *parseExpression(unsigned precedence = 13); + + /// Parse an operator and its rhs operand, assuming that the lhs was already + /// consumed. Keep parsing subsequent operator-operand pairs that do not + /// exceed highestPrecedence. + /// * lhs points to the left-hand-side operand of this operator + /// * maxPrecedence has the maximum operator precedence level that this parse + /// function is allowed to consume. 
+ const Expression *parseOperatorOperandLoop(const Expression *lhs, + unsigned maxPrecedence); + + /// Parse ternary conditionals such as "(condition)? true: false;". This + /// operator has precedence level 13 and associates right-to-left. + const Expression *parseTernaryCondOp(const Expression *lhs); + + // ==== High-level commands parsing ==== + + /// Parse the OUTPUT linker script command. + /// Example: + /// OUTPUT(/path/to/file) + /// ^~~~> parseOutput() + /// + Output *parseOutput(); + + /// Parse the OUTPUT_FORMAT linker script command. + /// Example: + /// + /// OUTPUT_FORMAT(elf64-x86-64,elf64-x86-64,elf64-x86-64) + /// ^~~~> parseOutputFormat() + /// + OutputFormat *parseOutputFormat(); + + /// Parse the OUTPUT_ARCH linker script command. + /// Example: + /// + /// OUTPUT_ARCH(i386:x86-64) + /// ^~~~> parseOutputArch() + /// + OutputArch *parseOutputArch(); + + /// Parse the INPUT or GROUP linker script command. + /// Example: + /// + /// GROUP ( /lib/x86_64-linux-gnu/libc.so.6 + /// /usr/lib/x86_64-linux-gnu/libc_nonshared.a + /// AS_NEEDED ( /lib/x86_64-linux-gnu/ld-linux-x86-64.so.2 ) + /// -lm -l:libgcc.a ) + /// + template<class T> T *parsePathList(); + bool parseAsNeeded(SmallVectorImpl<Path> &paths); + + /// Parse the ENTRY linker script command. + /// Example: + /// + /// ENTRY(init) + /// ^~~~> parseEntry() + /// + Entry *parseEntry(); + + /// Parse the SEARCH_DIR linker script command. + /// Example: + /// + /// SEARCH_DIR("/usr/x86_64-linux-gnu/lib64"); + /// ^~~~> parseSearchDir() + /// + SearchDir *parseSearchDir(); + + /// Parse "symbol = expression" commands that live inside the + /// SECTIONS directive. + /// Example: + /// + /// SECTIONS { + /// my_symbol = 1 + 1; + /// ^~~~> parseExpression() + /// ^~~~ parseSymbolAssignment() + /// } + /// + const SymbolAssignment *parseSymbolAssignment(); + + /// Parse "EXCLUDE_FILE" used inside the listing of input section names. 
+ /// Example: + /// + /// SECTIONS { + /// .data : { *(EXCLUDE_FILE (*crtend.o *otherfile.o) .ctors) } + /// ^~~~> parseExcludeFile() + /// } + /// + ErrorOr<InputSectionsCmd::VectorTy> parseExcludeFile(); + + /// Helper to parse SORT_BY_NAME(, SORT_BY_ALIGNMENT( and SORT_NONE(, + /// possibly nested. Returns the number of Token::r_paren tokens that need + /// to be consumed, while sortMode is updated with the parsed sort + /// criteria. + /// Example: + /// + /// SORT_BY_NAME(SORT_BY_ALIGNMENT(*)) + /// ^~~~ parseSortDirectives() ~~^ + /// Returns 2, finishes with sortMode = WildcardSortMode::ByNameAndAlignment + /// + int parseSortDirectives(WildcardSortMode &sortMode); + + /// Parse a group of input section names that are sorted via SORT* directives. + /// Example: + /// SORT_BY_NAME(SORT_BY_ALIGNMENT(*data *bss)) + const InputSection *parseSortedInputSections(); + + /// Parse input section description statements. + /// Example: + /// + /// SECTIONS { + /// .mysection : crt.o(.data* .bss SORT_BY_NAME(name*)) + /// ^~~~ parseInputSectionsCmd() + /// } + const InputSectionsCmd *parseInputSectionsCmd(); + + /// Parse output section description statements. + /// Example: + /// + /// SECTIONS { + /// .data : { crt.o(.data* .bss SORT_BY_NAME(name*)) } + /// ^~~~ parseOutputSectionDescription() + /// } + const OutputSectionDescription *parseOutputSectionDescription(); + + /// Stub for parsing overlay commands. Currently unimplemented. + const Overlay *parseOverlay(); + + /// Parse the SECTIONS linker script command. + /// Example: + /// + /// SECTIONS { + /// ^~~~ parseSections() + /// . = 0x100000; + /// .data : { *(.data) } + /// } + /// + Sections *parseSections(); + + /// Parse the MEMORY linker script command. + /// Example: + /// + /// MEMORY { + /// ^~~~ parseMemory() + /// ram (rwx) : ORIGIN = 0x20000000, LENGTH = 96K + /// rom (rx) : ORIGIN = 0x0, LENGTH = 256K + /// } + /// + Memory *parseMemory(); + + /// Parse the EXTERN linker script command. 
+ /// Example: + /// + /// EXTERN(symbol symbol ...) + /// ^~~~> parseExtern() + /// + Extern *parseExtern(); + +private: + // Owns the entire linker script AST nodes + llvm::BumpPtrAllocator _alloc; + + // The top-level/entry-point linker script AST node + LinkerScript _script; + + Lexer _lex; + + // Current token being analyzed + Token _tok; + + // Annotate whether we buffered the next token to allow peeking + bool _peekAvailable; + Token _bufferedToken; +}; + +/// script::Sema traverses all parsed linker script structures and populate +/// internal data structures to be able to answer the following questions: +/// +/// * According to the linker script, which input section goes first in the +/// output file layout, input section A or input section B? +/// +/// * What is the name of the output section that input section A should be +/// mapped to? +/// +/// * Which linker script expressions should be calculated before emitting +/// a given section? +/// +/// * How to evaluate a given linker script expression? +/// +class Sema { +public: + /// From the linker script point of view, this class represents the minimum + /// set of information to uniquely identify an input section. + struct SectionKey { + StringRef archivePath; + StringRef memberPath; + StringRef sectionName; + }; + + Sema(); + + /// We can parse several linker scripts via command line whose ASTs are stored + /// here via addLinkerScript(). + void addLinkerScript(std::unique_ptr<Parser> script) { + _scripts.push_back(std::move(script)); + } + + const std::vector<std::unique_ptr<Parser>> &getLinkerScripts() { + return _scripts; + } + + /// Prepare our data structures according to the linker scripts currently in + /// our control (control given via addLinkerScript()). Called once all linker + /// scripts have been parsed. + void perform(); + + /// Answer if we have layout commands (section mapping rules). 
If we don't, + /// the output file writer can assume there is no linker script special rule + /// to handle. + bool hasLayoutCommands() const { return _layoutCommands.size() > 0; } + + /// Return true if this section has a mapping rule in the linker script + bool hasMapping(const SectionKey &key) const { + return getLayoutOrder(key, true) >= 0; + } + + /// Order function - used to sort input sections in the output file according + /// to linker script custom mappings. Return true if lhs should appear before + /// rhs. + bool less(const SectionKey &lhs, const SectionKey &rhs) const; + + /// Retrieve the name of the output section that this input section is mapped + /// to, according to custom linker script mappings. + StringRef getOutputSection(const SectionKey &key) const; + + /// Retrieve all the linker script expressions that need to be evaluated + /// before the given section is emitted. This is *not* const because the + /// first section to retrieve a given set of expression is the only one to + /// receive it. This set is marked as "delivered" and no other sections can + /// retrieve this set again. If we don't do this, multiple sections may map + /// to the same set of expressions because of wildcards rules. + std::vector<const SymbolAssignment *> getExprs(const SectionKey &key); + + /// Evaluate a single linker script expression according to our current + /// context (symbol table). This function is *not* constant because it can + /// update our symbol table with new symbols calculated in this expression. + std::error_code evalExpr(const SymbolAssignment *assgn, uint64_t &curPos); + + /// Retrieve the set of symbols defined in linker script expressions. + const llvm::StringSet<> &getScriptDefinedSymbols() const; + + /// Queries the linker script symbol table for the value of a given symbol. + /// This function must be called after linker script expressions evaluation + /// has been performed (by calling evalExpr() for all expressions). 
+ uint64_t getLinkerScriptExprValue(StringRef name) const; + + void dump() const; + +private: + /// A custom hash operator to teach the STL how to handle our custom keys. + /// This will be used in our hash table mapping Sections to a Layout Order + /// number (caching results). + struct SectionKeyHash { + int64_t operator()(const SectionKey &k) const { + return llvm::hash_combine(k.archivePath, k.memberPath, k.sectionName); + } + }; + + /// Teach the STL when two section keys are the same. This will be used in + /// our hash table mapping Sections to a Layout Order number (caching results) + struct SectionKeyEq { + bool operator()(const SectionKey &lhs, const SectionKey &rhs) const { + return ((lhs.archivePath == rhs.archivePath) && + (lhs.memberPath == rhs.memberPath) && + (lhs.sectionName == rhs.sectionName)); + } + }; + + /// Given an order id, check if it matches the tuple + /// <archivePath, memberPath, sectionName> and returns the + /// internal id that matched, or -1 if no matches. + int matchSectionName(int id, const SectionKey &key) const; + + /// Returns a number that will determine the order of this input section + /// in the final layout. If coarse is true, we simply return the layour order + /// of the higher-level node InputSectionsCmd, used to order input sections. + /// If coarse is false, we return the layout index down to the internal + /// InputSectionsCmd arrangement, used to get the set of preceding linker + ///expressions. + int getLayoutOrder(const SectionKey &key, bool coarse) const; + + /// Compare two sections that have the same mapping rule (i.e., are matched + /// by the same InputSectionsCmd). + /// Determine if lhs < rhs by analyzing the InputSectionsCmd structure. 
+ bool localCompare(int order, const SectionKey &lhs, + const SectionKey &rhs) const; + + + /// Our goal with all linearizeAST overloaded functions is to + /// traverse the linker script AST while putting nodes in a vector and + /// thus enforcing order among nodes (which comes first). + /// + /// The order among nodes is determined by their indexes in this vector + /// (_layoutCommands). This index allows us to solve the problem of + /// establishing the order among two different input sections: we match each + /// input sections with their respective layout command and use the indexes + /// of these commands to order these sections. + /// + /// Example: + /// + /// Given the linker script: + /// SECTIONS { + /// .text : { *(.text) } + /// .data : { *(.data) } + /// } + /// + /// The _layoutCommands vector should contain: + /// id 0 : <OutputSectionDescription> (_sectionName = ".text") + /// id 1 : <InputSectionsCmd> (_memberName = "*") + /// id 2 : <InputSectionName> (_name = ".text) + /// id 3 : <OutputSectionDescription> (_sectionName = ".data") + /// id 4 : <InputSectionsCmd> (_memberName = "*") + /// id 5 : <InputSectionName> (_name = ".data") + /// + /// If we need to sort the following input sections: + /// + /// input section A: .text from libc.a (member errno.o) + /// input section B: .data from libc.a (member write.o) + /// + /// Then we match input section A with the InputSectionsCmd of id 1, and + /// input section B with the InputSectionsCmd of id 4. Since 1 < 4, we + /// put A before B. + /// + /// The second problem handled by the linearization of the AST is the task + /// of finding all preceding expressions that need to be calculated before + /// emitting a given section. This task is easier to deal with when all nodes + /// are in a vector because otherwise we would need to traverse multiple + /// levels of the AST to find the set of expressions that preceed a layout + /// command. 
+ /// + /// The linker script commands that are linearized ("layout commands") are: + /// + /// * OutputSectionDescription, containing an output section name + /// * InputSectionsCmd, containing an input file name + /// * InputSectionName, containing a single input section name + /// * InputSectionSortedName, a group of input section names + /// * SymbolAssignment, containing an expression that may + /// change the address where the linker is outputting data + /// + void linearizeAST(const Sections *sections); + void linearizeAST(const InputSectionsCmd *inputSections); + void linearizeAST(const InputSection *inputSection); + + void perform(const LinkerScript *ls); + + std::vector<std::unique_ptr<Parser>> _scripts; + std::vector<const Command *> _layoutCommands; + std::unordered_multimap<std::string, int> _memberToLayoutOrder; + std::vector<std::pair<StringRef, int>> _memberNameWildcards; + mutable std::unordered_map<SectionKey, int, SectionKeyHash, SectionKeyEq> + _cacheSectionOrder, _cacheExpressionOrder; + llvm::DenseSet<int> _deliveredExprs; + mutable llvm::StringSet<> _definedSymbols; + + Expression::SymbolTableTy _symbolTable; +}; + +llvm::BumpPtrAllocator &Command::getAllocator() const { + return _ctx.getAllocator(); +} +llvm::BumpPtrAllocator &Expression::getAllocator() const { + return _ctx.getAllocator(); +} +} // end namespace script +} // end namespace lld + +#endif diff --git a/include/lld/ReaderWriter/MachOLinkingContext.h b/include/lld/ReaderWriter/MachOLinkingContext.h new file mode 100644 index 0000000000000..8e253a1235f13 --- /dev/null +++ b/include/lld/ReaderWriter/MachOLinkingContext.h @@ -0,0 +1,369 @@ +//===- lld/ReaderWriter/MachOLinkingContext.h -----------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_MACHO_LINKING_CONTEXT_H +#define LLD_READER_WRITER_MACHO_LINKING_CONTEXT_H + +#include "lld/Core/LinkingContext.h" +#include "lld/Core/Reader.h" +#include "lld/Core/Writer.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MachO.h" +#include <set> + +using llvm::MachO::HeaderFileType; + +namespace lld { + +namespace mach_o { +class ArchHandler; +class MachODylibFile; +class MachOFile; +} + +class MachOLinkingContext : public LinkingContext { +public: + MachOLinkingContext(); + ~MachOLinkingContext(); + + enum Arch { + arch_unknown, + arch_ppc, + arch_x86, + arch_x86_64, + arch_armv6, + arch_armv7, + arch_armv7s, + arch_arm64, + }; + + enum class OS { + unknown, + macOSX, + iOS, + iOS_simulator + }; + + enum class ExportMode { + globals, // Default, all global symbols exported. + whiteList, // -exported_symbol[s_list], only listed symbols exported. + blackList // -unexported_symbol[s_list], no listed symbol exported. + }; + + enum class DebugInfoMode { + addDebugMap, // Default + noDebugMap // -S option + }; + + /// Initializes the context to sane default values given the specified output + /// file type, arch, os, and minimum os version. This should be called before + /// other setXXX() methods. 
+ void configure(HeaderFileType type, Arch arch, OS os, uint32_t minOSVersion); + + void addPasses(PassManager &pm) override; + bool validateImpl(raw_ostream &diagnostics) override; + std::string demangle(StringRef symbolName) const override; + + bool createImplicitFiles(std::vector<std::unique_ptr<File>> &) override; + + uint32_t getCPUType() const; + uint32_t getCPUSubType() const; + + bool addEntryPointLoadCommand() const; + bool addUnixThreadLoadCommand() const; + bool outputTypeHasEntry() const; + bool is64Bit() const; + + virtual uint64_t pageZeroSize() const { return _pageZeroSize; } + virtual uint64_t pageSize() const { return _pageSize; } + + mach_o::ArchHandler &archHandler() const; + + HeaderFileType outputMachOType() const { return _outputMachOType; } + + Arch arch() const { return _arch; } + StringRef archName() const { return nameFromArch(_arch); } + OS os() const { return _os; } + + ExportMode exportMode() const { return _exportMode; } + void setExportMode(ExportMode mode) { _exportMode = mode; } + void addExportSymbol(StringRef sym); + bool exportRestrictMode() const { return _exportMode != ExportMode::globals; } + bool exportSymbolNamed(StringRef sym) const; + + DebugInfoMode debugInfoMode() const { return _debugInfoMode; } + void setDebugInfoMode(DebugInfoMode mode) { + _debugInfoMode = mode; + } + + void appendOrderedSymbol(StringRef symbol, StringRef filename); + + bool keepPrivateExterns() const { return _keepPrivateExterns; } + void setKeepPrivateExterns(bool v) { _keepPrivateExterns = v; } + bool demangleSymbols() const { return _demangle; } + void setDemangleSymbols(bool d) { _demangle = d; } + /// Create file at specified path which will contain a binary encoding + /// of all input and output file paths. 
+ std::error_code createDependencyFile(StringRef path); + void addInputFileDependency(StringRef path) const; + void addInputFileNotFound(StringRef path) const; + void addOutputFileDependency(StringRef path) const; + + bool minOS(StringRef mac, StringRef iOS) const; + void setDoNothing(bool value) { _doNothing = value; } + bool doNothing() const { return _doNothing; } + bool printAtoms() const { return _printAtoms; } + bool testingFileUsage() const { return _testingFileUsage; } + const StringRefVector &searchDirs() const { return _searchDirs; } + const StringRefVector &frameworkDirs() const { return _frameworkDirs; } + void setSysLibRoots(const StringRefVector &paths); + const StringRefVector &sysLibRoots() const { return _syslibRoots; } + bool PIE() const { return _pie; } + void setPIE(bool pie) { _pie = pie; } + + uint64_t baseAddress() const { return _baseAddress; } + void setBaseAddress(uint64_t baseAddress) { _baseAddress = baseAddress; } + + /// \brief Checks whether a given path on the filesystem exists. + /// + /// When running in -test_file_usage mode, this method consults an + /// internally maintained list of files that exist (provided by -path_exists) + /// instead of the actual filesystem. + bool pathExists(StringRef path) const; + + /// Like pathExists() but only used on files - not directories. + bool fileExists(StringRef path) const; + + /// \brief Adds any library search paths derived from the given base, possibly + /// modified by -syslibroots. + /// + /// The set of paths added consists of approximately all syslibroot-prepended + /// versions of libPath that exist, or the original libPath if there are none + /// for whatever reason. With various edge-cases for compatibility. + void addModifiedSearchDir(StringRef libPath, bool isSystemPath = false); + + /// \brief Determine whether -lFoo can be resolve within the given path, and + /// return the filename if so. 
+  ///
+  /// The -lFoo option is documented to search for libFoo.dylib and libFoo.a in
+  /// that order, unless Foo ends in ".o", in which case only the exact file
+  /// matches (e.g. -lfoo.o would only find foo.o).
+  ErrorOr<StringRef> searchDirForLibrary(StringRef path,
+                                         StringRef libName) const;
+
+  /// \brief Iterates through all search path entries looking for libName (as
+  /// specified by -lFoo).
+  ErrorOr<StringRef> searchLibrary(StringRef libName) const;
+
+  /// Add a framework search path.  Internally, this method may prepend
+  /// syslibroot to the path.
+  void addFrameworkSearchDir(StringRef fwPath, bool isSystemPath = false);
+
+  /// \brief Iterates through all framework directories looking for
+  /// Foo.framework/Foo (when fwName = "Foo").
+  ErrorOr<StringRef> findPathForFramework(StringRef fwName) const;
+
+  /// \brief The dylib's binary compatibility version, in the raw uint32 format.
+  ///
+  /// When building a dynamic library, this is the compatibility version that
+  /// gets embedded into the result. Other Mach-O binaries that link against
+  /// this library will store the compatibility version in their load commands.
+  /// At runtime, the loader will verify that the binary is compatible with the
+  /// installed dynamic library.
+  uint32_t compatibilityVersion() const { return _compatibilityVersion; }
+
+  /// \brief The dylib's current version, in the raw uint32 format.
+  ///
+  /// When building a dynamic library, this is the current version that gets
+  /// embedded into the result. Other Mach-O binaries that link against
+  /// this library will store the compatibility version in their load commands.
+  uint32_t currentVersion() const { return _currentVersion; }
+
+  /// \brief The dylib's install name.
+  ///
+  /// Binaries that link against the dylib will embed this path into the dylib
+  /// load command. When loading the binaries at runtime, this is the location
+  /// on disk that the loader will look for the dylib.
+  StringRef installName() const { return _installName; }
+
+  /// \brief Whether or not the dylib has side effects during initialization.
+  ///
+  /// Dylibs marked as being dead strippable provide the guarantee that loading
+  /// the dylib has no side effects, allowing the linker to strip out the dylib
+  /// when linking a binary that does not use any of its symbols.
+  bool deadStrippableDylib() const { return _deadStrippableDylib; }
+
+  /// \brief The path to the executable that will load the bundle at runtime.
+  ///
+  /// When building a Mach-O bundle, this executable will be examined if there
+  /// are undefined symbols after the main link phase. It is expected that this
+  /// binary will be loading the bundle at runtime and will provide the symbols
+  /// at that point.
+  StringRef bundleLoader() const { return _bundleLoader; }
+
+  // Plain setters for the values returned by the accessors above. The
+  // StringRef setters store the reference itself, not a copy of the text.
+  void setCompatibilityVersion(uint32_t vers) { _compatibilityVersion = vers; }
+  void setCurrentVersion(uint32_t vers) { _currentVersion = vers; }
+  void setInstallName(StringRef name) { _installName = name; }
+  void setDeadStrippableDylib(bool deadStrippable) {
+    _deadStrippableDylib = deadStrippable;
+  }
+  void setBundleLoader(StringRef loader) { _bundleLoader = loader; }
+  void setPrintAtoms(bool value=true) { _printAtoms = value; }
+  void setTestingFileUsage(bool value = true) {
+    _testingFileUsage = value;
+  }
+  /// Records \p path in the set of existing paths, which is for testing only.
+  void addExistingPathForDebug(StringRef path) {
+    _existingPaths.insert(path);
+  }
+
+  void addRpath(StringRef rpath);
+  const StringRefVector &rpaths() const { return _rpaths; }
+
+  /// Add section alignment constraint on final layout.
+  void addSectionAlignment(StringRef seg, StringRef sect, uint8_t align2);
+
+  /// Returns true if specified section had alignment constraints.
+  bool sectionAligned(StringRef seg, StringRef sect, uint8_t &align2) const;
+
+  /// Path of the dynamic loader; always the literal "/usr/lib/dyld".
+  StringRef dyldPath() const { return "/usr/lib/dyld"; }
+
+  /// Stub creation Pass should be run.
+  bool needsStubsPass() const;
+
+  /// GOT creation Pass should be run.
+  bool needsGOTPass() const;
+
+  /// Pass to transform __compact_unwind into __unwind_info should be run.
+  bool needsCompactUnwindPass() const;
+
+  /// Pass to add shims switching between thumb and arm mode.
+  bool needsShimPass() const;
+
+  /// Magic symbol name stubs will need to help lazy bind.
+  StringRef binderSymbolName() const;
+
+  /// Used to keep track of direct and indirect dylibs.
+  void registerDylib(mach_o::MachODylibFile *dylib, bool upward) const;
+
+  /// Reads a file from disk to memory. Returns only a needed chunk
+  /// if a fat binary.
+  ErrorOr<std::unique_ptr<MemoryBuffer>> getMemoryBuffer(StringRef path);
+
+  /// Used to find indirect dylibs. Instantiates a MachODylibFile if one
+  /// has not already been made for the requested dylib.  Uses -L and -F
+  /// search paths to allow indirect dylibs to be overridden.
+  mach_o::MachODylibFile* findIndirectDylib(StringRef path);
+
+  uint32_t dylibCurrentVersion(StringRef installName) const;
+
+  uint32_t dylibCompatVersion(StringRef installName) const;
+
+  /// Creates a copy (owned by this MachOLinkingContext) of a string.
+  StringRef copy(StringRef str) { return str.copy(_allocator); }
+
+  /// If the memoryBuffer is a fat file with a slice for the current arch,
+  /// this method will return the offset and size of that slice.
+  bool sliceFromFatFile(const MemoryBuffer &mb, uint32_t &offset,
+                        uint32_t &size);
+
+  /// Returns if a command line option specified dylib is an upward link.
+  bool isUpwardDylib(StringRef installName) const;
+
+  // Static helpers that translate between Arch values, architecture names
+  // and Mach-O cputype/cpusubtype numbers.
+  static bool isThinObjectFile(StringRef path, Arch &arch);
+  static Arch archFromCpuType(uint32_t cputype, uint32_t cpusubtype);
+  static Arch archFromName(StringRef archName);
+  static StringRef nameFromArch(Arch arch);
+  static uint32_t cpuTypeFromArch(Arch arch);
+  static uint32_t cpuSubtypeFromArch(Arch arch);
+  static bool is64Bit(Arch arch);
+  static bool isHostEndian(Arch arch);
+  static bool isBigEndian(Arch arch);
+
+  /// Construct 32-bit value from string "X.Y.Z" where
+  /// bits are xxxx.yy.zz.  Largest number is 65535.255.255
+  static bool parsePackedVersion(StringRef str, uint32_t &result);
+
+  void finalizeInputFiles() override;
+
+  bool customAtomOrderer(const DefinedAtom *left, const DefinedAtom *right,
+                         bool &leftBeforeRight) const;
+
+private:
+  Writer &writer() const override;
+  mach_o::MachODylibFile* loadIndirectDylib(StringRef path);
+  void checkExportWhiteList(const DefinedAtom *atom) const;
+  void checkExportBlackList(const DefinedAtom *atom) const;
+  /// Element type of the static _s_archInfos table.
+  struct ArchInfo {
+    StringRef                 archName;
+    MachOLinkingContext::Arch arch;
+    bool                      littleEndian;
+    uint32_t                  cputype;
+    uint32_t                  cpusubtype;
+  };
+
+  /// Element type of _sectAligns: a segment/section name pair plus its
+  /// align2 value.
+  struct SectionAlign {
+    StringRef segmentName;
+    StringRef sectionName;
+    uint8_t   align2;
+  };
+
+  /// Element type of the vectors stored in _orderFiles.
+  struct OrderFileNode {
+    StringRef fileFilter;
+    unsigned  order;
+  };
+
+  static bool findOrderOrdinal(const std::vector<OrderFileNode> &nodes,
+                               const DefinedAtom *atom, unsigned &ordinal);
+
+  static ArchInfo _s_archInfos[];
+
+  std::set<StringRef> _existingPaths; // For testing only.
+  StringRefVector _searchDirs;
+  StringRefVector _syslibRoots;
+  StringRefVector _frameworkDirs;
+  HeaderFileType _outputMachOType;   // e.g. MH_EXECUTE
+  bool _outputMachOTypeStatic; // Disambiguate static vs dynamic prog
+  bool _doNothing;            // for -help and -v which just print info
+  bool _pie;
+  Arch _arch;
+  OS _os;
+  uint32_t _osMinVersion;
+  uint64_t _pageZeroSize;
+  uint64_t _pageSize;
+  uint64_t _baseAddress;
+  uint32_t _compatibilityVersion;
+  uint32_t _currentVersion;
+  StringRef _installName;
+  StringRefVector _rpaths;
+  bool _deadStrippableDylib;
+  bool _printAtoms;
+  bool _testingFileUsage;
+  bool _keepPrivateExterns;
+  bool _demangle;
+  StringRef _bundleLoader;
+  // The members marked mutable below can be modified from const member
+  // functions.
+  mutable std::unique_ptr<mach_o::ArchHandler> _archHandler;
+  mutable std::unique_ptr<Writer> _writer;
+  std::vector<SectionAlign> _sectAligns;
+  mutable llvm::StringMap<mach_o::MachODylibFile*> _pathToDylibMap;
+  mutable std::set<mach_o::MachODylibFile*> _allDylibs;
+  mutable std::set<mach_o::MachODylibFile*> _upwardDylibs;
+  mutable std::vector<std::unique_ptr<File>> _indirectDylibs;
+  ExportMode _exportMode;
+  llvm::StringSet<> _exportedSymbols;
+  DebugInfoMode _debugInfoMode;
+  std::unique_ptr<llvm::raw_fd_ostream> _dependencyInfo;
+  llvm::StringMap<std::vector<OrderFileNode>> _orderFiles;
+  unsigned _orderFileEntries;
+};
+
+} // end namespace lld
+
+#endif
diff --git a/include/lld/ReaderWriter/PECOFFLinkingContext.h b/include/lld/ReaderWriter/PECOFFLinkingContext.h
new file mode 100644
index 0000000000000..cccb8ac03b6e3
--- /dev/null
+++ b/include/lld/ReaderWriter/PECOFFLinkingContext.h
@@ -0,0 +1,463 @@
+//===- lld/ReaderWriter/PECOFFLinkingContext.h ----------------------------===//
+//
+// The LLVM Linker
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLD_READER_WRITER_PECOFF_LINKING_CONTEXT_H
+#define LLD_READER_WRITER_PECOFF_LINKING_CONTEXT_H
+
+#include "lld/Core/LinkingContext.h"
+#include "lld/Core/Reader.h"
+#include "lld/Core/Writer.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/COFF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FileUtilities.h"
+#include <map>
+#include <mutex>
+#include <set>
+#include <vector>
+
+using llvm::COFF::MachineTypes;
+using llvm::COFF::WindowsSubsystem;
+
+// Default DOS stub: 128 bytes that start with the 'M', 'Z' signature; the
+// remaining elements are zero-initialized.
+static const uint8_t DEFAULT_DOS_STUB[128] = {'M', 'Z'};
+
+namespace lld {
+
+/// LinkingContext subclass that holds the options for a PE/COFF link.
+class PECOFFLinkingContext : public LinkingContext {
+public:
+  /// Sets every option to its default value and enables dead stripping.
+  /// The base address starts out as invalidBaseAddress, i.e. "not set".
+  PECOFFLinkingContext()
+      : _mutex(), _allocMutex(), _hasEntry(true),
+        _baseAddress(invalidBaseAddress), _stackReserve(1024 * 1024),
+        _stackCommit(4096), _heapReserve(1024 * 1024), _heapCommit(4096),
+        _noDefaultLibAll(false), _sectionDefaultAlignment(4096),
+        _subsystem(llvm::COFF::IMAGE_SUBSYSTEM_UNKNOWN),
+        _machineType(llvm::COFF::IMAGE_FILE_MACHINE_I386), _imageVersion(0, 0),
+        _minOSVersion(6, 0), _nxCompat(true), _largeAddressAware(false),
+        _allowBind(true), _allowIsolation(true), _swapRunFromCD(false),
+        _swapRunFromNet(false), _baseRelocationEnabled(true),
+        _terminalServerAware(true), _dynamicBaseEnabled(true),
+        _createManifest(true), _embedManifest(false), _manifestId(1),
+        _manifestUAC(true), _manifestLevel("'asInvoker'"),
+        _manifestUiAccess("'false'"), _isDll(false), _highEntropyVA(true),
+        _requireSEH(false), _noSEH(false), _implib(""), _debug(false),
+        _pdbFilePath(""), _dosStub(llvm::makeArrayRef(DEFAULT_DOS_STUB)),
+        _parseDirectives(nullptr) {
+    setDeadStripping(true);
+  }
+
+  /// A major/minor version pair.
+  struct Version {
+    Version(int v1, int v2) : majorVersion(v1), minorVersion(v2) {}
+    int majorVersion;
+    int minorVersion;
+  };
+
+  /// Description of one exported symbol.
+  struct ExportDesc {
+    ExportDesc()
+        : ordinal(-1), noname(false), isData(false), isPrivate(false) {}
+
+    /// Orders descriptors by their external name.
+    bool operator<(const ExportDesc &other) const {
+      return getExternalName().compare(other.getExternalName()) < 0;
+    }
+
+    /// Returns mangledName, or name when mangledName is empty.
+    StringRef getRealName() const {
+      return mangledName.empty() ? name : mangledName;
+    }
+
+    /// Returns externalName, or name when externalName is empty.
+    StringRef getExternalName() const {
+      return externalName.empty() ? name : externalName;
+    }
+
+    std::string name;
+    std::string externalName;
+    std::string mangledName;
+    int ordinal;
+    bool noname;
+    bool isData;
+    bool isPrivate;
+  };
+
+  /// Signature of the callback stored by setParseDirectives().
+  typedef bool (*ParseDirectives)(int, const char **, PECOFFLinkingContext &,
+                                  raw_ostream &);
+
+  /// \brief Casting support
+  ///
+  /// Note that this accepts every LinkingContext unconditionally.
+  static bool classof(const LinkingContext *info) { return true; }
+
+  Writer &writer() const override;
+  bool validateImpl(raw_ostream &diagnostics) override;
+
+  void addPasses(PassManager &pm) override;
+
+  bool createImplicitFiles(
+      std::vector<std::unique_ptr<File> > &result) override;
+
+  /// True only when the machine type is IMAGE_FILE_MACHINE_AMD64.
+  bool is64Bit() const {
+    return _machineType == llvm::COFF::IMAGE_FILE_MACHINE_AMD64;
+  }
+
+  // Returns a set of all defined symbols in input files.
+  const std::set<std::string> &definedSymbols();
+
+  /// Page size of x86 processor. Some data needs to be aligned at page boundary
+  /// when loaded into memory.
+ uint64_t getPageSize() const { + return 0x1000; + } + + void appendInputSearchPath(StringRef dirPath) { + _inputSearchPaths.push_back(dirPath); + } + + const std::vector<StringRef> getInputSearchPaths() { + return _inputSearchPaths; + } + + void registerTemporaryFile(StringRef path) { + std::unique_ptr<llvm::FileRemover> fileRemover( + new llvm::FileRemover(Twine(allocate(path)))); + _tempFiles.push_back(std::move(fileRemover)); + } + + StringRef searchLibraryFile(StringRef path) const; + + StringRef decorateSymbol(StringRef name) const; + StringRef undecorateSymbol(StringRef name) const; + + void setEntrySymbolName(StringRef name) { _entry = name; } + StringRef getEntrySymbolName() const { return _entry; } + + void setHasEntry(bool val) { _hasEntry = val; } + bool hasEntry() const { return _hasEntry; } + + void setBaseAddress(uint64_t addr) { _baseAddress = addr; } + uint64_t getBaseAddress() const; + + void setStackReserve(uint64_t size) { _stackReserve = size; } + void setStackCommit(uint64_t size) { _stackCommit = size; } + uint64_t getStackReserve() const { return _stackReserve; } + uint64_t getStackCommit() const { return _stackCommit; } + + void setHeapReserve(uint64_t size) { _heapReserve = size; } + void setHeapCommit(uint64_t size) { _heapCommit = size; } + uint64_t getHeapReserve() const { return _heapReserve; } + uint64_t getHeapCommit() const { return _heapCommit; } + + void setSectionDefaultAlignment(uint32_t val) { + _sectionDefaultAlignment = val; + } + uint32_t getSectionDefaultAlignment() const { + return _sectionDefaultAlignment; + } + + void setSubsystem(WindowsSubsystem ss) { _subsystem = ss; } + WindowsSubsystem getSubsystem() const { return _subsystem; } + + void setMachineType(MachineTypes type) { _machineType = type; } + MachineTypes getMachineType() const { return _machineType; } + + void setImageVersion(const Version &version) { _imageVersion = version; } + Version getImageVersion() const { return _imageVersion; } + + void 
setMinOSVersion(const Version &version) { _minOSVersion = version; } + Version getMinOSVersion() const { return _minOSVersion; } + + void setNxCompat(bool nxCompat) { _nxCompat = nxCompat; } + bool isNxCompat() const { return _nxCompat; } + + void setLargeAddressAware(bool val) { _largeAddressAware = val; } + bool getLargeAddressAware() const { return _largeAddressAware; } + + void setAllowBind(bool val) { _allowBind = val; } + bool getAllowBind() const { return _allowBind; } + + void setAllowIsolation(bool val) { _allowIsolation = val; } + bool getAllowIsolation() const { return _allowIsolation; } + + void setSwapRunFromCD(bool val) { _swapRunFromCD = val; } + bool getSwapRunFromCD() const { return _swapRunFromCD; } + + void setSwapRunFromNet(bool val) { _swapRunFromNet = val; } + bool getSwapRunFromNet() const { return _swapRunFromNet; } + + void setBaseRelocationEnabled(bool val) { _baseRelocationEnabled = val; } + bool getBaseRelocationEnabled() const { return _baseRelocationEnabled; } + + void setTerminalServerAware(bool val) { _terminalServerAware = val; } + bool isTerminalServerAware() const { return _terminalServerAware; } + + void setDynamicBaseEnabled(bool val) { _dynamicBaseEnabled = val; } + bool getDynamicBaseEnabled() const { return _dynamicBaseEnabled; } + + void setCreateManifest(bool val) { _createManifest = val; } + bool getCreateManifest() const { return _createManifest; } + + void setManifestOutputPath(std::string val) { _manifestOutputPath = val; } + const std::string &getManifestOutputPath() const { + return _manifestOutputPath; + } + + void setEmbedManifest(bool val) { _embedManifest = val; } + bool getEmbedManifest() const { return _embedManifest; } + + void setManifestId(int val) { _manifestId = val; } + int getManifestId() const { return _manifestId; } + + void setManifestUAC(bool val) { _manifestUAC = val; } + bool getManifestUAC() const { return _manifestUAC; } + + void setManifestLevel(std::string val) { _manifestLevel = std::move(val); 
} + const std::string &getManifestLevel() const { return _manifestLevel; } + + void setManifestUiAccess(std::string val) { _manifestUiAccess = val; } + const std::string &getManifestUiAccess() const { return _manifestUiAccess; } + + void setManifestDependency(std::string val) { _manifestDependency = val; } + const std::string &getManifestDependency() const { + return _manifestDependency; + } + + void setIsDll(bool val) { _isDll = val; } + bool isDll() const { return _isDll; } + + void setSafeSEH(bool val) { + if (val) + _requireSEH = true; + else + _noSEH = true; + } + bool requireSEH() const { return _requireSEH; } + bool noSEH() const { return _noSEH; } + + void setHighEntropyVA(bool val) { _highEntropyVA = val; } + bool getHighEntropyVA() const { return _highEntropyVA; } + + void setOutputImportLibraryPath(const std::string &val) { _implib = val; } + std::string getOutputImportLibraryPath() const; + + void setDebug(bool val) { _debug = val; } + bool getDebug() { return _debug; } + + void setPDBFilePath(StringRef str) { _pdbFilePath = str; } + std::string getPDBFilePath() const; + + void addDelayLoadDLL(StringRef dll) { + _delayLoadDLLs.insert(dll.lower()); + } + bool isDelayLoadDLL(StringRef dll) const { + return _delayLoadDLLs.count(dll.lower()) == 1; + } + + StringRef getOutputSectionName(StringRef sectionName) const; + bool addSectionRenaming(raw_ostream &diagnostics, + StringRef from, StringRef to); + + const std::set<std::string> &getAlternateNames(StringRef name) { + return _alternateNames[name]; + } + + void addAlternateName(StringRef weak, StringRef def) { + _alternateNames[def].insert(weak); + } + + void addNoDefaultLib(StringRef path) { + if (path.endswith_lower(".lib")) + _noDefaultLibs.insert(path.drop_back(4).lower()); + else + _noDefaultLibs.insert(path.lower()); + } + + bool hasNoDefaultLib(StringRef path) const { + if (path.endswith_lower(".lib")) + return _noDefaultLibs.count(path.drop_back(4).lower()) > 0; + return 
_noDefaultLibs.count(path.lower()) > 0; + } + + void setNoDefaultLibAll(bool val) { _noDefaultLibAll = val; } + bool getNoDefaultLibAll() const { return _noDefaultLibAll; } + + void setSectionSetMask(StringRef sectionName, uint32_t flags); + void setSectionClearMask(StringRef sectionName, uint32_t flags); + uint32_t getSectionAttributes(StringRef sectionName, uint32_t flags) const; + + void setDosStub(ArrayRef<uint8_t> data) { _dosStub = data; } + ArrayRef<uint8_t> getDosStub() const { return _dosStub; } + + void addDllExport(ExportDesc &desc); + std::vector<ExportDesc> &getDllExports() { return _dllExports; } + const std::vector<ExportDesc> &getDllExports() const { return _dllExports; } + + StringRef getDelayLoadHelperName() const { + return is64Bit() ? "__delayLoadHelper2" : "___delayLoadHelper2@8"; + } + + StringRef allocate(StringRef ref) const { + _allocMutex.lock(); + char *x = _allocator.Allocate<char>(ref.size() + 1); + _allocMutex.unlock(); + memcpy(x, ref.data(), ref.size()); + x[ref.size()] = '\0'; + return x; + } + + ArrayRef<uint8_t> allocate(ArrayRef<uint8_t> array) const { + size_t size = array.size(); + _allocMutex.lock(); + uint8_t *p = _allocator.Allocate<uint8_t>(size); + _allocMutex.unlock(); + memcpy(p, array.data(), size); + return ArrayRef<uint8_t>(p, p + array.size()); + } + + template <typename T> T &allocateCopy(const T &x) const { + _allocMutex.lock(); + T *r = new (_allocator) T(x); + _allocMutex.unlock(); + return *r; + } + + void addLibraryFile(std::unique_ptr<FileNode> file); + + void setModuleDefinitionFile(const std::string val) { + _moduleDefinitionFile = val; + } + std::string getModuleDefinitionFile() const { + return _moduleDefinitionFile; + } + + std::recursive_mutex &getMutex() { return _mutex; } + + void setParseDirectives(ParseDirectives parseDirectives) { + _parseDirectives = parseDirectives; + } + + ParseDirectives getParseDirectives() { + return _parseDirectives; + } + +protected: + /// Method to create a internal file 
for the entry symbol + std::unique_ptr<File> createEntrySymbolFile() const override; + + /// Method to create a internal file for an undefined symbol + std::unique_ptr<File> createUndefinedSymbolFile() const override; + +private: + enum : uint64_t { + invalidBaseAddress = UINT64_MAX, + pe32DefaultBaseAddress = 0x400000U, + pe32PlusDefaultBaseAddress = 0x140000000U + }; + + std::recursive_mutex _mutex; + mutable std::mutex _allocMutex; + + std::string _entry; + + // False if /noentry option is given. + bool _hasEntry; + + // The start address for the program. The default value for the executable is + // 0x400000, but can be altered using /base command line option. + uint64_t _baseAddress; + + uint64_t _stackReserve; + uint64_t _stackCommit; + uint64_t _heapReserve; + uint64_t _heapCommit; + bool _noDefaultLibAll; + uint32_t _sectionDefaultAlignment; + WindowsSubsystem _subsystem; + MachineTypes _machineType; + Version _imageVersion; + Version _minOSVersion; + bool _nxCompat; + bool _largeAddressAware; + bool _allowBind; + bool _allowIsolation; + bool _swapRunFromCD; + bool _swapRunFromNet; + bool _baseRelocationEnabled; + bool _terminalServerAware; + bool _dynamicBaseEnabled; + bool _createManifest; + std::string _manifestOutputPath; + bool _embedManifest; + int _manifestId; + bool _manifestUAC; + std::string _manifestLevel; + std::string _manifestUiAccess; + std::string _manifestDependency; + bool _isDll; + bool _highEntropyVA; + + // True if /SAFESEH option is specified. Valid only for x86. If true, LLD will + // produce an image with SEH table. If any modules were not compatible with + // SEH, LLD will exit with an error. + bool _requireSEH; + + // True if /SAFESEH:no option is specified. Valid only for x86. If true, LLD + // will not produce an image with SEH table even if all input object files are + // compatible with SEH. + bool _noSEH; + + // /IMPLIB command line option. + std::string _implib; + + // True if /DEBUG is given. 
+  bool _debug;
+
+  // PDB file output path. NB: this is dummy -- LLD just creates the empty file.
+  std::string _pdbFilePath;
+
+  // /DELAYLOAD option. Names are stored in lower case.
+  std::set<std::string> _delayLoadDLLs;
+
+  // The set to store /nodefaultlib arguments. Names are stored in lower case
+  // with any trailing ".lib" removed.
+  std::set<std::string> _noDefaultLibs;
+
+  std::vector<StringRef> _inputSearchPaths;
+  std::unique_ptr<Writer> _writer;
+
+  // A map for weak aliases. The key is the defined symbol and the value is
+  // the set of weak names registered for it.
+  std::map<std::string, std::set<std::string>> _alternateNames;
+
+  // A map for section renaming. For example, if there is an entry in the map
+  // whose value is .rdata -> .text, the section contents of .rdata will be
+  // merged to .text in the resulting executable.
+  std::map<std::string, std::string> _renamedSections;
+
+  // Section attributes specified by /section option.
+  std::map<std::string, uint32_t> _sectionSetMask;
+  std::map<std::string, uint32_t> _sectionClearMask;
+
+  // DLLExport'ed symbols.
+  std::vector<ExportDesc> _dllExports;
+
+  // List of files that will be removed on destruction.
+  std::vector<std::unique_ptr<llvm::FileRemover> > _tempFiles;
+
+  // DOS Stub. DOS stub is data located at the beginning of PE/COFF file.
+  // The Windows loader does not really care about DOS stub contents, but it's
+  // usually a small DOS program that prints out a message "This program
+  // requires Microsoft Windows." This feature was somewhat useful before
+  // Windows 95.
+  ArrayRef<uint8_t> _dosStub;
+
+  // Name of the temporary file for lib.exe subcommand. For debugging
+  // only.
+ std::string _moduleDefinitionFile; + + std::set<std::string> _definedSyms; + std::set<Node *> _seen; + + ParseDirectives _parseDirectives; +}; + +} // end namespace lld + +#endif diff --git a/include/lld/ReaderWriter/RelocationHelperFunctions.h b/include/lld/ReaderWriter/RelocationHelperFunctions.h new file mode 100644 index 0000000000000..8738e91ebabc6 --- /dev/null +++ b/include/lld/ReaderWriter/RelocationHelperFunctions.h @@ -0,0 +1,57 @@ +//===- lld/ReaderWriter/RelocationHelperFunctions.h------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_RELOCATION_HELPER_FUNCTIONS_H +#define LLD_READER_WRITER_RELOCATION_HELPER_FUNCTIONS_H + +namespace lld { + +/// Gather val's bits as specified by the mask. Example: +/// +/// Val: 0bABCDEFGHIJKLMN +/// Mask: 0b10111100001011 +/// Output: 0b000000ACDEFKMN +template <typename T> T gatherBits(T val, T mask) { + T result = 0; + size_t off = 0; + + for (size_t bit = 0; bit < sizeof(T) * 8; ++bit) { + bool maskBit = (mask >> bit) & 1; + if (maskBit) { + bool valBit = (val >> bit) & 1; + result |= static_cast<T>(valBit) << off; + ++off; + } + } + return result; +} + +/// Scatter val's bits as specified by the mask. 
Example: +/// +/// Val: 0bABCDEFG +/// Mask: 0b10111100001011 +/// Output: 0b00ABCD0000E0FG +template <typename T> T scatterBits(T val, T mask) { + T result = 0; + size_t off = 0; + + for (size_t bit = 0; bit < sizeof(T) * 8; ++bit) { + bool maskBit = (mask >> bit) & 1; + if (maskBit) { + bool valBit = (val >> off) & 1; + result |= static_cast<T>(valBit) << bit; + ++off; + } + } + return result; +} + +} // namespace lld + +#endif // LLD_READER_WRITER_RELOCATION_HELPER_FUNCTIONS_H diff --git a/include/lld/ReaderWriter/YamlContext.h b/include/lld/ReaderWriter/YamlContext.h new file mode 100644 index 0000000000000..a15a398ec636a --- /dev/null +++ b/include/lld/ReaderWriter/YamlContext.h @@ -0,0 +1,46 @@ +//===- lld/ReaderWriter/YamlContext.h - object used in YAML I/O context ---===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_YAML_CONTEXT_H +#define LLD_READER_WRITER_YAML_CONTEXT_H + +#include "lld/Core/LLVM.h" +#include <functional> +#include <memory> +#include <vector> + +namespace lld { +class File; +class LinkingContext; +namespace mach_o { +namespace normalized { +struct NormalizedFile; +} +} + +using lld::mach_o::normalized::NormalizedFile; + +/// When YAML I/O is used in lld, the yaml context always holds a YamlContext +/// object. We need to support hetergenous yaml documents which each require +/// different context info. This struct supports all clients. +struct YamlContext { + YamlContext() + : _linkingContext(nullptr), _registry(nullptr), _file(nullptr), + _normalizeMachOFile(nullptr) {} + + const LinkingContext *_linkingContext; + const Registry *_registry; + File *_file; + NormalizedFile *_normalizeMachOFile; + StringRef _path; +}; + +} // end namespace lld + +#endif // LLD_READER_WRITER_YAML_CONTEXT_H |