diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2015-03-24 21:31:36 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2015-03-24 21:31:36 +0000 |
commit | fb911942f1434f3d1750f83f25f5e42c80e60638 (patch) | |
tree | 1678c4a4f0182e4029a86d135aa4a1b7d09e3c41 /lib | |
download | src-fb911942f1434f3d1750f83f25f5e42c80e60638.tar.gz src-fb911942f1434f3d1750f83f25f5e42c80e60638.zip |
Notes
Diffstat (limited to 'lib')
220 files changed, 49542 insertions, 0 deletions
diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt new file mode 100644 index 000000000000..699f5e93f8af --- /dev/null +++ b/lib/CMakeLists.txt @@ -0,0 +1,4 @@ +add_subdirectory(Config) +add_subdirectory(Core) +add_subdirectory(Driver) +add_subdirectory(ReaderWriter) diff --git a/lib/Config/CMakeLists.txt b/lib/Config/CMakeLists.txt new file mode 100644 index 000000000000..f7ea0423b2c9 --- /dev/null +++ b/lib/Config/CMakeLists.txt @@ -0,0 +1,5 @@ +add_llvm_library(lldConfig + Version.cpp + LINK_LIBS + LLVMSupport + ) diff --git a/lib/Config/Makefile b/lib/Config/Makefile new file mode 100644 index 000000000000..b3c57f81418f --- /dev/null +++ b/lib/Config/Makefile @@ -0,0 +1,13 @@ +##===- lib/Config/Makefile ---------------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LLD_LEVEL := ../.. +LIBRARYNAME := lldConfig + +include $(LLD_LEVEL)/Makefile diff --git a/lib/Config/Version.cpp b/lib/Config/Version.cpp new file mode 100644 index 000000000000..b64ccef12c7b --- /dev/null +++ b/lib/Config/Version.cpp @@ -0,0 +1,66 @@ +//===- lib/Config/Version.cpp - LLD Version Number ---------------*- C++-=====// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines several version-related utility functions for LLD. 
+// +//===----------------------------------------------------------------------===// + +#include "lld/Config/Version.h" +#include "llvm/Support/raw_ostream.h" +#include <cstdlib> +#include <cstring> + +using namespace llvm; + +namespace lld { + +StringRef getLLDRepositoryPath() { +#ifdef LLD_REPOSITORY_STRING + return LLD_REPOSITORY_STRING; +#else + return ""; +#endif +} + +StringRef getLLDRevision() { +#ifdef LLD_REVISION_STRING + return LLD_REVISION_STRING; +#else + return ""; +#endif +} + +std::string getLLDRepositoryVersion() { + std::string buf; + llvm::raw_string_ostream OS(buf); + std::string Path = getLLDRepositoryPath(); + std::string Revision = getLLDRevision(); + if (!Path.empty() || !Revision.empty()) { + OS << '('; + if (!Path.empty()) + OS << Path; + if (!Revision.empty()) { + if (!Path.empty()) + OS << ' '; + OS << Revision; + } + OS << ')'; + } + return OS.str(); +} + +StringRef getLLDVersion() { +#ifdef LLD_VERSION_STRING + return LLD_VERSION_STRING; +#else + return ""; +#endif +} + +} // end namespace lld diff --git a/lib/Core/CMakeLists.txt b/lib/Core/CMakeLists.txt new file mode 100644 index 000000000000..009b50a38335 --- /dev/null +++ b/lib/Core/CMakeLists.txt @@ -0,0 +1,12 @@ +add_llvm_library(lldCore + DefinedAtom.cpp + Error.cpp + File.cpp + LinkingContext.cpp + Reader.cpp + Resolver.cpp + SymbolTable.cpp + Writer.cpp + LINK_LIBS + LLVMSupport + ) diff --git a/lib/Core/DefinedAtom.cpp b/lib/Core/DefinedAtom.cpp new file mode 100644 index 000000000000..b3f81ca65a91 --- /dev/null +++ b/lib/Core/DefinedAtom.cpp @@ -0,0 +1,96 @@ +//===- DefinedAtom.cpp ------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/ErrorHandling.h" +#include "lld/Core/DefinedAtom.h" +#include "lld/Core/File.h" + +namespace lld { + +DefinedAtom::ContentPermissions DefinedAtom::permissions() const { + // By default base permissions on content type. + return permissions(this->contentType()); +} + +// Utility function for deriving permissions from content type +DefinedAtom::ContentPermissions DefinedAtom::permissions(ContentType type) { + switch (type) { + case typeCode: + case typeResolver: + case typeBranchIsland: + case typeBranchShim: + case typeStub: + case typeStubHelper: + case typeMachHeader: + return permR_X; + + case typeConstant: + case typeCString: + case typeUTF16String: + case typeCFI: + case typeLSDA: + case typeLiteral4: + case typeLiteral8: + case typeLiteral16: + case typeDTraceDOF: + case typeCompactUnwindInfo: + case typeProcessedUnwindInfo: + case typeRONote: + case typeNoAlloc: + return permR__; + + case typeData: + case typeDataFast: + case typeZeroFill: + case typeZeroFillFast: + case typeObjC1Class: + case typeLazyPointer: + case typeLazyDylibPointer: + case typeThunkTLV: + case typeRWNote: + return permRW_; + + case typeGOT: + case typeConstData: + case typeCFString: + case typeInitializerPtr: + case typeTerminatorPtr: + case typeCStringPtr: + case typeObjCClassPtr: + case typeObjC2CategoryList: + case typeInterposingTuples: + case typeTLVInitialData: + case typeTLVInitialZeroFill: + case typeTLVInitializerPtr: + case typeThreadData: + case typeThreadZeroFill: + return permRW_L; + + case typeGroupComdat: + case typeGnuLinkOnce: + case typeUnknown: + case typeTempLTO: + return permUnknown; + } + llvm_unreachable("unknown content type"); +} + +bool DefinedAtom::compareByPosition(const DefinedAtom *lhs, + const DefinedAtom *rhs) { + if (lhs == rhs) + return false; + const File *lhsFile = &lhs->file(); + const File *rhsFile = &rhs->file(); + if (lhsFile->ordinal() 
!= rhsFile->ordinal()) + return lhsFile->ordinal() < rhsFile->ordinal(); + assert(lhs->ordinal() != rhs->ordinal()); + return lhs->ordinal() < rhs->ordinal(); +} + +} // namespace diff --git a/lib/Core/Error.cpp b/lib/Core/Error.cpp new file mode 100644 index 000000000000..24809c3869e5 --- /dev/null +++ b/lib/Core/Error.cpp @@ -0,0 +1,151 @@ +//===- Error.cpp - system_error extensions for lld --------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "lld/Core/Error.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/ErrorHandling.h" +#include <mutex> +#include <string> +#include <vector> + +using namespace lld; + +class _NativeReaderErrorCategory : public std::error_category { +public: + const char* name() const LLVM_NOEXCEPT override { + return "lld.native.reader"; + } + + std::string message(int ev) const override { + switch (static_cast<NativeReaderError>(ev)) { + case NativeReaderError::success: + return "Success"; + case NativeReaderError::unknown_file_format: + return "Unknown file format"; + case NativeReaderError::file_too_short: + return "file truncated"; + case NativeReaderError::file_malformed: + return "file malformed"; + case NativeReaderError::memory_error: + return "out of memory"; + case NativeReaderError::unknown_chunk_type: + return "unknown chunk type"; + case NativeReaderError::conflicting_target_machine: + return "conflicting target machine"; + } + llvm_unreachable("An enumerator of NativeReaderError does not have a " + "message defined."); + } +}; + +const std::error_category &lld::native_reader_category() { + static _NativeReaderErrorCategory o; + return o; +} + +class _YamlReaderErrorCategory : public std::error_category { +public: + const char* name() const LLVM_NOEXCEPT override { + return "lld.yaml.reader"; + } + + 
std::string message(int ev) const override { + switch (static_cast<YamlReaderError>(ev)) { + case YamlReaderError::success: + return "Success"; + case YamlReaderError::unknown_keyword: + return "Unknown keyword found in yaml file"; + case YamlReaderError::illegal_value: + return "Bad value found in yaml file"; + } + llvm_unreachable("An enumerator of YamlReaderError does not have a " + "message defined."); + } +}; + +const std::error_category &lld::YamlReaderCategory() { + static _YamlReaderErrorCategory o; + return o; +} + +class _LinkerScriptReaderErrorCategory : public std::error_category { +public: + const char *name() const LLVM_NOEXCEPT override { + return "lld.linker-script.reader"; + } + + std::string message(int ev) const override { + switch (static_cast<LinkerScriptReaderError>(ev)) { + case LinkerScriptReaderError::success: + return "Success"; + case LinkerScriptReaderError::parse_error: + return "Error parsing linker script"; + case LinkerScriptReaderError::unknown_symbol_in_expr: + return "Unknown symbol found when evaluating linker script expression"; + case LinkerScriptReaderError::unrecognized_function_in_expr: + return "Unrecognized function call when evaluating linker script " + "expression"; + } + llvm_unreachable("An enumerator of LinkerScriptReaderError does not have a " + "message defined."); + } +}; + +const std::error_category &lld::LinkerScriptReaderCategory() { + static _LinkerScriptReaderErrorCategory o; + return o; +} + + +namespace lld { + +/// Temporary class to enable make_dynamic_error_code() until +/// llvm::ErrorOr<> is updated to work with error encapsulations +/// other than error_code. 
+class dynamic_error_category : public std::error_category { +public: + ~dynamic_error_category() LLVM_NOEXCEPT {} + + const char *name() const LLVM_NOEXCEPT override { + return "lld.dynamic_error"; + } + + std::string message(int ev) const override { + assert(ev >= 0); + assert(ev < (int)_messages.size()); + // The value is an index into the string vector. + return _messages[ev]; + } + + int add(std::string msg) { + std::lock_guard<std::recursive_mutex> lock(_mutex); + // Value zero is always the successs value. + if (_messages.empty()) + _messages.push_back("Success"); + _messages.push_back(msg); + // Return the index of the string just appended. + return _messages.size() - 1; + } + +private: + std::vector<std::string> _messages; + std::recursive_mutex _mutex; +}; + +static dynamic_error_category categorySingleton; + +std::error_code make_dynamic_error_code(StringRef msg) { + return std::error_code(categorySingleton.add(msg), categorySingleton); +} + +std::error_code make_dynamic_error_code(const Twine &msg) { + return std::error_code(categorySingleton.add(msg.str()), categorySingleton); +} + +} diff --git a/lib/Core/File.cpp b/lib/Core/File.cpp new file mode 100644 index 000000000000..dbac86b368aa --- /dev/null +++ b/lib/Core/File.cpp @@ -0,0 +1,30 @@ +//===- Core/File.cpp - A Container of Atoms -------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "lld/Core/File.h" +#include "lld/Core/LLVM.h" +#include <mutex> + +namespace lld { + +File::~File() {} + +File::atom_collection_empty<DefinedAtom> File::_noDefinedAtoms; +File::atom_collection_empty<UndefinedAtom> File::_noUndefinedAtoms; +File::atom_collection_empty<SharedLibraryAtom> File::_noSharedLibraryAtoms; +File::atom_collection_empty<AbsoluteAtom> File::_noAbsoluteAtoms; + +std::error_code File::parse() { + std::lock_guard<std::mutex> lock(_parseMutex); + if (!_lastError.hasValue()) + _lastError = doParse(); + return _lastError.getValue(); +} + +} // namespace lld diff --git a/lib/Core/LinkingContext.cpp b/lib/Core/LinkingContext.cpp new file mode 100644 index 000000000000..c6656b935916 --- /dev/null +++ b/lib/Core/LinkingContext.cpp @@ -0,0 +1,104 @@ +//===- lib/Core/LinkingContext.cpp - Linker Context Object Interface ------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "lld/Core/Alias.h" +#include "lld/Core/LinkingContext.h" +#include "lld/Core/Resolver.h" +#include "lld/Core/Simple.h" +#include "lld/Core/Writer.h" +#include "llvm/ADT/Triple.h" + +namespace lld { + +LinkingContext::LinkingContext() + : _deadStrip(false), _allowDuplicates(false), + _globalsAreDeadStripRoots(false), + _searchArchivesToOverrideTentativeDefinitions(false), + _searchSharedLibrariesToOverrideTentativeDefinitions(false), + _warnIfCoalesableAtomsHaveDifferentCanBeNull(false), + _warnIfCoalesableAtomsHaveDifferentLoadName(false), + _printRemainingUndefines(true), _allowRemainingUndefines(false), + _logInputFiles(false), _allowShlibUndefines(false), + _outputFileType(OutputFileType::Default), _nextOrdinal(0) {} + +LinkingContext::~LinkingContext() {} + +bool LinkingContext::validate(raw_ostream &diagnostics) { + return validateImpl(diagnostics); +} + +std::error_code LinkingContext::writeFile(const File &linkedFile) const { + return this->writer().writeFile(linkedFile, _outputPath); +} + +bool LinkingContext::createImplicitFiles( + std::vector<std::unique_ptr<File> > &result) { + return this->writer().createImplicitFiles(result); +} + +std::unique_ptr<File> LinkingContext::createEntrySymbolFile() const { + return createEntrySymbolFile("<command line option -e>"); +} + +std::unique_ptr<File> +LinkingContext::createEntrySymbolFile(StringRef filename) const { + if (entrySymbolName().empty()) + return nullptr; + std::unique_ptr<SimpleFile> entryFile(new SimpleFile(filename)); + entryFile->addAtom( + *(new (_allocator) SimpleUndefinedAtom(*entryFile, entrySymbolName()))); + return std::move(entryFile); +} + +std::unique_ptr<File> LinkingContext::createUndefinedSymbolFile() const { + return createUndefinedSymbolFile("<command line option -u or --defsym>"); +} + +std::unique_ptr<File> +LinkingContext::createUndefinedSymbolFile(StringRef filename) const { + if 
(_initialUndefinedSymbols.empty()) + return nullptr; + std::unique_ptr<SimpleFile> undefinedSymFile(new SimpleFile(filename)); + for (StringRef undefSym : _initialUndefinedSymbols) + undefinedSymFile->addAtom(*(new (_allocator) SimpleUndefinedAtom( + *undefinedSymFile, undefSym))); + return std::move(undefinedSymFile); +} + +std::unique_ptr<File> LinkingContext::createAliasSymbolFile() const { + if (getAliases().empty()) + return nullptr; + std::unique_ptr<SimpleFile> file(new SimpleFile("<alias>")); + for (const auto &i : getAliases()) { + StringRef from = i.first; + StringRef to = i.second; + SimpleDefinedAtom *fromAtom = new (_allocator) AliasAtom(*file, from); + UndefinedAtom *toAtom = new (_allocator) SimpleUndefinedAtom(*file, to); + fromAtom->addReference(Reference::KindNamespace::all, + Reference::KindArch::all, Reference::kindLayoutAfter, + 0, toAtom, 0); + file->addAtom(*fromAtom); + file->addAtom(*toAtom); + } + return std::move(file); +} + +void LinkingContext::createInternalFiles( + std::vector<std::unique_ptr<File> > &result) const { + if (std::unique_ptr<File> file = createEntrySymbolFile()) + result.push_back(std::move(file)); + if (std::unique_ptr<File> file = createUndefinedSymbolFile()) + result.push_back(std::move(file)); + if (std::unique_ptr<File> file = createAliasSymbolFile()) + result.push_back(std::move(file)); +} + +void LinkingContext::addPasses(PassManager &pm) {} + +} // end namespace lld diff --git a/lib/Core/Makefile b/lib/Core/Makefile new file mode 100644 index 000000000000..042d01a1e1b3 --- /dev/null +++ b/lib/Core/Makefile @@ -0,0 +1,13 @@ +##===- lld/lib/Core/Makefile ---------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LLD_LEVEL := ../.. 
+LIBRARYNAME := lldCore + +include $(LLD_LEVEL)/Makefile diff --git a/lib/Core/Reader.cpp b/lib/Core/Reader.cpp new file mode 100644 index 000000000000..6f8b8cbd1bf8 --- /dev/null +++ b/lib/Core/Reader.cpp @@ -0,0 +1,117 @@ +//===- lib/Core/Reader.cpp ------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "lld/Core/File.h" +#include "lld/Core/Reader.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/FileUtilities.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" +#include <memory> +#include <system_error> + +namespace lld { + +YamlIOTaggedDocumentHandler::~YamlIOTaggedDocumentHandler() {} + +void Registry::add(std::unique_ptr<Reader> reader) { + _readers.push_back(std::move(reader)); +} + +void Registry::add(std::unique_ptr<YamlIOTaggedDocumentHandler> handler) { + _yamlHandlers.push_back(std::move(handler)); +} + +std::error_code +Registry::loadFile(std::unique_ptr<MemoryBuffer> mb, + std::vector<std::unique_ptr<File>> &result) const { + // Get file type. + StringRef content(mb->getBufferStart(), mb->getBufferSize()); + llvm::sys::fs::file_magic fileType = llvm::sys::fs::identify_magic(content); + // Get file extension. + StringRef extension = llvm::sys::path::extension(mb->getBufferIdentifier()); + + // Ask each registered reader if it can handle this file type or extension. + for (const std::unique_ptr<Reader> &reader : _readers) { + if (!reader->canParse(fileType, extension, *mb)) + continue; + if (std::error_code ec = reader->loadFile(std::move(mb), *this, result)) + return ec; + return std::error_code(); + } + + // No Reader could parse this file. 
+ return make_error_code(llvm::errc::executable_format_error); +} + +static const Registry::KindStrings kindStrings[] = { + {Reference::kindLayoutAfter, "layout-after"}, + {Reference::kindGroupChild, "group-child"}, + {Reference::kindAssociate, "associate"}, + LLD_KIND_STRING_END}; + +Registry::Registry() { + addKindTable(Reference::KindNamespace::all, Reference::KindArch::all, + kindStrings); +} + +bool Registry::handleTaggedDoc(llvm::yaml::IO &io, + const lld::File *&file) const { + for (const std::unique_ptr<YamlIOTaggedDocumentHandler> &h : _yamlHandlers) + if (h->handledDocTag(io, file)) + return true; + return false; +} + + +void Registry::addKindTable(Reference::KindNamespace ns, + Reference::KindArch arch, + const KindStrings array[]) { + KindEntry entry = { ns, arch, array }; + _kindEntries.push_back(entry); +} + +bool Registry::referenceKindFromString(StringRef inputStr, + Reference::KindNamespace &ns, + Reference::KindArch &arch, + Reference::KindValue &value) const { + for (const KindEntry &entry : _kindEntries) { + for (const KindStrings *pair = entry.array; !pair->name.empty(); ++pair) { + if (!inputStr.equals(pair->name)) + continue; + ns = entry.ns; + arch = entry.arch; + value = pair->value; + return true; + } + } + return false; +} + +bool Registry::referenceKindToString(Reference::KindNamespace ns, + Reference::KindArch arch, + Reference::KindValue value, + StringRef &str) const { + for (const KindEntry &entry : _kindEntries) { + if (entry.ns != ns) + continue; + if (entry.arch != arch) + continue; + for (const KindStrings *pair = entry.array; !pair->name.empty(); ++pair) { + if (pair->value != value) + continue; + str = pair->name; + return true; + } + } + return false; +} + +} // end namespace lld diff --git a/lib/Core/Resolver.cpp b/lib/Core/Resolver.cpp new file mode 100644 index 000000000000..393a7ef2bfc8 --- /dev/null +++ b/lib/Core/Resolver.cpp @@ -0,0 +1,516 @@ +//===- Core/Resolver.cpp - Resolves Atom References 
-----------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "lld/Core/Atom.h" +#include "lld/Core/ArchiveLibraryFile.h" +#include "lld/Core/File.h" +#include "lld/Core/Instrumentation.h" +#include "lld/Core/LLVM.h" +#include "lld/Core/LinkingContext.h" +#include "lld/Core/Resolver.h" +#include "lld/Core/SharedLibraryFile.h" +#include "lld/Core/SymbolTable.h" +#include "lld/Core/UndefinedAtom.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cassert> +#include <utility> +#include <vector> + +namespace lld { + +bool Resolver::handleFile(File &file) { + bool undefAdded = false; + for (const DefinedAtom *atom : file.defined()) + doDefinedAtom(*atom); + for (const UndefinedAtom *atom : file.undefined()) { + if (doUndefinedAtom(*atom)) { + undefAdded = true; + maybePreloadArchiveMember(atom->name()); + } + } + for (const SharedLibraryAtom *atom : file.sharedLibrary()) + doSharedLibraryAtom(*atom); + for (const AbsoluteAtom *atom : file.absolute()) + doAbsoluteAtom(*atom); + return undefAdded; +} + +void Resolver::forEachUndefines(File &file, bool searchForOverrides, + UndefCallback callback) { + size_t i = _undefineIndex[&file]; + do { + for (; i < _undefines.size(); ++i) { + StringRef undefName = _undefines[i]; + if (undefName.empty()) + continue; + const Atom *atom = _symbolTable.findByName(undefName); + if (!isa<UndefinedAtom>(atom) || _symbolTable.isCoalescedAway(atom)) { + // The symbol was resolved by some other file. Cache the result. 
+ _undefines[i] = ""; + continue; + } + callback(undefName, false); + } + if (!searchForOverrides) + continue; + for (StringRef tentDefName : _symbolTable.tentativeDefinitions()) { + // Load for previous tentative may also have loaded + // something that overrode this tentative, so always check. + const Atom *curAtom = _symbolTable.findByName(tentDefName); + assert(curAtom != nullptr); + if (const DefinedAtom *curDefAtom = dyn_cast<DefinedAtom>(curAtom)) + if (curDefAtom->merge() == DefinedAtom::mergeAsTentative) + callback(tentDefName, true); + } + } while (i < _undefines.size()); + _undefineIndex[&file] = i; +} + +bool Resolver::handleArchiveFile(File &file) { + ArchiveLibraryFile *archiveFile = cast<ArchiveLibraryFile>(&file); + bool searchForOverrides = + _ctx.searchArchivesToOverrideTentativeDefinitions(); + bool undefAdded = false; + forEachUndefines(file, searchForOverrides, + [&](StringRef undefName, bool dataSymbolOnly) { + if (File *member = archiveFile->find(undefName, dataSymbolOnly)) { + member->setOrdinal(_ctx.getNextOrdinalAndIncrement()); + member->beforeLink(); + updatePreloadArchiveMap(); + undefAdded = handleFile(*member) || undefAdded; + } + }); + return undefAdded; +} + +void Resolver::handleSharedLibrary(File &file) { + // Add all the atoms from the shared library + SharedLibraryFile *sharedLibrary = cast<SharedLibraryFile>(&file); + handleFile(*sharedLibrary); + bool searchForOverrides = + _ctx.searchSharedLibrariesToOverrideTentativeDefinitions(); + forEachUndefines(file, searchForOverrides, + [&](StringRef undefName, bool dataSymbolOnly) { + if (const SharedLibraryAtom *atom = + sharedLibrary->exports(undefName, dataSymbolOnly)) + doSharedLibraryAtom(*atom); + }); +} + +bool Resolver::doUndefinedAtom(const UndefinedAtom &atom) { + DEBUG_WITH_TYPE("resolver", llvm::dbgs() + << " UndefinedAtom: " + << llvm::format("0x%09lX", &atom) + << ", name=" << atom.name() << "\n"); + + // add to list of known atoms + _atoms.push_back(&atom); + + // tell 
symbol table + bool newUndefAdded = _symbolTable.add(atom); + if (newUndefAdded) + _undefines.push_back(atom.name()); + + // If the undefined symbol has an alternative name, try to resolve the + // symbol with the name to give it a second chance. This feature is used + // for COFF "weak external" symbol. + if (newUndefAdded || !_symbolTable.isDefined(atom.name())) { + if (const UndefinedAtom *fallbackAtom = atom.fallback()) { + doUndefinedAtom(*fallbackAtom); + _symbolTable.addReplacement(&atom, fallbackAtom); + } + } + return newUndefAdded; +} + +/// \brief Add the section group and the group-child reference members. +void Resolver::maybeAddSectionGroupOrGnuLinkOnce(const DefinedAtom &atom) { + // First time adding a group? + bool isFirstTime = _symbolTable.addGroup(atom); + + if (!isFirstTime) { + // If duplicate symbols are allowed, select the first group. + if (_ctx.getAllowDuplicates()) + return; + auto *prevGroup = dyn_cast<DefinedAtom>(_symbolTable.findGroup(atom.name())); + assert(prevGroup && + "Internal Error: The group atom could only be a defined atom"); + // The atoms should be of the same content type, reject invalid group + // resolution behaviors. + if (atom.contentType() == prevGroup->contentType()) + return; + llvm::errs() << "SymbolTable: error while merging " << atom.name() + << "\n"; + llvm::report_fatal_error("duplicate symbol error"); + return; + } + + for (const Reference *r : atom) { + if (r->kindNamespace() == lld::Reference::KindNamespace::all && + r->kindValue() == lld::Reference::kindGroupChild) { + const DefinedAtom *target = dyn_cast<DefinedAtom>(r->target()); + assert(target && "Internal Error: kindGroupChild references need to " + "be associated with Defined Atoms only"); + _atoms.push_back(target); + _symbolTable.add(*target); + } + } +} + +// Called on each atom when a file is added. Returns true if a given +// atom is added to the symbol table. 
+void Resolver::doDefinedAtom(const DefinedAtom &atom) { + DEBUG_WITH_TYPE("resolver", llvm::dbgs() + << " DefinedAtom: " + << llvm::format("0x%09lX", &atom) + << ", file=#" + << atom.file().ordinal() + << ", atom=#" + << atom.ordinal() + << ", name=" + << atom.name() + << "\n"); + + // add to list of known atoms + _atoms.push_back(&atom); + + if (atom.isGroupParent()) { + maybeAddSectionGroupOrGnuLinkOnce(atom); + } else { + _symbolTable.add(atom); + } + + // An atom that should never be dead-stripped is a dead-strip root. + if (_ctx.deadStrip() && atom.deadStrip() == DefinedAtom::deadStripNever) { + _deadStripRoots.insert(&atom); + } +} + +void Resolver::doSharedLibraryAtom(const SharedLibraryAtom &atom) { + DEBUG_WITH_TYPE("resolver", llvm::dbgs() + << " SharedLibraryAtom: " + << llvm::format("0x%09lX", &atom) + << ", name=" + << atom.name() + << "\n"); + + // add to list of known atoms + _atoms.push_back(&atom); + + // tell symbol table + _symbolTable.add(atom); +} + +void Resolver::doAbsoluteAtom(const AbsoluteAtom &atom) { + DEBUG_WITH_TYPE("resolver", llvm::dbgs() + << " AbsoluteAtom: " + << llvm::format("0x%09lX", &atom) + << ", name=" + << atom.name() + << "\n"); + + // add to list of known atoms + _atoms.push_back(&atom); + + // tell symbol table + if (atom.scope() != Atom::scopeTranslationUnit) + _symbolTable.add(atom); +} + +// utility to add a vector of atoms +void Resolver::addAtoms(const std::vector<const DefinedAtom *> &newAtoms) { + for (const DefinedAtom *newAtom : newAtoms) + doDefinedAtom(*newAtom); +} + +// Instantiate an archive file member if there's a file containing a +// defined symbol for a given symbol name. Instantiation is done in a +// different worker thread and has no visible side effect. 
+void Resolver::maybePreloadArchiveMember(StringRef sym) { + auto it = _archiveMap.find(sym); + if (it == _archiveMap.end()) + return; + ArchiveLibraryFile *archive = it->second; + archive->preload(_ctx.getTaskGroup(), sym); +} + +// Returns true if at least one of N previous files has created an +// undefined symbol. +bool Resolver::undefinesAdded(int begin, int end) { + std::vector<std::unique_ptr<Node>> &inputs = _ctx.getNodes(); + for (int i = begin; i < end; ++i) + if (FileNode *node = dyn_cast<FileNode>(inputs[i].get())) + if (_newUndefinesAdded[node->getFile()]) + return true; + return false; +} + +File *Resolver::getFile(int &index) { + std::vector<std::unique_ptr<Node>> &inputs = _ctx.getNodes(); + if ((size_t)index >= inputs.size()) + return nullptr; + if (GroupEnd *group = dyn_cast<GroupEnd>(inputs[index].get())) { + // We are at the end of the current group. If one or more new + // undefined atom has been added in the last groupSize files, we + // reiterate over the files. + int size = group->getSize(); + if (undefinesAdded(index - size, index)) { + index -= size; + return getFile(index); + } + ++index; + return getFile(index); + } + return cast<FileNode>(inputs[index++].get())->getFile(); +} + +// Update a map of Symbol -> ArchiveFile. The map is used for speculative +// file loading. +void Resolver::updatePreloadArchiveMap() { + std::vector<std::unique_ptr<Node>> &nodes = _ctx.getNodes(); + for (int i = nodes.size() - 1; i >= 0; --i) { + auto *fnode = dyn_cast<FileNode>(nodes[i].get()); + if (!fnode) + continue; + auto *archive = dyn_cast<ArchiveLibraryFile>(fnode->getFile()); + if (!archive || _archiveSeen.count(archive)) + continue; + _archiveSeen.insert(archive); + for (StringRef sym : archive->getDefinedSymbols()) + _archiveMap[sym] = archive; + } +} + +// Keep adding atoms until _ctx.getNextFile() returns an error. This +// function is where undefined atoms are resolved. 
+bool Resolver::resolveUndefines() { + ScopedTask task(getDefaultDomain(), "resolveUndefines"); + int index = 0; + std::set<File *> seen; + for (;;) { + bool undefAdded = false; + File *file = getFile(index); + if (!file) + return true; + if (std::error_code ec = file->parse()) { + llvm::errs() << "Cannot open " + file->path() + << ": " << ec.message() << "\n"; + return false; + } + file->beforeLink(); + updatePreloadArchiveMap(); + switch (file->kind()) { + case File::kindObject: + // The same file may be visited more than once if the file is + // in --start-group and --end-group. Only library files should + // be processed more than once. + if (seen.count(file)) + break; + seen.insert(file); + assert(!file->hasOrdinal()); + file->setOrdinal(_ctx.getNextOrdinalAndIncrement()); + undefAdded = handleFile(*file); + break; + case File::kindArchiveLibrary: + if (!file->hasOrdinal()) + file->setOrdinal(_ctx.getNextOrdinalAndIncrement()); + undefAdded = handleArchiveFile(*file); + break; + case File::kindSharedLibrary: + if (!file->hasOrdinal()) + file->setOrdinal(_ctx.getNextOrdinalAndIncrement()); + handleSharedLibrary(*file); + break; + } + _newUndefinesAdded[file] = undefAdded; + } +} + +// switch all references to undefined or coalesced away atoms +// to the new defined atom +void Resolver::updateReferences() { + ScopedTask task(getDefaultDomain(), "updateReferences"); + for (const Atom *atom : _atoms) { + if (const DefinedAtom *defAtom = dyn_cast<DefinedAtom>(atom)) { + for (const Reference *ref : *defAtom) { + // A reference of type kindAssociate should't be updated. + // Instead, an atom having such reference will be removed + // if the target atom is coalesced away, so that they will + // go away as a group. 
+ if (ref->kindNamespace() == lld::Reference::KindNamespace::all && + ref->kindValue() == lld::Reference::kindAssociate) { + if (_symbolTable.isCoalescedAway(atom)) + _deadAtoms.insert(ref->target()); + continue; + } + const Atom *newTarget = _symbolTable.replacement(ref->target()); + const_cast<Reference *>(ref)->setTarget(newTarget); + } + } + } +} + +// For dead code stripping, recursively mark atoms "live" +void Resolver::markLive(const Atom *atom) { + // Mark the atom is live. If it's already marked live, then stop recursion. + auto exists = _liveAtoms.insert(atom); + if (!exists.second) + return; + + // Mark all atoms it references as live + if (const DefinedAtom *defAtom = dyn_cast<DefinedAtom>(atom)) { + for (const Reference *ref : *defAtom) + markLive(ref->target()); + for (auto &p : llvm::make_range(_reverseRef.equal_range(defAtom))) { + const Atom *target = p.second; + markLive(target); + } + } +} + +static bool isBackref(const Reference *ref) { + if (ref->kindNamespace() != lld::Reference::KindNamespace::all) + return false; + return (ref->kindValue() == lld::Reference::kindLayoutAfter || + ref->kindValue() == lld::Reference::kindGroupChild); +} + +// remove all atoms not actually used +void Resolver::deadStripOptimize() { + ScopedTask task(getDefaultDomain(), "deadStripOptimize"); + // only do this optimization with -dead_strip + if (!_ctx.deadStrip()) + return; + + // Some type of references prevent referring atoms to be dead-striped. + // Make a reverse map of such references before traversing the graph. + // While traversing the list of atoms, mark AbsoluteAtoms as live + // in order to avoid reclaim. 
+ for (const Atom *atom : _atoms) { + if (const DefinedAtom *defAtom = dyn_cast<DefinedAtom>(atom)) + for (const Reference *ref : *defAtom) + if (isBackref(ref)) + _reverseRef.insert(std::make_pair(ref->target(), atom)); + if (const AbsoluteAtom *absAtom = dyn_cast<AbsoluteAtom>(atom)) + markLive(absAtom); + } + + // By default, shared libraries are built with all globals as dead strip roots + if (_ctx.globalsAreDeadStripRoots()) + for (const Atom *atom : _atoms) + if (const DefinedAtom *defAtom = dyn_cast<DefinedAtom>(atom)) + if (defAtom->scope() == DefinedAtom::scopeGlobal) + _deadStripRoots.insert(defAtom); + + // Or, use list of names that are dead strip roots. + for (const StringRef &name : _ctx.deadStripRoots()) { + const Atom *symAtom = _symbolTable.findByName(name); + assert(symAtom); + _deadStripRoots.insert(symAtom); + } + + // mark all roots as live, and recursively all atoms they reference + for (const Atom *dsrAtom : _deadStripRoots) + markLive(dsrAtom); + + // now remove all non-live atoms from _atoms + _atoms.erase(std::remove_if(_atoms.begin(), _atoms.end(), [&](const Atom *a) { + return _liveAtoms.count(a) == 0; + }), + _atoms.end()); +} + +// error out if some undefines remain +bool Resolver::checkUndefines() { + // build vector of remaining undefined symbols + std::vector<const UndefinedAtom *> undefinedAtoms = _symbolTable.undefines(); + if (_ctx.deadStrip()) { + // When dead code stripping, we don't care if dead atoms are undefined. + undefinedAtoms.erase( + std::remove_if(undefinedAtoms.begin(), undefinedAtoms.end(), + [&](const Atom *a) { return _liveAtoms.count(a) == 0; }), + undefinedAtoms.end()); + } + + if (undefinedAtoms.empty()) + return false; + + // Warn about unresolved symbols. + bool foundUndefines = false; + for (const UndefinedAtom *undef : undefinedAtoms) { + // Skip over a weak symbol. 
+ if (undef->canBeNull() != UndefinedAtom::canBeNullNever) + continue; + + // If this is a library and undefined symbols are allowed on the + // target platform, skip over it. + if (isa<SharedLibraryFile>(undef->file()) && _ctx.allowShlibUndefines()) + continue; + + // If the undefine is coalesced away, skip over it. + if (_symbolTable.isCoalescedAway(undef)) + continue; + + // Seems like this symbol is undefined. Warn that. + foundUndefines = true; + if (_ctx.printRemainingUndefines()) { + llvm::errs() << "Undefined symbol: " << undef->file().path() + << ": " << _ctx.demangle(undef->name()) + << "\n"; + } + } + if (!foundUndefines) + return false; + if (_ctx.printRemainingUndefines()) + llvm::errs() << "symbol(s) not found\n"; + return true; +} + +// remove from _atoms all coaleseced away atoms +void Resolver::removeCoalescedAwayAtoms() { + ScopedTask task(getDefaultDomain(), "removeCoalescedAwayAtoms"); + _atoms.erase(std::remove_if(_atoms.begin(), _atoms.end(), [&](const Atom *a) { + return _symbolTable.isCoalescedAway(a) || _deadAtoms.count(a); + }), + _atoms.end()); +} + +bool Resolver::resolve() { + updatePreloadArchiveMap(); + if (!resolveUndefines()) + return false; + updateReferences(); + deadStripOptimize(); + if (checkUndefines()) + if (!_ctx.allowRemainingUndefines()) + return false; + removeCoalescedAwayAtoms(); + _result->addAtoms(_atoms); + return true; +} + +void Resolver::MergedFile::addAtoms(std::vector<const Atom *> &all) { + ScopedTask task(getDefaultDomain(), "addAtoms"); + DEBUG_WITH_TYPE("resolver", llvm::dbgs() << "Resolver final atom list:\n"); + for (const Atom *atom : all) { + DEBUG_WITH_TYPE("resolver", llvm::dbgs() + << llvm::format(" 0x%09lX", atom) + << ", name=" + << atom->name() + << "\n"); + addAtom(*atom); + } +} + +} // namespace lld diff --git a/lib/Core/SymbolTable.cpp b/lib/Core/SymbolTable.cpp new file mode 100644 index 000000000000..f3f2da9262e0 --- /dev/null +++ b/lib/Core/SymbolTable.cpp @@ -0,0 +1,390 @@ +//===- 
Core/SymbolTable.cpp - Main Symbol Table ---------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "lld/Core/SymbolTable.h" +#include "lld/Core/AbsoluteAtom.h" +#include "lld/Core/Atom.h" +#include "lld/Core/DefinedAtom.h" +#include "lld/Core/File.h" +#include "lld/Core/LLVM.h" +#include "lld/Core/LinkingContext.h" +#include "lld/Core/Resolver.h" +#include "lld/Core/SharedLibraryAtom.h" +#include "lld/Core/UndefinedAtom.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cassert> +#include <cstdlib> +#include <vector> + +namespace lld { +SymbolTable::SymbolTable(LinkingContext &context) : _context(context) {} + +bool SymbolTable::add(const UndefinedAtom &atom) { return addByName(atom); } + +bool SymbolTable::add(const SharedLibraryAtom &atom) { return addByName(atom); } + +bool SymbolTable::add(const AbsoluteAtom &atom) { return addByName(atom); } + +bool SymbolTable::add(const DefinedAtom &atom) { + if (!atom.name().empty() && + atom.scope() != DefinedAtom::scopeTranslationUnit) { + // Named atoms cannot be merged by content. + assert(atom.merge() != DefinedAtom::mergeByContent); + // Track named atoms that are not scoped to file (static). + return addByName(atom); + } + if (atom.merge() == DefinedAtom::mergeByContent) { + // Named atoms cannot be merged by content. + assert(atom.name().empty()); + // Currently only read-only constants can be merged. + if (atom.permissions() == DefinedAtom::permR__) + return addByContent(atom); + // TODO: support mergeByContent of data atoms by comparing content & fixups. 
+ } + return false; +} + +const Atom *SymbolTable::findGroup(StringRef sym) { + NameToAtom::iterator pos = _groupTable.find(sym); + if (pos == _groupTable.end()) + return nullptr; + return pos->second; +} + +bool SymbolTable::addGroup(const DefinedAtom &da) { + StringRef name = da.name(); + assert(!name.empty()); + const Atom *existing = findGroup(name); + if (existing == nullptr) { + _groupTable[name] = &da; + return true; + } + _replacedAtoms[&da] = existing; + return false; +} + +enum NameCollisionResolution { + NCR_First, + NCR_Second, + NCR_DupDef, + NCR_DupUndef, + NCR_DupShLib, + NCR_Error +}; + +static NameCollisionResolution cases[4][4] = { + //regular absolute undef sharedLib + { + // first is regular + NCR_DupDef, NCR_Error, NCR_First, NCR_First + }, + { + // first is absolute + NCR_Error, NCR_Error, NCR_First, NCR_First + }, + { + // first is undef + NCR_Second, NCR_Second, NCR_DupUndef, NCR_Second + }, + { + // first is sharedLib + NCR_Second, NCR_Second, NCR_First, NCR_DupShLib + } +}; + +static NameCollisionResolution collide(Atom::Definition first, + Atom::Definition second) { + return cases[first][second]; +} + +enum MergeResolution { + MCR_First, + MCR_Second, + MCR_Largest, + MCR_SameSize, + MCR_Error +}; + +static MergeResolution mergeCases[][6] = { + // no tentative weak weakAddress sameNameAndSize largest + {MCR_Error, MCR_First, MCR_First, MCR_First, MCR_SameSize, MCR_Largest}, // no + {MCR_Second, MCR_Largest, MCR_Second, MCR_Second, MCR_SameSize, MCR_Largest}, // tentative + {MCR_Second, MCR_First, MCR_First, MCR_Second, MCR_SameSize, MCR_Largest}, // weak + {MCR_Second, MCR_First, MCR_First, MCR_First, MCR_SameSize, MCR_Largest}, // weakAddress + {MCR_SameSize, MCR_SameSize, MCR_SameSize, MCR_SameSize, MCR_SameSize, MCR_SameSize}, // sameSize + {MCR_Largest, MCR_Largest, MCR_Largest, MCR_Largest, MCR_SameSize, MCR_Largest}, // largest +}; + +static MergeResolution mergeSelect(DefinedAtom::Merge first, + DefinedAtom::Merge second) { + 
assert(first != DefinedAtom::mergeByContent); + assert(second != DefinedAtom::mergeByContent); + return mergeCases[first][second]; +} + +bool SymbolTable::addByName(const Atom &newAtom) { + StringRef name = newAtom.name(); + assert(!name.empty()); + const Atom *existing = findByName(name); + if (existing == nullptr) { + // Name is not in symbol table yet, add it associate with this atom. + _nameTable[name] = &newAtom; + return true; + } + + // Do nothing if the same object is added more than once. + if (existing == &newAtom) + return false; + + // Name is already in symbol table and associated with another atom. + bool useNew = true; + switch (collide(existing->definition(), newAtom.definition())) { + case NCR_First: + useNew = false; + break; + case NCR_Second: + useNew = true; + break; + case NCR_DupDef: { + const auto *existingDef = cast<DefinedAtom>(existing); + const auto *newDef = cast<DefinedAtom>(&newAtom); + switch (mergeSelect(existingDef->merge(), newDef->merge())) { + case MCR_First: + useNew = false; + break; + case MCR_Second: + useNew = true; + break; + case MCR_Largest: { + uint64_t existingSize = existingDef->sectionSize(); + uint64_t newSize = newDef->sectionSize(); + useNew = (newSize >= existingSize); + break; + } + case MCR_SameSize: { + uint64_t existingSize = existingDef->sectionSize(); + uint64_t newSize = newDef->sectionSize(); + if (existingSize == newSize) { + useNew = true; + break; + } + llvm::errs() << "Size mismatch: " + << existing->name() << " (" << existingSize << ") " + << newAtom.name() << " (" << newSize << ")\n"; + // fallthrough + } + case MCR_Error: + if (!_context.getAllowDuplicates()) { + llvm::errs() << "Duplicate symbols: " + << existing->name() + << ":" + << existing->file().path() + << " and " + << newAtom.name() + << ":" + << newAtom.file().path() + << "\n"; + llvm::report_fatal_error("duplicate symbol error"); + } + useNew = false; + break; + } + break; + } + case NCR_DupUndef: { + const UndefinedAtom* existingUndef = 
cast<UndefinedAtom>(existing); + const UndefinedAtom* newUndef = cast<UndefinedAtom>(&newAtom); + + bool sameCanBeNull = (existingUndef->canBeNull() == newUndef->canBeNull()); + if (!sameCanBeNull && + _context.warnIfCoalesableAtomsHaveDifferentCanBeNull()) { + llvm::errs() << "lld warning: undefined symbol " + << existingUndef->name() + << " has different weakness in " + << existingUndef->file().path() + << " and in " << newUndef->file().path() << "\n"; + } + + const UndefinedAtom *existingFallback = existingUndef->fallback(); + const UndefinedAtom *newFallback = newUndef->fallback(); + bool hasDifferentFallback = + (existingFallback && newFallback && + existingFallback->name() != newFallback->name()); + if (hasDifferentFallback) { + llvm::errs() << "lld warning: undefined symbol " + << existingUndef->name() << " has different fallback: " + << existingFallback->name() << " in " + << existingUndef->file().path() << " and " + << newFallback->name() << " in " + << newUndef->file().path() << "\n"; + } + + bool hasNewFallback = newUndef->fallback(); + if (sameCanBeNull) + useNew = hasNewFallback; + else + useNew = (newUndef->canBeNull() < existingUndef->canBeNull()); + break; + } + case NCR_DupShLib: { + const SharedLibraryAtom *curShLib = cast<SharedLibraryAtom>(existing); + const SharedLibraryAtom *newShLib = cast<SharedLibraryAtom>(&newAtom); + bool sameNullness = + (curShLib->canBeNullAtRuntime() == newShLib->canBeNullAtRuntime()); + bool sameName = curShLib->loadName().equals(newShLib->loadName()); + if (sameName && !sameNullness && + _context.warnIfCoalesableAtomsHaveDifferentCanBeNull()) { + // FIXME: need diagonstics interface for writing warning messages + llvm::errs() << "lld warning: shared library symbol " + << curShLib->name() << " has different weakness in " + << curShLib->file().path() << " and in " + << newShLib->file().path(); + } + if (!sameName && _context.warnIfCoalesableAtomsHaveDifferentLoadName()) { + // FIXME: need diagonstics interface for 
writing warning messages + llvm::errs() << "lld warning: shared library symbol " + << curShLib->name() << " has different load path in " + << curShLib->file().path() << " and in " + << newShLib->file().path(); + } + useNew = false; + break; + } + case NCR_Error: + llvm::errs() << "SymbolTable: error while merging " << name << "\n"; + llvm::report_fatal_error("duplicate symbol error"); + break; + } + + // Give context a chance to change which is kept. + _context.notifySymbolTableCoalesce(existing, &newAtom, useNew); + + if (useNew) { + // Update name table to use new atom. + _nameTable[name] = &newAtom; + // Add existing atom to replacement table. + _replacedAtoms[existing] = &newAtom; + } else { + // New atom is not being used. Add it to replacement table. + _replacedAtoms[&newAtom] = existing; + } + return false; +} + +unsigned SymbolTable::AtomMappingInfo::getHashValue(const DefinedAtom *atom) { + auto content = atom->rawContent(); + return llvm::hash_combine(atom->size(), + atom->contentType(), + llvm::hash_combine_range(content.begin(), + content.end())); +} + +bool SymbolTable::AtomMappingInfo::isEqual(const DefinedAtom * const l, + const DefinedAtom * const r) { + if (l == r) + return true; + if (l == getEmptyKey()) + return false; + if (r == getEmptyKey()) + return false; + if (l == getTombstoneKey()) + return false; + if (r == getTombstoneKey()) + return false; + if (l->contentType() != r->contentType()) + return false; + if (l->size() != r->size()) + return false; + if (l->sectionChoice() != r->sectionChoice()) + return false; + if (l->sectionChoice() == DefinedAtom::sectionCustomRequired) { + if (!l->customSectionName().equals(r->customSectionName())) + return false; + } + ArrayRef<uint8_t> lc = l->rawContent(); + ArrayRef<uint8_t> rc = r->rawContent(); + return memcmp(lc.data(), rc.data(), lc.size()) == 0; +} + +bool SymbolTable::addByContent(const DefinedAtom &newAtom) { + AtomContentSet::iterator pos = _contentTable.find(&newAtom); + if (pos == 
_contentTable.end()) { + _contentTable.insert(&newAtom); + return true; + } + const Atom* existing = *pos; + // New atom is not being used. Add it to replacement table. + _replacedAtoms[&newAtom] = existing; + return false; +} + +const Atom *SymbolTable::findByName(StringRef sym) { + NameToAtom::iterator pos = _nameTable.find(sym); + if (pos == _nameTable.end()) + return nullptr; + return pos->second; +} + +bool SymbolTable::isDefined(StringRef sym) { + if (const Atom *atom = findByName(sym)) + return !isa<UndefinedAtom>(atom); + return false; +} + +void SymbolTable::addReplacement(const Atom *replaced, + const Atom *replacement) { + _replacedAtoms[replaced] = replacement; +} + +const Atom *SymbolTable::replacement(const Atom *atom) { + // Find the replacement for a given atom. Atoms in _replacedAtoms + // may be chained, so find the last one. + for (;;) { + AtomToAtom::iterator pos = _replacedAtoms.find(atom); + if (pos == _replacedAtoms.end()) + return atom; + atom = pos->second; + } +} + +bool SymbolTable::isCoalescedAway(const Atom *atom) { + return _replacedAtoms.count(atom) > 0; +} + +std::vector<const UndefinedAtom *> SymbolTable::undefines() { + std::vector<const UndefinedAtom *> ret; + for (auto it : _nameTable) { + const Atom *atom = it.second; + assert(atom != nullptr); + if (const auto *undef = dyn_cast<const UndefinedAtom>(atom)) + if (_replacedAtoms.count(undef) == 0) + ret.push_back(undef); + } + return ret; +} + +std::vector<StringRef> SymbolTable::tentativeDefinitions() { + std::vector<StringRef> ret; + for (auto entry : _nameTable) { + const Atom *atom = entry.second; + StringRef name = entry.first; + assert(atom != nullptr); + if (const DefinedAtom *defAtom = dyn_cast<DefinedAtom>(atom)) + if (defAtom->merge() == DefinedAtom::mergeAsTentative) + ret.push_back(name); + } + return ret; +} + +} // namespace lld diff --git a/lib/Core/TODO.txt b/lib/Core/TODO.txt new file mode 100644 index 000000000000..196a3e02c2fc --- /dev/null +++ 
b/lib/Core/TODO.txt @@ -0,0 +1,18 @@ +lib/Core +~~~~~~~~ + +* Add endianness support to the native reader and writer. + +* The NativeReader has lots of similar code for converting arrays of ivar + data in mapped memory into arrays of objects. The commonality can be + factored out, maybe templatized. + +* The NativeFileFormat.h is old-school C structs and constants. We could scope + things better by defining constants used with a struct inside the struct + declaration. + +* The native reader and writer currently just blast in-memory enumeration + values (e.g. DefinedAtom::Scope) into a byte in the disk format. To support + future changes to the enumerations, there should be a translation layer + to map disk values to in-memory values. + diff --git a/lib/Core/Writer.cpp b/lib/Core/Writer.cpp new file mode 100644 index 000000000000..39bcc9e68523 --- /dev/null +++ b/lib/Core/Writer.cpp @@ -0,0 +1,23 @@ +//===- lib/Core/Writer.cpp ------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "lld/Core/File.h" +#include "lld/Core/Writer.h" + +namespace lld { +Writer::Writer() { +} + +Writer::~Writer() { +} + +bool Writer::createImplicitFiles(std::vector<std::unique_ptr<File> > &) { + return true; +} +} // end namespace lld diff --git a/lib/Driver/CMakeLists.txt b/lib/Driver/CMakeLists.txt new file mode 100644 index 000000000000..5a410e7eed7e --- /dev/null +++ b/lib/Driver/CMakeLists.txt @@ -0,0 +1,43 @@ +set(LLVM_TARGET_DEFINITIONS UniversalDriverOptions.td) +tablegen(LLVM UniversalDriverOptions.inc -gen-opt-parser-defs) +set(LLVM_TARGET_DEFINITIONS GnuLdOptions.td) +tablegen(LLVM GnuLdOptions.inc -gen-opt-parser-defs) +set(LLVM_TARGET_DEFINITIONS CoreOptions.td) +tablegen(LLVM CoreOptions.inc -gen-opt-parser-defs) +set(LLVM_TARGET_DEFINITIONS DarwinLdOptions.td) +tablegen(LLVM DarwinLdOptions.inc -gen-opt-parser-defs) +set(LLVM_TARGET_DEFINITIONS WinLinkOptions.td) +tablegen(LLVM WinLinkOptions.inc -gen-opt-parser-defs) +add_public_tablegen_target(DriverOptionsTableGen) + +add_llvm_library(lldDriver + CoreDriver.cpp + DarwinLdDriver.cpp + Driver.cpp + GnuLdDriver.cpp + UniversalDriver.cpp + WinLinkDriver.cpp + WinLinkModuleDef.cpp + LINK_LIBS + lldConfig + lldMachO + lldPECOFF + lldELF + lldAArch64ELFTarget + lldARMELFTarget + lldHexagonELFTarget + lldMipsELFTarget + lldX86ELFTarget + lldExampleSubTarget + lldX86_64ELFTarget + lldCore + lldNative + lldReaderWriter + lldYAML + LLVMObject + LLVMOption + LLVMSupport + ) + +add_dependencies(lldDriver DriverOptionsTableGen) + diff --git a/lib/Driver/CoreDriver.cpp b/lib/Driver/CoreDriver.cpp new file mode 100644 index 000000000000..b8adee55746f --- /dev/null +++ b/lib/Driver/CoreDriver.cpp @@ -0,0 +1,172 @@ +//===- lib/Driver/CoreDriver.cpp ------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "lld/Core/Reader.h" +#include "lld/Driver/Driver.h" +#include "lld/ReaderWriter/CoreLinkingContext.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Option/Arg.h" +#include "llvm/Option/Option.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Host.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/PrettyStackTrace.h" +#include "llvm/Support/Signals.h" +#include "llvm/Support/raw_ostream.h" + +using namespace lld; + +namespace { + +// Create enum with OPT_xxx values for each option in CoreOptions.td +enum { + OPT_INVALID = 0, +#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ + HELP, META) \ + OPT_##ID, +#include "CoreOptions.inc" +#undef OPTION +}; + +// Create prefix string literals used in CoreOptions.td +#define PREFIX(NAME, VALUE) const char *const NAME[] = VALUE; +#include "CoreOptions.inc" +#undef PREFIX + +// Create table mapping all options defined in CoreOptions.td +static const llvm::opt::OptTable::Info infoTable[] = { +#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ + HELPTEXT, METAVAR) \ + { PREFIX, NAME, HELPTEXT, METAVAR, OPT_##ID, llvm::opt::Option::KIND##Class, \ + PARAM, FLAGS, OPT_##GROUP, OPT_##ALIAS, ALIASARGS }, +#include "CoreOptions.inc" +#undef OPTION +}; + +// Create OptTable class for parsing actual command line arguments +class CoreOptTable : public llvm::opt::OptTable { +public: + CoreOptTable() : OptTable(infoTable, llvm::array_lengthof(infoTable)){} +}; + +} // namespace anonymous + + +namespace lld { + +static const Registry::KindStrings coreKindStrings[] = { + { CoreLinkingContext::TEST_RELOC_CALL32, "call32" }, + { CoreLinkingContext::TEST_RELOC_PCREL32, "pcrel32" }, + { 
CoreLinkingContext::TEST_RELOC_GOT_LOAD32, "gotLoad32" }, + { CoreLinkingContext::TEST_RELOC_GOT_USE32, "gotUse32" }, + { CoreLinkingContext::TEST_RELOC_LEA32_WAS_GOT, "lea32wasGot" }, + LLD_KIND_STRING_END +}; + +bool CoreDriver::link(int argc, const char *argv[], raw_ostream &diagnostics) { + CoreLinkingContext ctx; + + // Register possible input file parsers. + ctx.registry().addSupportNativeObjects(); + ctx.registry().addSupportYamlFiles(); + ctx.registry().addKindTable(Reference::KindNamespace::testing, + Reference::KindArch::all, coreKindStrings); + + if (!parse(argc, argv, ctx)) + return false; + return Driver::link(ctx); +} + +bool CoreDriver::parse(int argc, const char *argv[], CoreLinkingContext &ctx, + raw_ostream &diagnostics) { + // Parse command line options using CoreOptions.td + std::unique_ptr<llvm::opt::InputArgList> parsedArgs; + CoreOptTable table; + unsigned missingIndex; + unsigned missingCount; + parsedArgs.reset( + table.ParseArgs(&argv[1], &argv[argc], missingIndex, missingCount)); + if (missingCount) { + diagnostics << "error: missing arg value for '" + << parsedArgs->getArgString(missingIndex) << "' expected " + << missingCount << " argument(s).\n"; + return false; + } + + // Set default options + ctx.setOutputPath("-"); + ctx.setDeadStripping(false); + ctx.setGlobalsAreDeadStripRoots(false); + ctx.setPrintRemainingUndefines(false); + ctx.setAllowRemainingUndefines(true); + ctx.setSearchArchivesToOverrideTentativeDefinitions(false); + + // Process all the arguments and create input files. 
+ for (auto inputArg : *parsedArgs) { + switch (inputArg->getOption().getID()) { + case OPT_mllvm: + ctx.appendLLVMOption(inputArg->getValue()); + break; + + case OPT_entry: + ctx.setEntrySymbolName(inputArg->getValue()); + break; + + case OPT_output: + ctx.setOutputPath(inputArg->getValue()); + break; + + case OPT_dead_strip: + ctx.setDeadStripping(true); + break; + + case OPT_keep_globals: + ctx.setGlobalsAreDeadStripRoots(true); + break; + + case OPT_undefines_are_errors: + ctx.setPrintRemainingUndefines(true); + ctx.setAllowRemainingUndefines(false); + break; + + case OPT_commons_search_archives: + ctx.setSearchArchivesToOverrideTentativeDefinitions(true); + break; + + case OPT_add_pass: + ctx.addPassNamed(inputArg->getValue()); + break; + + case OPT_INPUT: { + std::vector<std::unique_ptr<File>> files + = loadFile(ctx, inputArg->getValue(), false); + for (std::unique_ptr<File> &file : files) + ctx.getNodes().push_back(llvm::make_unique<FileNode>(std::move(file))); + break; + } + + default: + break; + } + } + + if (ctx.getNodes().empty()) { + diagnostics << "No input files\n"; + return false; + } + + // Validate the combination of options used. 
+ return ctx.validate(diagnostics); +} + +} // namespace lld diff --git a/lib/Driver/CoreOptions.td b/lib/Driver/CoreOptions.td new file mode 100644 index 000000000000..df7cb41737d2 --- /dev/null +++ b/lib/Driver/CoreOptions.td @@ -0,0 +1,15 @@ +include "llvm/Option/OptParser.td" + +def output : Separate<["-"], "o">; +def entry : Separate<["-"], "e">; + +def dead_strip : Flag<["--"], "dead-strip">; +def undefines_are_errors : Flag<["--"], "undefines-are-errors">; +def keep_globals : Flag<["--"], "keep-globals">; +def commons_search_archives : Flag<["--"], "commons-search-archives">; + +def add_pass : Separate<["--"], "add-pass">; + +def target : Separate<["-"], "target">, HelpText<"Target triple to link for">; +def mllvm : Separate<["-"], "mllvm">, HelpText<"Options to pass to LLVM">; + diff --git a/lib/Driver/DarwinLdDriver.cpp b/lib/Driver/DarwinLdDriver.cpp new file mode 100644 index 000000000000..2c64aeee38a5 --- /dev/null +++ b/lib/Driver/DarwinLdDriver.cpp @@ -0,0 +1,832 @@ +//===- lib/Driver/DarwinLdDriver.cpp --------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// +/// Concrete instance of the Driver for darwin's ld. 
+/// +//===----------------------------------------------------------------------===// + +#include "lld/Core/File.h" +#include "lld/Core/ArchiveLibraryFile.h" +#include "lld/Core/SharedLibraryFile.h" +#include "lld/Driver/Driver.h" +#include "lld/ReaderWriter/MachOLinkingContext.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Option/Arg.h" +#include "llvm/Option/Option.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/Host.h" +#include "llvm/Support/MachO.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/PrettyStackTrace.h" +#include "llvm/Support/Signals.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> + +using namespace lld; + +namespace { + +// Create enum with OPT_xxx values for each option in DarwinLdOptions.td +enum { + OPT_INVALID = 0, +#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ + HELP, META) \ + OPT_##ID, +#include "DarwinLdOptions.inc" +#undef OPTION +}; + +// Create prefix string literals used in DarwinLdOptions.td +#define PREFIX(NAME, VALUE) const char *const NAME[] = VALUE; +#include "DarwinLdOptions.inc" +#undef PREFIX + +// Create table mapping all options defined in DarwinLdOptions.td +static const llvm::opt::OptTable::Info infoTable[] = { +#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ + HELPTEXT, METAVAR) \ + { PREFIX, NAME, HELPTEXT, METAVAR, OPT_##ID, llvm::opt::Option::KIND##Class, \ + PARAM, FLAGS, OPT_##GROUP, OPT_##ALIAS, ALIASARGS }, +#include "DarwinLdOptions.inc" +#undef OPTION +}; + +// Create OptTable class for parsing actual command line arguments +class DarwinLdOptTable : public llvm::opt::OptTable { +public: + DarwinLdOptTable() : OptTable(infoTable, llvm::array_lengthof(infoTable)){} +}; + 
+std::vector<std::unique_ptr<File>> +loadFile(MachOLinkingContext &ctx, StringRef path, + raw_ostream &diag, bool wholeArchive, bool upwardDylib) { + if (ctx.logInputFiles()) + diag << path << "\n"; + + ErrorOr<std::unique_ptr<MemoryBuffer>> mbOrErr = ctx.getMemoryBuffer(path); + if (std::error_code ec = mbOrErr.getError()) + return makeErrorFile(path, ec); + std::vector<std::unique_ptr<File>> files; + if (std::error_code ec = ctx.registry().loadFile(std::move(mbOrErr.get()), files)) + return makeErrorFile(path, ec); + for (std::unique_ptr<File> &pf : files) { + // If file is a dylib, inform LinkingContext about it. + if (SharedLibraryFile *shl = dyn_cast<SharedLibraryFile>(pf.get())) { + if (std::error_code ec = shl->parse()) + return makeErrorFile(path, ec); + ctx.registerDylib(reinterpret_cast<mach_o::MachODylibFile*>(shl), + upwardDylib); + } + } + if (wholeArchive) + return parseMemberFiles(files); + return files; +} + +} // anonymous namespace + +// Test may be running on Windows. Canonicalize the path +// separator to '/' to get consistent outputs for tests. +static std::string canonicalizePath(StringRef path) { + char sep = llvm::sys::path::get_separator().front(); + if (sep != '/') { + std::string fixedPath = path; + std::replace(fixedPath.begin(), fixedPath.end(), sep, '/'); + return fixedPath; + } else { + return path; + } +} + +static void addFile(StringRef path, MachOLinkingContext &ctx, + bool loadWholeArchive, + bool upwardDylib, raw_ostream &diag) { + std::vector<std::unique_ptr<File>> files = + loadFile(ctx, path, diag, loadWholeArchive, upwardDylib); + for (std::unique_ptr<File> &file : files) + ctx.getNodes().push_back(llvm::make_unique<FileNode>(std::move(file))); +} + +// Export lists are one symbol per line. Blank lines are ignored. +// Trailing comments start with #. +static std::error_code parseExportsList(StringRef exportFilePath, + MachOLinkingContext &ctx, + raw_ostream &diagnostics) { + // Map in export list file. 
+ ErrorOr<std::unique_ptr<MemoryBuffer>> mb = + MemoryBuffer::getFileOrSTDIN(exportFilePath); + if (std::error_code ec = mb.getError()) + return ec; + ctx.addInputFileDependency(exportFilePath); + StringRef buffer = mb->get()->getBuffer(); + while (!buffer.empty()) { + // Split off each line in the file. + std::pair<StringRef, StringRef> lineAndRest = buffer.split('\n'); + StringRef line = lineAndRest.first; + // Ignore trailing # comments. + std::pair<StringRef, StringRef> symAndComment = line.split('#'); + StringRef sym = symAndComment.first.trim(); + if (!sym.empty()) + ctx.addExportSymbol(sym); + buffer = lineAndRest.second; + } + return std::error_code(); +} + + + +/// Order files are one symbol per line. Blank lines are ignored. +/// Trailing comments start with #. Symbol names can be prefixed with an +/// architecture name and/or .o leaf name. Examples: +/// _foo +/// bar.o:_bar +/// libfrob.a(bar.o):_bar +/// x86_64:_foo64 +static std::error_code parseOrderFile(StringRef orderFilePath, + MachOLinkingContext &ctx, + raw_ostream &diagnostics) { + // Map in order file. + ErrorOr<std::unique_ptr<MemoryBuffer>> mb = + MemoryBuffer::getFileOrSTDIN(orderFilePath); + if (std::error_code ec = mb.getError()) + return ec; + ctx.addInputFileDependency(orderFilePath); + StringRef buffer = mb->get()->getBuffer(); + while (!buffer.empty()) { + // Split off each line in the file. + std::pair<StringRef, StringRef> lineAndRest = buffer.split('\n'); + StringRef line = lineAndRest.first; + buffer = lineAndRest.second; + // Ignore trailing # comments. + std::pair<StringRef, StringRef> symAndComment = line.split('#'); + if (symAndComment.first.empty()) + continue; + StringRef sym = symAndComment.first.trim(); + if (sym.empty()) + continue; + // Check for prefix. 
+ StringRef prefix; + std::pair<StringRef, StringRef> prefixAndSym = sym.split(':'); + if (!prefixAndSym.second.empty()) { + sym = prefixAndSym.second; + prefix = prefixAndSym.first; + if (!prefix.endswith(".o") && !prefix.endswith(".o)")) { + // If arch name prefix does not match arch being linked, ignore symbol. + if (!ctx.archName().equals(prefix)) + continue; + prefix = ""; + } + } else + sym = prefixAndSym.first; + if (!sym.empty()) { + ctx.appendOrderedSymbol(sym, prefix); + //llvm::errs() << sym << ", prefix=" << prefix << "\n"; + } + } + return std::error_code(); +} + +// +// There are two variants of the -filelist option: +// +// -filelist <path> +// In this variant, the path is to a text file which contains one file path +// per line. There are no comments or trimming of whitespace. +// +// -fileList <path>,<dir> +// In this variant, the path is to a text file which contains a partial path +// per line. The <dir> prefix is prepended to each partial path. +// +static std::error_code loadFileList(StringRef fileListPath, + MachOLinkingContext &ctx, bool forceLoad, + raw_ostream &diagnostics) { + // If there is a comma, split off <dir>. + std::pair<StringRef, StringRef> opt = fileListPath.split(','); + StringRef filePath = opt.first; + StringRef dirName = opt.second; + ctx.addInputFileDependency(filePath); + // Map in file list file. + ErrorOr<std::unique_ptr<MemoryBuffer>> mb = + MemoryBuffer::getFileOrSTDIN(filePath); + if (std::error_code ec = mb.getError()) + return ec; + StringRef buffer = mb->get()->getBuffer(); + while (!buffer.empty()) { + // Split off each line in the file. + std::pair<StringRef, StringRef> lineAndRest = buffer.split('\n'); + StringRef line = lineAndRest.first; + StringRef path; + if (!dirName.empty()) { + // If there is a <dir> then prepend dir to each line. 
+ SmallString<256> fullPath; + fullPath.assign(dirName); + llvm::sys::path::append(fullPath, Twine(line)); + path = ctx.copy(fullPath.str()); + } else { + // No <dir> use whole line as input file path. + path = ctx.copy(line); + } + if (!ctx.pathExists(path)) { + return make_dynamic_error_code(Twine("File not found '") + + path + + "'"); + } + if (ctx.testingFileUsage()) { + diagnostics << "Found filelist entry " << canonicalizePath(path) << '\n'; + } + addFile(path, ctx, forceLoad, false, diagnostics); + buffer = lineAndRest.second; + } + return std::error_code(); +} + +/// Parse number assuming it is base 16, but allow 0x prefix. +static bool parseNumberBase16(StringRef numStr, uint64_t &baseAddress) { + if (numStr.startswith_lower("0x")) + numStr = numStr.drop_front(2); + return numStr.getAsInteger(16, baseAddress); +} + +namespace lld { + +bool DarwinLdDriver::linkMachO(int argc, const char *argv[], + raw_ostream &diagnostics) { + MachOLinkingContext ctx; + if (!parse(argc, argv, ctx, diagnostics)) + return false; + if (ctx.doNothing()) + return true; + return link(ctx, diagnostics); +} + +bool DarwinLdDriver::parse(int argc, const char *argv[], + MachOLinkingContext &ctx, raw_ostream &diagnostics) { + // Parse command line options using DarwinLdOptions.td + std::unique_ptr<llvm::opt::InputArgList> parsedArgs; + DarwinLdOptTable table; + unsigned missingIndex; + unsigned missingCount; + bool globalWholeArchive = false; + parsedArgs.reset( + table.ParseArgs(&argv[1], &argv[argc], missingIndex, missingCount)); + if (missingCount) { + diagnostics << "error: missing arg value for '" + << parsedArgs->getArgString(missingIndex) << "' expected " + << missingCount << " argument(s).\n"; + return false; + } + + for (auto unknownArg : parsedArgs->filtered(OPT_UNKNOWN)) { + diagnostics << "warning: ignoring unknown argument: " + << unknownArg->getAsString(*parsedArgs) << "\n"; + } + + // Figure out output kind ( -dylib, -r, -bundle, -preload, or -static ) + 
llvm::MachO::HeaderFileType fileType = llvm::MachO::MH_EXECUTE; + if ( llvm::opt::Arg *kind = parsedArgs->getLastArg(OPT_dylib, OPT_relocatable, + OPT_bundle, OPT_static, OPT_preload)) { + switch (kind->getOption().getID()) { + case OPT_dylib: + fileType = llvm::MachO::MH_DYLIB; + break; + case OPT_relocatable: + fileType = llvm::MachO::MH_OBJECT; + break; + case OPT_bundle: + fileType = llvm::MachO::MH_BUNDLE; + break; + case OPT_static: + fileType = llvm::MachO::MH_EXECUTE; + break; + case OPT_preload: + fileType = llvm::MachO::MH_PRELOAD; + break; + } + } + + // Handle -arch xxx + MachOLinkingContext::Arch arch = MachOLinkingContext::arch_unknown; + if (llvm::opt::Arg *archStr = parsedArgs->getLastArg(OPT_arch)) { + arch = MachOLinkingContext::archFromName(archStr->getValue()); + if (arch == MachOLinkingContext::arch_unknown) { + diagnostics << "error: unknown arch named '" << archStr->getValue() + << "'\n"; + return false; + } + } + // If no -arch specified, scan input files to find first non-fat .o file. + if (arch == MachOLinkingContext::arch_unknown) { + for (auto &inFile: parsedArgs->filtered(OPT_INPUT)) { + // This is expensive because it opens and maps the file. But that is + // ok because no -arch is rare. + if (MachOLinkingContext::isThinObjectFile(inFile->getValue(), arch)) + break; + } + if (arch == MachOLinkingContext::arch_unknown + && !parsedArgs->getLastArg(OPT_test_file_usage)) { + // If no -arch and no options at all, print usage message. 
+ if (parsedArgs->size() == 0) + table.PrintHelp(llvm::outs(), argv[0], "LLVM Linker", false); + else + diagnostics << "error: -arch not specified and could not be inferred\n"; + return false; + } + } + + // Handle -macosx_version_min or -ios_version_min + MachOLinkingContext::OS os = MachOLinkingContext::OS::macOSX; + uint32_t minOSVersion = 0; + if (llvm::opt::Arg *minOS = + parsedArgs->getLastArg(OPT_macosx_version_min, OPT_ios_version_min, + OPT_ios_simulator_version_min)) { + switch (minOS->getOption().getID()) { + case OPT_macosx_version_min: + os = MachOLinkingContext::OS::macOSX; + if (MachOLinkingContext::parsePackedVersion(minOS->getValue(), + minOSVersion)) { + diagnostics << "error: malformed macosx_version_min value\n"; + return false; + } + break; + case OPT_ios_version_min: + os = MachOLinkingContext::OS::iOS; + if (MachOLinkingContext::parsePackedVersion(minOS->getValue(), + minOSVersion)) { + diagnostics << "error: malformed ios_version_min value\n"; + return false; + } + break; + case OPT_ios_simulator_version_min: + os = MachOLinkingContext::OS::iOS_simulator; + if (MachOLinkingContext::parsePackedVersion(minOS->getValue(), + minOSVersion)) { + diagnostics << "error: malformed ios_simulator_version_min value\n"; + return false; + } + break; + } + } else { + // No min-os version on command line, check environment variables + } + + // Now that there's enough information parsed in, let the linking context + // set up default values. 
+ ctx.configure(fileType, arch, os, minOSVersion); + + // Handle -e xxx + if (llvm::opt::Arg *entry = parsedArgs->getLastArg(OPT_entry)) + ctx.setEntrySymbolName(entry->getValue()); + + // Handle -o xxx + if (llvm::opt::Arg *outpath = parsedArgs->getLastArg(OPT_output)) + ctx.setOutputPath(outpath->getValue()); + else + ctx.setOutputPath("a.out"); + + // Handle -image_base XXX and -seg1addr XXXX + if (llvm::opt::Arg *imageBase = parsedArgs->getLastArg(OPT_image_base)) { + uint64_t baseAddress; + if (parseNumberBase16(imageBase->getValue(), baseAddress)) { + diagnostics << "error: image_base expects a hex number\n"; + return false; + } else if (baseAddress < ctx.pageZeroSize()) { + diagnostics << "error: image_base overlaps with __PAGEZERO\n"; + return false; + } else if (baseAddress % ctx.pageSize()) { + diagnostics << "error: image_base must be a multiple of page size (" + << llvm::format("0x%" PRIx64, ctx.pageSize()) << ")\n"; + return false; + } + + ctx.setBaseAddress(baseAddress); + } + + // Handle -dead_strip + if (parsedArgs->getLastArg(OPT_dead_strip)) + ctx.setDeadStripping(true); + + // Handle -all_load + if (parsedArgs->getLastArg(OPT_all_load)) + globalWholeArchive = true; + + // Handle -install_name + if (llvm::opt::Arg *installName = parsedArgs->getLastArg(OPT_install_name)) + ctx.setInstallName(installName->getValue()); + else + ctx.setInstallName(ctx.outputPath()); + + // Handle -mark_dead_strippable_dylib + if (parsedArgs->getLastArg(OPT_mark_dead_strippable_dylib)) + ctx.setDeadStrippableDylib(true); + + // Handle -compatibility_version and -current_version + if (llvm::opt::Arg *vers = + parsedArgs->getLastArg(OPT_compatibility_version)) { + if (ctx.outputMachOType() != llvm::MachO::MH_DYLIB) { + diagnostics + << "error: -compatibility_version can only be used with -dylib\n"; + return false; + } + uint32_t parsedVers; + if (MachOLinkingContext::parsePackedVersion(vers->getValue(), parsedVers)) { + diagnostics << "error: -compatibility_version value 
is malformed\n"; + return false; + } + ctx.setCompatibilityVersion(parsedVers); + } + + if (llvm::opt::Arg *vers = parsedArgs->getLastArg(OPT_current_version)) { + if (ctx.outputMachOType() != llvm::MachO::MH_DYLIB) { + diagnostics << "-current_version can only be used with -dylib\n"; + return false; + } + uint32_t parsedVers; + if (MachOLinkingContext::parsePackedVersion(vers->getValue(), parsedVers)) { + diagnostics << "error: -current_version value is malformed\n"; + return false; + } + ctx.setCurrentVersion(parsedVers); + } + + // Handle -bundle_loader + if (llvm::opt::Arg *loader = parsedArgs->getLastArg(OPT_bundle_loader)) + ctx.setBundleLoader(loader->getValue()); + + // Handle -sectalign segname sectname align + for (auto &alignArg : parsedArgs->filtered(OPT_sectalign)) { + const char* segName = alignArg->getValue(0); + const char* sectName = alignArg->getValue(1); + const char* alignStr = alignArg->getValue(2); + if ((alignStr[0] == '0') && (alignStr[1] == 'x')) + alignStr += 2; + unsigned long long alignValue; + if (llvm::getAsUnsignedInteger(alignStr, 16, alignValue)) { + diagnostics << "error: -sectalign alignment value '" + << alignStr << "' not a valid number\n"; + return false; + } + uint8_t align2 = llvm::countTrailingZeros(alignValue); + if ( (unsigned long)(1 << align2) != alignValue ) { + diagnostics << "warning: alignment for '-sectalign " + << segName << " " << sectName + << llvm::format(" 0x%llX", alignValue) + << "' is not a power of two, using " + << llvm::format("0x%08X", (1 << align2)) << "\n"; + } + ctx.addSectionAlignment(segName, sectName, align2); + } + + // Handle -mllvm + for (auto &llvmArg : parsedArgs->filtered(OPT_mllvm)) { + ctx.appendLLVMOption(llvmArg->getValue()); + } + + // Handle -print_atoms + if (parsedArgs->getLastArg(OPT_print_atoms)) + ctx.setPrintAtoms(); + + // Handle -t (trace) option. + if (parsedArgs->getLastArg(OPT_t)) + ctx.setLogInputFiles(true); + + // Handle -demangle option. 
+ if (parsedArgs->getLastArg(OPT_demangle)) + ctx.setDemangleSymbols(true); + + // Handle -keep_private_externs + if (parsedArgs->getLastArg(OPT_keep_private_externs)) { + ctx.setKeepPrivateExterns(true); + if (ctx.outputMachOType() != llvm::MachO::MH_OBJECT) + diagnostics << "warning: -keep_private_externs only used in -r mode\n"; + } + + // Handle -dependency_info <path> used by Xcode. + if (llvm::opt::Arg *depInfo = parsedArgs->getLastArg(OPT_dependency_info)) { + if (std::error_code ec = ctx.createDependencyFile(depInfo->getValue())) { + diagnostics << "warning: " << ec.message() + << ", processing '-dependency_info " + << depInfo->getValue() + << "'\n"; + } + } + + // In -test_file_usage mode, we'll be given an explicit list of paths that + // exist. We'll also be expected to print out information about how we located + // libraries and so on that the user specified, but not to actually do any + // linking. + if (parsedArgs->getLastArg(OPT_test_file_usage)) { + ctx.setTestingFileUsage(); + + // With paths existing by fiat, linking is not going to end well. + ctx.setDoNothing(true); + + // Only bother looking for an existence override if we're going to use it. + for (auto existingPath : parsedArgs->filtered(OPT_path_exists)) { + ctx.addExistingPathForDebug(existingPath->getValue()); + } + } + + // Register possible input file parsers. + if (!ctx.doNothing()) { + ctx.registry().addSupportMachOObjects(ctx); + ctx.registry().addSupportArchives(ctx.logInputFiles()); + ctx.registry().addSupportNativeObjects(); + ctx.registry().addSupportYamlFiles(); + } + + // Now construct the set of library search directories, following ld64's + // baroque set of accumulated hacks. Mostly, the algorithm constructs + // { syslibroots } x { libpaths } + // + // Unfortunately, there are numerous exceptions: + // 1. Only absolute paths get modified by syslibroot options. + // 2. If there is just 1 -syslibroot, system paths not found in it are + // skipped. + // 3. 
If the last -syslibroot is "/", all of them are ignored entirely. + // 4. If { syslibroots } x path == {}, the original path is kept. + std::vector<StringRef> sysLibRoots; + for (auto syslibRoot : parsedArgs->filtered(OPT_syslibroot)) { + sysLibRoots.push_back(syslibRoot->getValue()); + } + if (!sysLibRoots.empty()) { + // Ignore all if last -syslibroot is "/". + if (sysLibRoots.back() != "/") + ctx.setSysLibRoots(sysLibRoots); + } + + // Paths specified with -L come first, and are not considered system paths for + // the case where there is precisely 1 -syslibroot. + for (auto libPath : parsedArgs->filtered(OPT_L)) { + ctx.addModifiedSearchDir(libPath->getValue()); + } + + // Process -F directories (where to look for frameworks). + for (auto fwPath : parsedArgs->filtered(OPT_F)) { + ctx.addFrameworkSearchDir(fwPath->getValue()); + } + + // -Z suppresses the standard search paths. + if (!parsedArgs->hasArg(OPT_Z)) { + ctx.addModifiedSearchDir("/usr/lib", true); + ctx.addModifiedSearchDir("/usr/local/lib", true); + ctx.addFrameworkSearchDir("/Library/Frameworks", true); + ctx.addFrameworkSearchDir("/System/Library/Frameworks", true); + } + + // Now that we've constructed the final set of search paths, print out those + // search paths in verbose mode. 
+ if (parsedArgs->getLastArg(OPT_v)) { + diagnostics << "Library search paths:\n"; + for (auto path : ctx.searchDirs()) { + diagnostics << " " << path << '\n'; + } + diagnostics << "Framework search paths:\n"; + for (auto path : ctx.frameworkDirs()) { + diagnostics << " " << path << '\n'; + } + } + + // Handle -exported_symbols_list <file> + for (auto expFile : parsedArgs->filtered(OPT_exported_symbols_list)) { + if (ctx.exportMode() == MachOLinkingContext::ExportMode::blackList) { + diagnostics << "error: -exported_symbols_list cannot be combined " + << "with -unexported_symbol[s_list]\n"; + return false; + } + ctx.setExportMode(MachOLinkingContext::ExportMode::whiteList); + if (std::error_code ec = parseExportsList(expFile->getValue(), ctx, + diagnostics)) { + diagnostics << "error: " << ec.message() + << ", processing '-exported_symbols_list " + << expFile->getValue() + << "'\n"; + return false; + } + } + + // Handle -exported_symbol <symbol> + for (auto symbol : parsedArgs->filtered(OPT_exported_symbol)) { + if (ctx.exportMode() == MachOLinkingContext::ExportMode::blackList) { + diagnostics << "error: -exported_symbol cannot be combined " + << "with -unexported_symbol[s_list]\n"; + return false; + } + ctx.setExportMode(MachOLinkingContext::ExportMode::whiteList); + ctx.addExportSymbol(symbol->getValue()); + } + + // Handle -unexported_symbols_list <file> + for (auto expFile : parsedArgs->filtered(OPT_unexported_symbols_list)) { + if (ctx.exportMode() == MachOLinkingContext::ExportMode::whiteList) { + diagnostics << "error: -unexported_symbols_list cannot be combined " + << "with -exported_symbol[s_list]\n"; + return false; + } + ctx.setExportMode(MachOLinkingContext::ExportMode::blackList); + if (std::error_code ec = parseExportsList(expFile->getValue(), ctx, + diagnostics)) { + diagnostics << "error: " << ec.message() + << ", processing '-unexported_symbols_list " + << expFile->getValue() + << "'\n"; + return false; + } + } + + // Handle -unexported_symbol 
<symbol> + for (auto symbol : parsedArgs->filtered(OPT_unexported_symbol)) { + if (ctx.exportMode() == MachOLinkingContext::ExportMode::whiteList) { + diagnostics << "error: -unexported_symbol cannot be combined " + << "with -exported_symbol[s_list]\n"; + return false; + } + ctx.setExportMode(MachOLinkingContext::ExportMode::blackList); + ctx.addExportSymbol(symbol->getValue()); + } + + // Handle obosolete -multi_module and -single_module + if (llvm::opt::Arg *mod = parsedArgs->getLastArg(OPT_multi_module, + OPT_single_module)) { + if (mod->getOption().getID() == OPT_multi_module) { + diagnostics << "warning: -multi_module is obsolete and being ignored\n"; + } + else { + if (ctx.outputMachOType() != llvm::MachO::MH_DYLIB) { + diagnostics << "warning: -single_module being ignored. " + "It is only for use when producing a dylib\n"; + } + } + } + + // Handle -pie or -no_pie + if (llvm::opt::Arg *pie = parsedArgs->getLastArg(OPT_pie, OPT_no_pie)) { + switch (ctx.outputMachOType()) { + case llvm::MachO::MH_EXECUTE: + switch (ctx.os()) { + case MachOLinkingContext::OS::macOSX: + if ((minOSVersion < 0x000A0500) && + (pie->getOption().getID() == OPT_pie)) { + diagnostics << "-pie can only be used when targeting " + "Mac OS X 10.5 or later\n"; + return false; + } + break; + case MachOLinkingContext::OS::iOS: + if ((minOSVersion < 0x00040200) && + (pie->getOption().getID() == OPT_pie)) { + diagnostics << "-pie can only be used when targeting " + "iOS 4.2 or later\n"; + return false; + } + break; + case MachOLinkingContext::OS::iOS_simulator: + if (pie->getOption().getID() == OPT_no_pie) + diagnostics << "iOS simulator programs must be built PIE\n"; + return false; + break; + case MachOLinkingContext::OS::unknown: + break; + } + ctx.setPIE(pie->getOption().getID() == OPT_pie); + break; + case llvm::MachO::MH_PRELOAD: + break; + case llvm::MachO::MH_DYLIB: + case llvm::MachO::MH_BUNDLE: + diagnostics << "warning: " << pie->getSpelling() << " being ignored. 
" + << "It is only used when linking main executables\n"; + break; + default: + diagnostics << pie->getSpelling() + << " can only used when linking main executables\n"; + return false; + break; + } + } + + // Handle debug info handling options: -S + if (parsedArgs->hasArg(OPT_S)) + ctx.setDebugInfoMode(MachOLinkingContext::DebugInfoMode::noDebugMap); + + // Handle -order_file <file> + for (auto orderFile : parsedArgs->filtered(OPT_order_file)) { + if (std::error_code ec = parseOrderFile(orderFile->getValue(), ctx, + diagnostics)) { + diagnostics << "error: " << ec.message() + << ", processing '-order_file " + << orderFile->getValue() + << "'\n"; + return false; + } + } + + // Handle -rpath <path> + if (parsedArgs->hasArg(OPT_rpath)) { + switch (ctx.outputMachOType()) { + case llvm::MachO::MH_EXECUTE: + case llvm::MachO::MH_DYLIB: + case llvm::MachO::MH_BUNDLE: + if (!ctx.minOS("10.5", "2.0")) { + if (ctx.os() == MachOLinkingContext::OS::macOSX) { + diagnostics << "error: -rpath can only be used when targeting " + "OS X 10.5 or later\n"; + } else { + diagnostics << "error: -rpath can only be used when targeting " + "iOS 2.0 or later\n"; + } + return false; + } + break; + default: + diagnostics << "error: -rpath can only be used when creating " + "a dynamic final linked image\n"; + return false; + } + + for (auto rPath : parsedArgs->filtered(OPT_rpath)) { + ctx.addRpath(rPath->getValue()); + } + } + + // Handle input files + for (auto &arg : *parsedArgs) { + bool upward; + ErrorOr<StringRef> resolvedPath = StringRef(); + switch (arg->getOption().getID()) { + default: + continue; + case OPT_INPUT: + addFile(arg->getValue(), ctx, globalWholeArchive, false, diagnostics); + break; + case OPT_upward_library: + addFile(arg->getValue(), ctx, false, true, diagnostics); + break; + case OPT_force_load: + addFile(arg->getValue(), ctx, true, false, diagnostics); + break; + case OPT_l: + case OPT_upward_l: + upward = (arg->getOption().getID() == OPT_upward_l); + resolvedPath = 
ctx.searchLibrary(arg->getValue()); + if (!resolvedPath) { + diagnostics << "Unable to find library for " << arg->getSpelling() + << arg->getValue() << "\n"; + return false; + } else if (ctx.testingFileUsage()) { + diagnostics << "Found " << (upward ? "upward " : " ") << "library " + << canonicalizePath(resolvedPath.get()) << '\n'; + } + addFile(resolvedPath.get(), ctx, globalWholeArchive, upward, diagnostics); + break; + case OPT_framework: + case OPT_upward_framework: + upward = (arg->getOption().getID() == OPT_upward_framework); + resolvedPath = ctx.findPathForFramework(arg->getValue()); + if (!resolvedPath) { + diagnostics << "Unable to find framework for " + << arg->getSpelling() << " " << arg->getValue() << "\n"; + return false; + } else if (ctx.testingFileUsage()) { + diagnostics << "Found " << (upward ? "upward " : " ") << "framework " + << canonicalizePath(resolvedPath.get()) << '\n'; + } + addFile(resolvedPath.get(), ctx, globalWholeArchive, upward, diagnostics); + break; + case OPT_filelist: + if (std::error_code ec = loadFileList(arg->getValue(), + ctx, globalWholeArchive, + diagnostics)) { + diagnostics << "error: " << ec.message() + << ", processing '-filelist " << arg->getValue() + << "'\n"; + return false; + } + break; + } + } + + if (ctx.getNodes().empty()) { + diagnostics << "No input files\n"; + return false; + } + + // Validate the combination of options used. 
+ return ctx.validate(diagnostics); +} + + +} // namespace lld diff --git a/lib/Driver/DarwinLdOptions.td b/lib/Driver/DarwinLdOptions.td new file mode 100644 index 000000000000..81dcc0a1d925 --- /dev/null +++ b/lib/Driver/DarwinLdOptions.td @@ -0,0 +1,187 @@ +include "llvm/Option/OptParser.td" + + +// output kinds +def grp_kind : OptionGroup<"outs">, HelpText<"OUTPUT KIND">; +def relocatable : Flag<["-"], "r">, + HelpText<"Create relocatable object file">, Group<grp_kind>; +def static : Flag<["-"], "static">, + HelpText<"Create static executable">, Group<grp_kind>; +def dynamic : Flag<["-"], "dynamic">, + HelpText<"Create dynamic executable (default)">,Group<grp_kind>; +def dylib : Flag<["-"], "dylib">, + HelpText<"Create dynamic library">, Group<grp_kind>; +def bundle : Flag<["-"], "bundle">, + HelpText<"Create dynamic bundle">, Group<grp_kind>; +def execute : Flag<["-"], "execute">, + HelpText<"Create main executable (default)">, Group<grp_kind>; +def preload : Flag<["-"], "preload">, + HelpText<"Create binary for use with embedded systems">, Group<grp_kind>; + +// optimizations +def grp_opts : OptionGroup<"opts">, HelpText<"OPTIMIZATIONS">; +def dead_strip : Flag<["-"], "dead_strip">, + HelpText<"Remove unreference code and data">, Group<grp_opts>; +def macosx_version_min : Separate<["-"], "macosx_version_min">, + MetaVarName<"<version>">, + HelpText<"Minimum Mac OS X version">, Group<grp_opts>; +def ios_version_min : Separate<["-"], "ios_version_min">, + MetaVarName<"<version>">, + HelpText<"Minimum iOS version">, Group<grp_opts>; +def iphoneos_version_min : Separate<["-"], "iphoneos_version_min">, + Alias<ios_version_min>; +def ios_simulator_version_min : Separate<["-"], "ios_simulator_version_min">, + MetaVarName<"<version>">, + HelpText<"Minimum iOS simulator version">, Group<grp_opts>; +def mllvm : Separate<["-"], "mllvm">, + MetaVarName<"<option>">, + HelpText<"Options to pass to LLVM during LTO">, Group<grp_opts>; +def exported_symbols_list : 
Separate<["-"], "exported_symbols_list">, + MetaVarName<"<file-path>">, + HelpText<"Restricts which symbols will be exported">, Group<grp_opts>; +def exported_symbol : Separate<["-"], "exported_symbol">, + MetaVarName<"<symbol>">, + HelpText<"Restricts which symbols will be exported">, Group<grp_opts>; +def unexported_symbols_list : Separate<["-"], "unexported_symbols_list">, + MetaVarName<"<file-path>">, + HelpText<"Lists symbols that should not be exported">, Group<grp_opts>; +def unexported_symbol : Separate<["-"], "unexported_symbol">, + MetaVarName<"<symbol>">, + HelpText<"A symbol which should not be exported">, Group<grp_opts>; +def keep_private_externs : Flag<["-"], "keep_private_externs">, + HelpText<"Private extern (hidden) symbols should not be transformed " + "into local symbols">, Group<grp_opts>; +def order_file : Separate<["-"], "order_file">, + MetaVarName<"<file-path>">, + HelpText<"re-order and move specified symbols to start of their section">, + Group<grp_opts>; + +// main executable options +def grp_main : OptionGroup<"opts">, HelpText<"MAIN EXECUTABLE OPTIONS">; +def entry : Separate<["-"], "e">, + MetaVarName<"<entry-name>">, + HelpText<"entry symbol name">,Group<grp_main>; +def pie : Flag<["-"], "pie">, + HelpText<"Create Position Independent Executable (for ASLR)">, + Group<grp_main>; +def no_pie : Flag<["-"], "no_pie">, + HelpText<"Do not create Position Independent Executable">, + Group<grp_main>; + +// dylib executable options +def grp_dylib : OptionGroup<"opts">, HelpText<"DYLIB EXECUTABLE OPTIONS">; +def install_name : Separate<["-"], "install_name">, + MetaVarName<"<path>">, + HelpText<"The dylib's install name">, Group<grp_dylib>; +def mark_dead_strippable_dylib : Flag<["-"], "mark_dead_strippable_dylib">, + HelpText<"Marks the dylib as having no side effects during initialization">, + Group<grp_dylib>; +def compatibility_version : Separate<["-"], "compatibility_version">, + MetaVarName<"<version>">, + HelpText<"The dylib's 
compatibility version">, Group<grp_dylib>; +def current_version : Separate<["-"], "current_version">, + MetaVarName<"<version>">, + HelpText<"The dylib's current version">, Group<grp_dylib>; + +// dylib executable options - compatibility aliases +def dylib_install_name : Separate<["-"], "dylib_install_name">, + Alias<install_name>; +def dylib_compatibility_version : Separate<["-"], "dylib_compatibility_version">, + MetaVarName<"<version>">, Alias<compatibility_version>; +def dylib_current_version : Separate<["-"], "dylib_current_version">, + MetaVarName<"<version>">, Alias<current_version>; + +// bundle executable options +def grp_bundle : OptionGroup<"opts">, HelpText<"BUNDLE EXECUTABLE OPTIONS">; +def bundle_loader : Separate<["-"], "bundle_loader">, + MetaVarName<"<path>">, + HelpText<"The executable that will be loading this Mach-O bundle">, + Group<grp_bundle>; + +// library options +def grp_libs : OptionGroup<"libs">, HelpText<"LIBRARY OPTIONS">; +def L : JoinedOrSeparate<["-"], "L">, + MetaVarName<"<dir>">, + HelpText<"Add directory to library search path">, Group<grp_libs>; +def F : JoinedOrSeparate<["-"], "F">, + MetaVarName<"<dir>">, + HelpText<"Add directory to framework search path">, Group<grp_libs>; +def Z : Flag<["-"], "Z">, + HelpText<"Do not search standard directories for libraries or frameworks">; +def all_load : Flag<["-"], "all_load">, + HelpText<"Forces all members of all static libraries to be loaded">, + Group<grp_libs>; +def force_load : Separate<["-"], "force_load">, + MetaVarName<"<library-path>">, + HelpText<"Forces all members of specified static libraries to be loaded">, + Group<grp_libs>; +def syslibroot : Separate<["-"], "syslibroot">, MetaVarName<"<dir>">, + HelpText<"Add path to SDK to all absolute library search paths">, + Group<grp_libs>; + +// Input options +def l : Joined<["-"], "l">, + MetaVarName<"<libname>">, + HelpText<"Base name of library searched for in -L directories">; +def upward_l : Joined<["-"], "upward-l">, + 
MetaVarName<"<libname>">, + HelpText<"Base name of upward library searched for in -L directories">; +def framework : Separate<["-"], "framework">, + MetaVarName<"<name>">, + HelpText<"Base name of framework searched for in -F directories">; +def upward_framework : Separate<["-"], "upward_framework">, + MetaVarName<"<name>">, + HelpText<"Base name of upward framework searched for in -F directories">; +def upward_library : Separate<["-"], "upward_library">, + MetaVarName<"<path>">, + HelpText<"path to upward dylib to link with">; +def filelist : Separate<["-"], "filelist">, + MetaVarName<"<path>">, + HelpText<"file containing paths to input files">; + + +// test case options +def print_atoms : Flag<["-"], "print_atoms">, + HelpText<"Emit output as yaml atoms">; +def test_file_usage : Flag<["-"], "test_file_usage">, + HelpText<"Only files specified by -file_exists are considered to exist. " + "Print which files would be used">; +def path_exists : Separate<["-"], "path_exists">, + MetaVarName<"<path>">, + HelpText<"Used with -test_file_usage to declare a path">; + + +// general options +def output : Separate<["-"], "o">, + MetaVarName<"<path>">, + HelpText<"Output file path">; +def arch : Separate<["-"], "arch">, + MetaVarName<"<arch-name>">, + HelpText<"Architecture to link">; +def sectalign : MultiArg<["-"], "sectalign", 3>, + MetaVarName<"<segname> <sectname> <alignment>">, + HelpText<"alignment for segment/section">; +def image_base : Separate<["-"], "image_base">; +def seg1addr : Separate<["-"], "seg1addr">, Alias<image_base>; +def demangle : Flag<["-"], "demangle">, + HelpText<"Demangles symbol names in errors and warnings">; +def dependency_info : Separate<["-"], "dependency_info">, + MetaVarName<"<file>">, + HelpText<"Write binary list of files used during link">; +def S : Flag<["-"], "S">, + HelpText<"Remove debug information (STABS or DWARF) from the output file">; +def rpath : Separate<["-"], "rpath">, + MetaVarName<"<path>">, + HelpText<"Add path to the 
runpath search path list for image being created">; + +def t : Flag<["-"], "t">, + HelpText<"Print the names of the input files as ld processes them">; +def v : Flag<["-"], "v">, + HelpText<"Print linker information">; + +// Obsolete options +def grp_obsolete : OptionGroup<"obsolete">, HelpText<"OBSOLETE OPTIONS">; +def single_module : Flag<["-"], "single_module">, + HelpText<"Default for dylibs">, Group<grp_obsolete>; +def multi_module : Flag<["-"], "multi_module">, + HelpText<"Unsupported way to build dylibs">, Group<grp_obsolete>; diff --git a/lib/Driver/Driver.cpp b/lib/Driver/Driver.cpp new file mode 100644 index 000000000000..d32bfa6e47be --- /dev/null +++ b/lib/Driver/Driver.cpp @@ -0,0 +1,130 @@ +//===- lib/Driver/Driver.cpp - Linker Driver Emulator ---------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "lld/Core/ArchiveLibraryFile.h" +#include "lld/Core/File.h" +#include "lld/Core/Instrumentation.h" +#include "lld/Core/LLVM.h" +#include "lld/Core/Parallel.h" +#include "lld/Core/PassManager.h" +#include "lld/Core/Reader.h" +#include "lld/Core/Resolver.h" +#include "lld/Core/Writer.h" +#include "lld/Driver/Driver.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Option/Arg.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/Process.h" +#include "llvm/Support/raw_ostream.h" +#include <mutex> + +namespace lld { + +FileVector makeErrorFile(StringRef path, std::error_code ec) { + std::vector<std::unique_ptr<File>> result; + result.push_back(llvm::make_unique<ErrorFile>(path, ec)); + return result; +} + +FileVector parseMemberFiles(FileVector &files) { + std::vector<std::unique_ptr<File>> members; 
+ for (std::unique_ptr<File> &file : files) { + if (auto *archive = dyn_cast<ArchiveLibraryFile>(file.get())) { + if (std::error_code ec = archive->parseAllMembers(members)) + return makeErrorFile(file->path(), ec); + } else { + members.push_back(std::move(file)); + } + } + return members; +} + +FileVector loadFile(LinkingContext &ctx, StringRef path, bool wholeArchive) { + ErrorOr<std::unique_ptr<MemoryBuffer>> mb + = MemoryBuffer::getFileOrSTDIN(path); + if (std::error_code ec = mb.getError()) + return makeErrorFile(path, ec); + std::vector<std::unique_ptr<File>> files; + if (std::error_code ec = ctx.registry().loadFile(std::move(mb.get()), files)) + return makeErrorFile(path, ec); + if (wholeArchive) + return parseMemberFiles(files); + return files; +} + +/// This is where the link is actually performed. +bool Driver::link(LinkingContext &context, raw_ostream &diagnostics) { + // Honor -mllvm + if (!context.llvmOptions().empty()) { + unsigned numArgs = context.llvmOptions().size(); + const char **args = new const char *[numArgs + 2]; + args[0] = "lld (LLVM option parsing)"; + for (unsigned i = 0; i != numArgs; ++i) + args[i + 1] = context.llvmOptions()[i]; + args[numArgs + 1] = 0; + llvm::cl::ParseCommandLineOptions(numArgs + 1, args); + } + if (context.getNodes().empty()) + return false; + + for (std::unique_ptr<Node> &ie : context.getNodes()) + if (FileNode *node = dyn_cast<FileNode>(ie.get())) + context.getTaskGroup().spawn([node] { node->getFile()->parse(); }); + + std::vector<std::unique_ptr<File>> internalFiles; + context.createInternalFiles(internalFiles); + for (auto i = internalFiles.rbegin(), e = internalFiles.rend(); i != e; ++i) { + auto &members = context.getNodes(); + members.insert(members.begin(), llvm::make_unique<FileNode>(std::move(*i))); + } + + // Give target a chance to add files. 
+ std::vector<std::unique_ptr<File>> implicitFiles; + context.createImplicitFiles(implicitFiles); + for (auto i = implicitFiles.rbegin(), e = implicitFiles.rend(); i != e; ++i) { + auto &members = context.getNodes(); + members.insert(members.begin(), llvm::make_unique<FileNode>(std::move(*i))); + } + + // Give target a chance to postprocess input files. + // Mach-O uses this chance to move all object files before library files. + // ELF adds specific undefined symbols resolver. + context.finalizeInputFiles(); + + // Do core linking. + ScopedTask resolveTask(getDefaultDomain(), "Resolve"); + Resolver resolver(context); + if (!resolver.resolve()) + return false; + std::unique_ptr<MutableFile> merged = resolver.resultFile(); + resolveTask.end(); + + // Run passes on linked atoms. + ScopedTask passTask(getDefaultDomain(), "Passes"); + PassManager pm; + context.addPasses(pm); + pm.runOnFile(merged); + passTask.end(); + + // Give linked atoms to Writer to generate output file. + ScopedTask writeTask(getDefaultDomain(), "Write"); + if (std::error_code ec = context.writeFile(*merged)) { + diagnostics << "Failed to write file '" << context.outputPath() + << "': " << ec.message() << "\n"; + return false; + } + + return true; +} + +} // namespace diff --git a/lib/Driver/GnuLdDriver.cpp b/lib/Driver/GnuLdDriver.cpp new file mode 100644 index 000000000000..b9af04d4b615 --- /dev/null +++ b/lib/Driver/GnuLdDriver.cpp @@ -0,0 +1,760 @@ +//===- lib/Driver/GnuLdDriver.cpp -----------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// +/// Concrete instance of the Driver for GNU's ld. 
+/// +//===----------------------------------------------------------------------===// + +#include "lld/Driver/Driver.h" +#include "lld/ReaderWriter/ELFLinkingContext.h" +#include "lld/ReaderWriter/ELFTargets.h" +#include "lld/ReaderWriter/LinkerScript.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Option/Arg.h" +#include "llvm/Option/Option.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Host.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/PrettyStackTrace.h" +#include "llvm/Support/Signals.h" +#include "llvm/Support/Timer.h" +#include "llvm/Support/raw_ostream.h" +#include <cstring> +#include <tuple> + +using namespace lld; + +using llvm::BumpPtrAllocator; + +namespace { + +// Create enum with OPT_xxx values for each option in GnuLdOptions.td +enum { + OPT_INVALID = 0, +#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ + HELP, META) \ + OPT_##ID, +#include "GnuLdOptions.inc" +#undef OPTION +}; + +// Create prefix string literals used in GnuLdOptions.td +#define PREFIX(NAME, VALUE) const char *const NAME[] = VALUE; +#include "GnuLdOptions.inc" +#undef PREFIX + +// Create table mapping all options defined in GnuLdOptions.td +static const llvm::opt::OptTable::Info infoTable[] = { +#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ + HELPTEXT, METAVAR) \ + { PREFIX, NAME, HELPTEXT, METAVAR, OPT_##ID, llvm::opt::Option::KIND##Class, \ + PARAM, FLAGS, OPT_##GROUP, OPT_##ALIAS, ALIASARGS }, +#include "GnuLdOptions.inc" +#undef OPTION +}; + + +// Create OptTable class for parsing actual command line arguments +class GnuLdOptTable : public llvm::opt::OptTable { +public: + GnuLdOptTable() : OptTable(infoTable, llvm::array_lengthof(infoTable)){} 
+}; + +class DriverStringSaver : public llvm::cl::StringSaver { +public: + DriverStringSaver(BumpPtrAllocator &alloc) : _alloc(alloc) {} + + const char *SaveString(const char *s) override { + char *p = _alloc.Allocate<char>(strlen(s) + 1); + strcpy(p, s); + return p; + } + +private: + BumpPtrAllocator &_alloc; +}; + +} // anonymous namespace + +// If a command line option starts with "@", the driver reads its suffix as a +// file, parse its contents as a list of command line options, and insert them +// at the original @file position. If file cannot be read, @file is not expanded +// and left unmodified. @file can appear in a response file, so it's a recursive +// process. +static std::tuple<int, const char **> +maybeExpandResponseFiles(int argc, const char **argv, BumpPtrAllocator &alloc) { + // Expand response files. + SmallVector<const char *, 256> smallvec; + for (int i = 0; i < argc; ++i) + smallvec.push_back(argv[i]); + DriverStringSaver saver(alloc); + llvm::cl::ExpandResponseFiles(saver, llvm::cl::TokenizeGNUCommandLine, smallvec); + + // Pack the results to a C-array and return it. 
+ argc = smallvec.size(); + const char **copy = alloc.Allocate<const char *>(argc + 1); + std::copy(smallvec.begin(), smallvec.end(), copy); + copy[argc] = nullptr; + return std::make_tuple(argc, copy); +} + +static std::error_code +getFileMagic(StringRef path, llvm::sys::fs::file_magic &magic) { + std::error_code ec = llvm::sys::fs::identify_magic(path, magic); + if (ec) + return ec; + switch (magic) { + case llvm::sys::fs::file_magic::archive: + case llvm::sys::fs::file_magic::elf_relocatable: + case llvm::sys::fs::file_magic::elf_shared_object: + case llvm::sys::fs::file_magic::unknown: + return std::error_code(); + default: + return make_dynamic_error_code(StringRef("unknown type of object file")); + } +} + +// Parses an argument of --defsym=<sym>=<number> +static bool parseDefsymAsAbsolute(StringRef opt, StringRef &sym, + uint64_t &addr) { + size_t equalPos = opt.find('='); + if (equalPos == 0 || equalPos == StringRef::npos) + return false; + sym = opt.substr(0, equalPos); + if (opt.substr(equalPos + 1).getAsInteger(0, addr)) + return false; + return true; +} + +// Parses an argument of --defsym=<sym>=<sym> +static bool parseDefsymAsAlias(StringRef opt, StringRef &sym, + StringRef &target) { + size_t equalPos = opt.find('='); + if (equalPos == 0 || equalPos == StringRef::npos) + return false; + sym = opt.substr(0, equalPos); + target = opt.substr(equalPos + 1); + return !target.empty(); +} + +// Parses -z max-page-size=<value> +static bool parseMaxPageSize(StringRef opt, uint64_t &val) { + size_t equalPos = opt.find('='); + if (equalPos == 0 || equalPos == StringRef::npos) + return false; + StringRef value = opt.substr(equalPos + 1); + val = 0; + if (value.getAsInteger(0, val) || !val) + return false; + return true; +} + +bool GnuLdDriver::linkELF(int argc, const char *argv[], raw_ostream &diag) { + BumpPtrAllocator alloc; + std::tie(argc, argv) = maybeExpandResponseFiles(argc, argv, alloc); + std::unique_ptr<ELFLinkingContext> options; + if (!parse(argc, 
argv, options, diag)) + return false; + if (!options) + return true; + bool linked = link(*options, diag); + + // Handle --stats. + if (options->collectStats()) { + llvm::TimeRecord t = llvm::TimeRecord::getCurrentTime(true); + diag << "total time in link " << t.getProcessTime() << "\n"; + diag << "data size " << t.getMemUsed() << "\n"; + } + return linked; +} + +static llvm::Optional<llvm::Triple::ArchType> +getArchType(const llvm::Triple &triple, StringRef value) { + switch (triple.getArch()) { + case llvm::Triple::x86: + case llvm::Triple::x86_64: + if (value == "elf_i386") + return llvm::Triple::x86; + if (value == "elf_x86_64") + return llvm::Triple::x86_64; + return llvm::None; + case llvm::Triple::mipsel: + case llvm::Triple::mips64el: + if (value == "elf32ltsmip") + return llvm::Triple::mipsel; + if (value == "elf64ltsmip") + return llvm::Triple::mips64el; + return llvm::None; + case llvm::Triple::aarch64: + if (value == "aarch64linux") + return llvm::Triple::aarch64; + return llvm::None; + case llvm::Triple::arm: + if (value == "armelf_linux_eabi") + return llvm::Triple::arm; + return llvm::None; + default: + return llvm::None; + } +} + +static bool isLinkerScript(StringRef path, raw_ostream &diag) { + llvm::sys::fs::file_magic magic = llvm::sys::fs::file_magic::unknown; + std::error_code ec = getFileMagic(path, magic); + if (ec) { + diag << "unknown input file format for file " << path << "\n"; + return false; + } + return magic == llvm::sys::fs::file_magic::unknown; +} + +static ErrorOr<StringRef> +findFile(ELFLinkingContext &ctx, StringRef path, bool dashL) { + // If the path was referred to by using a -l argument, let's search + // for the file in the search path. 
+ if (dashL) { + ErrorOr<StringRef> pathOrErr = ctx.searchLibrary(path); + if (std::error_code ec = pathOrErr.getError()) + return make_dynamic_error_code( + Twine("Unable to find library -l") + path + ": " + ec.message()); + path = pathOrErr.get(); + } + if (!llvm::sys::fs::exists(path)) + return make_dynamic_error_code( + Twine("lld: cannot find file ") + path); + return path; +} + +static bool isPathUnderSysroot(StringRef sysroot, StringRef path) { + if (sysroot.empty()) + return false; + while (!path.empty() && !llvm::sys::fs::equivalent(sysroot, path)) + path = llvm::sys::path::parent_path(path); + return !path.empty(); +} + +static std::error_code +addFilesFromLinkerScript(ELFLinkingContext &ctx, StringRef scriptPath, + const std::vector<script::Path> &inputPaths, + raw_ostream &diag) { + bool sysroot = (!ctx.getSysroot().empty() + && isPathUnderSysroot(ctx.getSysroot(), scriptPath)); + for (const script::Path &path : inputPaths) { + ErrorOr<StringRef> pathOrErr = path._isDashlPrefix + ? ctx.searchLibrary(path._path) : ctx.searchFile(path._path, sysroot); + if (std::error_code ec = pathOrErr.getError()) { + auto file = llvm::make_unique<ErrorFile>(path._path, ec); + ctx.getNodes().push_back(llvm::make_unique<FileNode>(std::move(file))); + continue; + } + + std::vector<std::unique_ptr<File>> files + = loadFile(ctx, pathOrErr.get(), false); + for (std::unique_ptr<File> &file : files) { + if (ctx.logInputFiles()) + diag << file->path() << "\n"; + ctx.getNodes().push_back(llvm::make_unique<FileNode>(std::move(file))); + } + } + return std::error_code(); +} + +std::error_code GnuLdDriver::evalLinkerScript(ELFLinkingContext &ctx, + std::unique_ptr<MemoryBuffer> mb, + raw_ostream &diag, + bool nostdlib) { + // Read the script file from disk and parse. 
+ StringRef path = mb->getBufferIdentifier(); + auto parser = llvm::make_unique<script::Parser>(std::move(mb)); + if (std::error_code ec = parser->parse()) + return ec; + script::LinkerScript *script = parser->get(); + if (!script) + return LinkerScriptReaderError::parse_error; + // Evaluate script commands. + // Currently we only recognize this subset of linker script commands. + for (const script::Command *c : script->_commands) { + if (auto *input = dyn_cast<script::Input>(c)) + if (std::error_code ec = addFilesFromLinkerScript( + ctx, path, input->getPaths(), diag)) + return ec; + if (auto *group = dyn_cast<script::Group>(c)) { + int origSize = ctx.getNodes().size(); + if (std::error_code ec = addFilesFromLinkerScript( + ctx, path, group->getPaths(), diag)) + return ec; + size_t groupSize = ctx.getNodes().size() - origSize; + ctx.getNodes().push_back(llvm::make_unique<GroupEnd>(groupSize)); + } + if (auto *searchDir = dyn_cast<script::SearchDir>(c)) + if (!nostdlib) + ctx.addSearchPath(searchDir->getSearchPath()); + if (auto *entry = dyn_cast<script::Entry>(c)) + ctx.setEntrySymbolName(entry->getEntryName()); + if (auto *output = dyn_cast<script::Output>(c)) + ctx.setOutputPath(output->getOutputFileName()); + if (auto *externs = dyn_cast<script::Extern>(c)) { + for (auto symbol : *externs) { + ctx.addInitialUndefinedSymbol(symbol); + } + } + } + // Transfer ownership of the script to the linking context + ctx.linkerScriptSema().addLinkerScript(std::move(parser)); + return std::error_code(); +} + +bool GnuLdDriver::applyEmulation(llvm::Triple &triple, + llvm::opt::InputArgList &args, + raw_ostream &diag) { + llvm::opt::Arg *arg = args.getLastArg(OPT_m); + if (!arg) + return true; + llvm::Optional<llvm::Triple::ArchType> arch = + getArchType(triple, arg->getValue()); + if (!arch) { + diag << "error: unsupported emulation '" << arg->getValue() << "'.\n"; + return false; + } + triple.setArch(*arch); + return true; +} + +void 
GnuLdDriver::addPlatformSearchDirs(ELFLinkingContext &ctx, + llvm::Triple &triple, + llvm::Triple &baseTriple) { + if (triple.getOS() == llvm::Triple::NetBSD && + triple.getArch() == llvm::Triple::x86 && + baseTriple.getArch() == llvm::Triple::x86_64) { + ctx.addSearchPath("=/usr/lib/i386"); + return; + } + ctx.addSearchPath("=/usr/lib"); +} + +std::unique_ptr<ELFLinkingContext> +GnuLdDriver::createELFLinkingContext(llvm::Triple triple) { + std::unique_ptr<ELFLinkingContext> p; + // FIXME: #include "llvm/Config/Targets.def" +#define LLVM_TARGET(targetName) \ + if ((p = elf::targetName##LinkingContext::create(triple))) return p; + LLVM_TARGET(AArch64) + LLVM_TARGET(ARM) + LLVM_TARGET(Hexagon) + LLVM_TARGET(Mips) + LLVM_TARGET(X86) + LLVM_TARGET(Example) + LLVM_TARGET(X86_64) +#undef LLVM_TARGET + return nullptr; +} + +static llvm::Optional<bool> +getBool(const llvm::opt::InputArgList &parsedArgs, + unsigned yesFlag, unsigned noFlag) { + if (auto *arg = parsedArgs.getLastArg(yesFlag, noFlag)) + return arg->getOption().getID() == yesFlag; + return llvm::None; +} + +bool GnuLdDriver::parse(int argc, const char *argv[], + std::unique_ptr<ELFLinkingContext> &context, + raw_ostream &diag) { + // Parse command line options using GnuLdOptions.td + std::unique_ptr<llvm::opt::InputArgList> parsedArgs; + GnuLdOptTable table; + unsigned missingIndex; + unsigned missingCount; + + parsedArgs.reset( + table.ParseArgs(&argv[1], &argv[argc], missingIndex, missingCount)); + if (missingCount) { + diag << "error: missing arg value for '" + << parsedArgs->getArgString(missingIndex) << "' expected " + << missingCount << " argument(s).\n"; + return false; + } + + // Handle --help + if (parsedArgs->hasArg(OPT_help)) { + table.PrintHelp(llvm::outs(), argv[0], "LLVM Linker", false); + return true; + } + + // Use -target or use default target triple to instantiate LinkingContext + llvm::Triple baseTriple; + if (auto *arg = parsedArgs->getLastArg(OPT_target)) { + baseTriple = 
llvm::Triple(arg->getValue()); + } else { + baseTriple = getDefaultTarget(argv[0]); + } + llvm::Triple triple(baseTriple); + + if (!applyEmulation(triple, *parsedArgs, diag)) + return false; + + std::unique_ptr<ELFLinkingContext> ctx(createELFLinkingContext(triple)); + + if (!ctx) { + diag << "unknown target triple\n"; + return false; + } + + // Copy mllvm + for (auto *arg : parsedArgs->filtered(OPT_mllvm)) + ctx->appendLLVMOption(arg->getValue()); + + // Ignore unknown arguments. + for (auto unknownArg : parsedArgs->filtered(OPT_UNKNOWN)) + diag << "warning: ignoring unknown argument: " + << unknownArg->getValue() << "\n"; + + // Set sys root path. + if (auto *arg = parsedArgs->getLastArg(OPT_sysroot)) + ctx->setSysroot(arg->getValue()); + + // Handle --demangle option(For compatibility) + if (parsedArgs->hasArg(OPT_demangle)) + ctx->setDemangleSymbols(true); + + // Handle --no-demangle option. + if (parsedArgs->hasArg(OPT_no_demangle)) + ctx->setDemangleSymbols(false); + + // Figure out output kind (-r, -static, -shared) + if (parsedArgs->hasArg(OPT_relocatable)) { + ctx->setOutputELFType(llvm::ELF::ET_REL); + ctx->setPrintRemainingUndefines(false); + ctx->setAllowRemainingUndefines(true); + } + + if (parsedArgs->hasArg(OPT_static)) { + ctx->setOutputELFType(llvm::ELF::ET_EXEC); + ctx->setIsStaticExecutable(true); + } + + if (parsedArgs->hasArg(OPT_shared)) { + ctx->setOutputELFType(llvm::ELF::ET_DYN); + ctx->setAllowShlibUndefines(true); + ctx->setUseShlibUndefines(false); + ctx->setPrintRemainingUndefines(false); + ctx->setAllowRemainingUndefines(true); + } + + // Handle --stats. 
+ if (parsedArgs->hasArg(OPT_stats)) { + ctx->setCollectStats(true); + } + + // Figure out if the output type is nmagic/omagic + if (auto *arg = parsedArgs->getLastArg( + OPT_nmagic, OPT_omagic, OPT_no_omagic)) { + switch (arg->getOption().getID()) { + case OPT_nmagic: + ctx->setOutputMagic(ELFLinkingContext::OutputMagic::NMAGIC); + ctx->setIsStaticExecutable(true); + break; + case OPT_omagic: + ctx->setOutputMagic(ELFLinkingContext::OutputMagic::OMAGIC); + ctx->setIsStaticExecutable(true); + break; + case OPT_no_omagic: + ctx->setOutputMagic(ELFLinkingContext::OutputMagic::DEFAULT); + ctx->setNoAllowDynamicLibraries(); + break; + } + } + + if (parsedArgs->hasArg(OPT_strip_all)) + ctx->setStripSymbols(true); + + if (auto *arg = parsedArgs->getLastArg(OPT_soname)) + ctx->setSharedObjectName(arg->getValue()); + + if (parsedArgs->hasArg(OPT_rosegment)) + ctx->setCreateSeparateROSegment(); + + if (parsedArgs->hasArg(OPT_no_align_segments)) + ctx->setAlignSegments(false); + + if (auto *arg = parsedArgs->getLastArg(OPT_image_base)) { + uint64_t baseAddress = 0; + StringRef inputValue = arg->getValue(); + if (inputValue.getAsInteger(0, baseAddress) || !baseAddress) { + diag << "invalid value for image base " << inputValue << "\n"; + return false; + } + ctx->setBaseAddress(baseAddress); + } + + if (parsedArgs->hasArg(OPT_merge_strings)) + ctx->setMergeCommonStrings(true); + + if (parsedArgs->hasArg(OPT_t)) + ctx->setLogInputFiles(true); + + if (parsedArgs->hasArg(OPT_use_shlib_undefs)) + ctx->setUseShlibUndefines(true); + + if (auto val = getBool(*parsedArgs, OPT_allow_shlib_undefs, + OPT_no_allow_shlib_undefs)) + ctx->setAllowShlibUndefines(*val); + + if (auto *arg = parsedArgs->getLastArg(OPT_e)) + ctx->setEntrySymbolName(arg->getValue()); + + if (auto *arg = parsedArgs->getLastArg(OPT_output)) + ctx->setOutputPath(arg->getValue()); + + if (parsedArgs->hasArg(OPT_noinhibit_exec)) + ctx->setAllowRemainingUndefines(true); + + if (auto val = getBool(*parsedArgs, 
OPT_export_dynamic, + OPT_no_export_dynamic)) + ctx->setExportDynamic(*val); + + if (parsedArgs->hasArg(OPT_allow_multiple_definition)) + ctx->setAllowDuplicates(true); + + if (auto *arg = parsedArgs->getLastArg(OPT_dynamic_linker)) + ctx->setInterpreter(arg->getValue()); + + if (auto *arg = parsedArgs->getLastArg(OPT_init)) + ctx->setInitFunction(arg->getValue()); + + if (auto *arg = parsedArgs->getLastArg(OPT_fini)) + ctx->setFiniFunction(arg->getValue()); + + if (auto *arg = parsedArgs->getLastArg(OPT_output_filetype)) + ctx->setOutputFileType(arg->getValue()); + + for (auto *arg : parsedArgs->filtered(OPT_L)) + ctx->addSearchPath(arg->getValue()); + + // Add the default search directory specific to the target. + if (!parsedArgs->hasArg(OPT_nostdlib)) + addPlatformSearchDirs(*ctx, triple, baseTriple); + + for (auto *arg : parsedArgs->filtered(OPT_u)) + ctx->addInitialUndefinedSymbol(arg->getValue()); + + for (auto *arg : parsedArgs->filtered(OPT_defsym)) { + StringRef sym, target; + uint64_t addr; + if (parseDefsymAsAbsolute(arg->getValue(), sym, addr)) { + ctx->addInitialAbsoluteSymbol(sym, addr); + } else if (parseDefsymAsAlias(arg->getValue(), sym, target)) { + ctx->addAlias(sym, target); + } else { + diag << "invalid --defsym: " << arg->getValue() << "\n"; + return false; + } + } + + for (auto *arg : parsedArgs->filtered(OPT_z)) { + StringRef opt = arg->getValue(); + if (opt == "muldefs") { + ctx->setAllowDuplicates(true); + } else if (opt.startswith("max-page-size")) { + // Parse -z max-page-size option. + // The default page size is considered the minimum page size the user + // can set, check the user input if its atleast the minimum page size + // and does not exceed the maximum page size allowed for the target. + uint64_t maxPageSize = 0; + + // Error if the page size user set is less than the maximum page size + // and greather than the default page size and the user page size is a + // modulo of the default page size. 
+ if ((!parseMaxPageSize(opt, maxPageSize)) || + (maxPageSize < ctx->getPageSize()) || + (maxPageSize % ctx->getPageSize())) { + diag << "invalid option: " << opt << "\n"; + return false; + } + ctx->setMaxPageSize(maxPageSize); + } else { + diag << "warning: ignoring unknown argument for -z: " << opt << "\n"; + } + } + + for (auto *arg : parsedArgs->filtered(OPT_rpath)) { + SmallVector<StringRef, 2> rpaths; + StringRef(arg->getValue()).split(rpaths, ":"); + for (auto path : rpaths) + ctx->addRpath(path); + } + + for (auto *arg : parsedArgs->filtered(OPT_rpath_link)) { + SmallVector<StringRef, 2> rpaths; + StringRef(arg->getValue()).split(rpaths, ":"); + for (auto path : rpaths) + ctx->addRpathLink(path); + } + + // Support --wrap option. + for (auto *arg : parsedArgs->filtered(OPT_wrap)) + ctx->addWrapForSymbol(arg->getValue()); + + // Register possible input file parsers. + ctx->registry().addSupportELFObjects(*ctx); + ctx->registry().addSupportArchives(ctx->logInputFiles()); + ctx->registry().addSupportYamlFiles(); + ctx->registry().addSupportNativeObjects(); + if (ctx->allowLinkWithDynamicLibraries()) + ctx->registry().addSupportELFDynamicSharedObjects(*ctx); + + std::stack<int> groupStack; + int numfiles = 0; + bool asNeeded = false; + bool wholeArchive = false; + + // Process files + for (auto arg : *parsedArgs) { + switch (arg->getOption().getID()) { + case OPT_no_whole_archive: + wholeArchive = false; + break; + + case OPT_whole_archive: + wholeArchive = true; + break; + + case OPT_as_needed: + asNeeded = true; + break; + + case OPT_no_as_needed: + asNeeded = false; + break; + + case OPT_start_group: + groupStack.push(numfiles); + break; + + case OPT_end_group: { + if (groupStack.empty()) { + diag << "stray --end-group\n"; + return false; + } + int startGroupPos = groupStack.top(); + ctx->getNodes().push_back( + llvm::make_unique<GroupEnd>(numfiles - startGroupPos)); + groupStack.pop(); + break; + } + + case OPT_INPUT: + case OPT_l: + case OPT_T: { + bool 
dashL = (arg->getOption().getID() == OPT_l); + StringRef path = arg->getValue(); + + ErrorOr<StringRef> pathOrErr = findFile(*ctx, path, dashL); + if (std::error_code ec = pathOrErr.getError()) { + auto file = llvm::make_unique<ErrorFile>(path, ec); + auto node = llvm::make_unique<FileNode>(std::move(file)); + node->setAsNeeded(asNeeded); + ctx->getNodes().push_back(std::move(node)); + break; + } + StringRef realpath = pathOrErr.get(); + + bool isScript = + (!path.endswith(".objtxt") && isLinkerScript(realpath, diag)); + if (isScript) { + if (ctx->logInputFiles()) + diag << path << "\n"; + ErrorOr<std::unique_ptr<MemoryBuffer>> mb = + MemoryBuffer::getFileOrSTDIN(realpath); + if (std::error_code ec = mb.getError()) { + diag << "Cannot open " << path << ": " << ec.message() << "\n"; + return false; + } + bool nostdlib = parsedArgs->hasArg(OPT_nostdlib); + std::error_code ec = + evalLinkerScript(*ctx, std::move(mb.get()), diag, nostdlib); + if (ec) { + diag << path << ": Error parsing linker script: " + << ec.message() << "\n"; + return false; + } + break; + } + std::vector<std::unique_ptr<File>> files + = loadFile(*ctx, realpath, wholeArchive); + for (std::unique_ptr<File> &file : files) { + if (ctx->logInputFiles()) + diag << file->path() << "\n"; + auto node = llvm::make_unique<FileNode>(std::move(file)); + node->setAsNeeded(asNeeded); + ctx->getNodes().push_back(std::move(node)); + } + numfiles += files.size(); + break; + } + } + } + + if (ctx->getNodes().empty()) { + diag << "No input files\n"; + return false; + } + + // Set default output file name if the output file was not specified. + if (ctx->outputPath().empty()) { + switch (ctx->outputFileType()) { + case LinkingContext::OutputFileType::YAML: + ctx->setOutputPath("-"); + break; + case LinkingContext::OutputFileType::Native: + ctx->setOutputPath("a.native"); + break; + default: + ctx->setOutputPath("a.out"); + break; + } + } + + // Validate the combination of options used. 
+ if (!ctx->validate(diag)) + return false; + + // Perform linker script semantic actions + ctx->linkerScriptSema().perform(); + + context.swap(ctx); + return true; +} + +/// Get the default target triple based on either the program name +/// (e.g. "x86-ibm-linux-lld") or the primary target llvm was configured for. +llvm::Triple GnuLdDriver::getDefaultTarget(const char *progName) { + SmallVector<StringRef, 4> components; + llvm::SplitString(llvm::sys::path::stem(progName), components, "-"); + // If has enough parts to be start with a triple. + if (components.size() >= 4) { + llvm::Triple triple(components[0], components[1], components[2], + components[3]); + // If first component looks like an arch. + if (triple.getArch() != llvm::Triple::UnknownArch) + return triple; + } + + // Fallback to use whatever default triple llvm was configured for. + return llvm::Triple(llvm::sys::getDefaultTargetTriple()); +} diff --git a/lib/Driver/GnuLdOptions.td b/lib/Driver/GnuLdOptions.td new file mode 100644 index 000000000000..9d06f2935439 --- /dev/null +++ b/lib/Driver/GnuLdOptions.td @@ -0,0 +1,323 @@ +include "llvm/Option/OptParser.td" + +//===----------------------------------------------------------------------===// +/// Utility Functions +//===----------------------------------------------------------------------===// +// Single and multiple dash options combined +multiclass smDash<string opt1, string opt2, string help> { + // Option + def "" : Separate<["-"], opt1>, HelpText<help>; + def opt1_eq : Joined<["-"], opt1#"=">, + Alias<!cast<Option>(opt1)>; + // Compatibility aliases + def opt2_dashdash : Separate<["--"], opt2>, + Alias<!cast<Option>(opt1)>; + def opt2_dashdash_eq : Joined<["--"], opt2#"=">, + Alias<!cast<Option>(opt1)>; +} + +// Support -<option>,-<option>= +multiclass dashEq<string opt1, string opt2, string help> { + // Option + def "" : Separate<["-"], opt1>, HelpText<help>; + // Compatibility aliases + def opt2_eq : Joined<["-"], opt2#"=">, + 
Alias<!cast<Option>(opt1)>; +} + +//===----------------------------------------------------------------------===// +/// LLVM and Target options +//===----------------------------------------------------------------------===// +def grp_llvmtarget : OptionGroup<"opts">, + HelpText<"LLVM and Target Options">; +def mllvm : Separate<["-"], "mllvm">, + HelpText<"Options to pass to LLVM">, Group<grp_llvmtarget>; +def target : Separate<["-"], "target">, MetaVarName<"<triple>">, + HelpText<"Target triple to link for">, + Group<grp_llvmtarget>; + +//===----------------------------------------------------------------------===// +/// Output Kinds +//===----------------------------------------------------------------------===// +def grp_kind : OptionGroup<"outs">, + HelpText<"OUTPUT KIND">; +def relocatable : Flag<["-"], "r">, + HelpText<"Create relocatable object file">, Group<grp_kind>; +def static : Flag<["-"], "static">, + HelpText<"Create static executable">, Group<grp_kind>; +def dynamic : Flag<["-"], "dynamic">, + HelpText<"Create dynamic executable (default)">,Group<grp_kind>; +def shared : Flag<["-"], "shared">, + HelpText<"Create dynamic library">, Group<grp_kind>; + +// output kinds - compatibility aliases +def Bstatic : Flag<["-"], "Bstatic">, Alias<static>; +def Bshareable : Flag<["-"], "Bshareable">, Alias<shared>; + +//===----------------------------------------------------------------------===// +/// General Options +//===----------------------------------------------------------------------===// +def grp_general : OptionGroup<"opts">, + HelpText<"GENERAL OPTIONS">; +def output : Separate<["-"], "o">, MetaVarName<"<path>">, + HelpText<"Path to file to write output">, + Group<grp_general>; +def m : Separate<["-"], "m">, MetaVarName<"<emulation>">, + HelpText<"Select target emulation">, + Group<grp_general>; +def build_id : Flag<["--"], "build-id">, + HelpText<"Request creation of \".note.gnu.build-id\" ELF note section">, + Group<grp_general>; +def sysroot : 
Joined<["--"], "sysroot=">, + HelpText<"Set the system root">, + Group<grp_general>; + + +//===----------------------------------------------------------------------===// +/// Executable Options +//===----------------------------------------------------------------------===// +def grp_main : OptionGroup<"opts">, + HelpText<"EXECUTABLE OPTIONS">; +def L : Joined<["-"], "L">, MetaVarName<"<dir>">, + HelpText<"Directory to search for libraries">, + Group<grp_main>; +def l : Joined<["-"], "l">, MetaVarName<"<libName>">, + HelpText<"Root name of library to use">, + Group<grp_main>; +def noinhibit_exec : Flag<["--"], "noinhibit-exec">, + HelpText<"Retain the executable output file whenever" + " it is still usable">, + Group<grp_main>; +defm e : smDash<"e", "entry", + "Name of entry point symbol">, + Group<grp_main>; +defm init: dashEq<"init", "init", + "Specify an initializer function">, + Group<grp_main>; +defm fini: dashEq<"fini", "fini", + "Specify a finalizer function">, + Group<grp_main>; +def whole_archive: Flag<["--"], "whole-archive">, + HelpText<"Force load of all members in a static library">, + Group<grp_main>; +def no_whole_archive: Flag<["--"], "no-whole-archive">, + HelpText<"Restores the default behavior of loading archive members">, + Group<grp_main>; +def nostdlib : Flag<["-"], "nostdlib">, + HelpText<"Disable default search path for libraries">, + Group<grp_main>; +def image_base : Separate<["--"], "image-base">, + HelpText<"Set the base address">, + Group<grp_main>; + +//===----------------------------------------------------------------------===// +/// Static Executable Options +//===----------------------------------------------------------------------===// +def grp_staticexec : OptionGroup<"opts">, + HelpText<"STATIC EXECUTABLE OPTIONS">; +def nmagic : Flag<["--"], "nmagic">, + HelpText<"Turn off page alignment of sections," + " and disable linking against shared libraries">, + Group<grp_staticexec>; +def omagic : Flag<["--"], "omagic">, + 
HelpText<"Set the text and data sections to be readable and writable." + " Also, do not page-align the data segment, and" + " disable linking against shared libraries.">, + Group<grp_staticexec>; +def no_omagic : Flag<["--"], "no-omagic">, + HelpText<"This option negates most of the effects of the -N option." + "Disable linking with shared libraries">, + Group<grp_staticexec>; +// Compatible Aliases +def nmagic_alias : Flag<["-"], "n">, + Alias<nmagic>; +def omagic_alias : Flag<["-"], "N">, + Alias<omagic>; + +//===----------------------------------------------------------------------===// +/// Dynamic Library/Executable Options +//===----------------------------------------------------------------------===// +def grp_dynlibexec : OptionGroup<"opts">, + HelpText<"DYNAMIC LIBRARY/EXECUTABLE OPTIONS">; +def dynamic_linker : Joined<["--"], "dynamic-linker=">, + HelpText<"Set the path to the dynamic linker">, Group<grp_dynlibexec>; +// Executable options - compatibility aliases +def dynamic_linker_alias : Separate<["-"], "dynamic-linker">, + Alias<dynamic_linker>; +defm rpath : dashEq<"rpath", "rpath", + "Add a directory to the runtime library search path">, + Group<grp_dynlibexec>; +def rpath_link : Separate<["-"], "rpath-link">, + HelpText<"Specifies the first set of directories to search">, + Group<grp_dynlibexec>; +def export_dynamic : Flag<["-", "--"], "export-dynamic">, + HelpText<"Add all symbols to the dynamic symbol table" + " when creating executables">, + Group<grp_main>; +def alias_export_dynamic: Flag<["-"], "E">, + Alias<export_dynamic>; +def no_export_dynamic : Flag<["--"], "no-export-dynamic">, + Group<grp_main>; + +//===----------------------------------------------------------------------===// +/// Dynamic Library Options +//===----------------------------------------------------------------------===// +def grp_dynlib : OptionGroup<"opts">, + HelpText<"DYNAMIC LIBRARY OPTIONS">; +def soname : Joined<["-", "--"], "soname=">, + HelpText<"Set the 
internal DT_SONAME field to the specified name">, + Group<grp_dynlib>; +def soname_separate : Separate<["-", "--"], "soname">, Alias<soname>; +def soname_h : Separate<["-"], "h">, Alias<soname>; + +//===----------------------------------------------------------------------===// +/// Resolver Options +//===----------------------------------------------------------------------===// +def grp_resolveropt : OptionGroup<"opts">, + HelpText<"SYMBOL RESOLUTION OPTIONS">; +defm u : smDash<"u", "undefined", + "Force symbol to be entered in the output file" + " as an undefined symbol">, + Group<grp_resolveropt>; +def start_group : Flag<["--"], "start-group">, + HelpText<"Start a group">, + Group<grp_resolveropt>; +def alias_start_group: Flag<["-"], "(">, + Alias<start_group>; +def end_group : Flag<["--"], "end-group">, + HelpText<"End a group">, + Group<grp_resolveropt>; +def alias_end_group: Flag<["-"], ")">, + Alias<end_group>; +def as_needed : Flag<["--"], "as-needed">, + HelpText<"This option affects ELF DT_NEEDED tags for " + "dynamic libraries mentioned on the command line">, + Group<grp_resolveropt>; +def no_as_needed : Flag<["--"], "no-as-needed">, + HelpText<"This option restores the default behavior" + " of adding DT_NEEDED entries">, + Group<grp_resolveropt>; +def no_allow_shlib_undefs : Flag<["--"], "no-allow-shlib-undefined">, + HelpText<"Do not allow undefined symbols from dynamic" + " library when creating executables">, + Group<grp_resolveropt>; +def allow_shlib_undefs : Flag<["-", "--"], "allow-shlib-undefined">, + HelpText<"Allow undefined symbols from dynamic" + " library when creating executables">, + Group<grp_resolveropt>; +def use_shlib_undefs: Flag<["--"], "use-shlib-undefines">, + HelpText<"Resolve undefined symbols from dynamic libraries">, + Group<grp_resolveropt>; +def allow_multiple_definition: Flag<["--"], "allow-multiple-definition">, + HelpText<"Allow multiple definitions">, + Group<grp_resolveropt>; +def defsym : Joined<["--"], "defsym=">, + 
HelpText<"Create a defined symbol">, + Group<grp_resolveropt>; + +//===----------------------------------------------------------------------===// +/// Custom Options +//===----------------------------------------------------------------------===// +def grp_customopts : OptionGroup<"opts">, + HelpText<"CUSTOM OPTIONS">; +def rosegment: Flag<["--"], "rosegment">, + HelpText<"Put read-only non-executable sections in their own segment">, + Group<grp_customopts>; +def z : Separate<["-"], "z">, + HelpText<"Linker Option extensions">, + Group<grp_customopts>; +def no_align_segments: Flag<["--"], "no-align-segments">, + HelpText<"Don't align ELF segments(virtualaddress/fileoffset) to page boundaries">, + Group<grp_customopts>; + +//===----------------------------------------------------------------------===// +/// Symbol options +//===----------------------------------------------------------------------===// +def grp_symbolopts : OptionGroup<"opts">, + HelpText<"SYMBOL OPTIONS">; +def demangle : Flag<["--"], "demangle">, + HelpText<"Demangle C++ symbols">, + Group<grp_symbolopts>; +def no_demangle : Flag<["--"], "no-demangle">, + HelpText<"Dont demangle C++ symbols">, + Group<grp_symbolopts>; +def strip_all : Flag<["--"], "strip-all">, + HelpText<"Omit all symbol informations from output">, + Group<grp_symbolopts>; +def alias_strip_all : Flag<["-"], "s">, + Alias<strip_all>; +defm wrap : smDash<"wrap", "wrap", + "Use a wrapper function for symbol. Any " + " undefined reference to symbol will be resolved to " + "\"__wrap_symbol\". 
Any undefined reference to \"__real_symbol\"" + " will be resolved to symbol.">, + MetaVarName<"<symbol>">, + Group<grp_symbolopts>; + +//===----------------------------------------------------------------------===// +/// Script Options +//===----------------------------------------------------------------------===// +def grp_scriptopts : OptionGroup<"opts">, + HelpText<"SCRIPT OPTIONS">; +defm T : smDash<"T", "script", + "Use the given linker script in place of the default script.">, + Group<grp_scriptopts>; + +//===----------------------------------------------------------------------===// +/// Optimization Options +//===----------------------------------------------------------------------===// +def grp_opts : OptionGroup<"opts">, + HelpText<"OPTIMIZATIONS">; +def hash_style : Joined <["--"], "hash-style=">, + HelpText<"Set the type of linker's hash table(s)">, + Group<grp_opts>; +def merge_strings : Flag<["--"], "merge-strings">, + HelpText<"Merge common strings across mergeable sections">, + Group<grp_opts>; +def eh_frame_hdr : Flag<["--"], "eh-frame-hdr">, + HelpText<"Request creation of .eh_frame_hdr section and ELF " + " PT_GNU_EH_FRAME segment header">, + Group<grp_opts>; + +//===----------------------------------------------------------------------===// +/// Tracing Options +//===----------------------------------------------------------------------===// +def grp_tracingopts : OptionGroup<"opts">, + HelpText<"TRACING OPTIONS">; +def t : Flag<["-"], "t">, + HelpText<"Print the names of the input files as ld processes them">, + Group<grp_tracingopts>; +def stats : Flag<["--"], "stats">, + HelpText<"Print time and memory usage stats">, Group<grp_tracingopts>; + +//===----------------------------------------------------------------------===// +/// Extensions +//===----------------------------------------------------------------------===// +def grp_extns : OptionGroup<"opts">, + HelpText<"Extensions">; +def output_filetype: Separate<["--"], "output-filetype">, 
+ HelpText<"Specify what type of output file that lld creates, YAML/Native">, + Group<grp_extns>; +def alias_output_filetype: Joined<["--"], "output-filetype=">, + Alias<output_filetype>; + +//===----------------------------------------------------------------------===// +/// Ignored options +//===----------------------------------------------------------------------===// +def grp_ignored: OptionGroup<"ignored">, + HelpText<"GNU Options ignored for Compatibility ">; +def dashg : Flag<["-"], "g">, + HelpText<"Ignored.">, + Group<grp_ignored>; +def Qy : Flag<["-"], "Qy">, + HelpText<"Ignored for SVR4 Compatibility">, + Group<grp_ignored>; +def qmagic : Flag<["-"], "qmagic">, + HelpText<"Ignored for Linux Compatibility">, + Group<grp_ignored>; + +//===----------------------------------------------------------------------===// +/// Help +//===----------------------------------------------------------------------===// +def help : Flag<["--"], "help">, + HelpText<"Display this help message">; diff --git a/lib/Driver/Makefile b/lib/Driver/Makefile new file mode 100644 index 000000000000..19024cfab0f1 --- /dev/null +++ b/lib/Driver/Makefile @@ -0,0 +1,38 @@ +##===- lld/lib/Driver/Makefile ---------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LLD_LEVEL := ../.. 
+LIBRARYNAME := lldDriver + +BUILT_SOURCES = CoreOptions.inc UniversalDriverOptions.inc DarwinLdOptions.inc \ + GnuLdOptions.inc WinLinkOptions.inc + +TABLEGEN_INC_FILES_COMMON = 1 + +include $(LLD_LEVEL)/Makefile + +$(ObjDir)/CoreOptions.inc.tmp : CoreOptions.td $(LLVM_TBLGEN) $(ObjDir)/.dir + $(Echo) "Building LLD CoreOptions Option tables with tblgen" + $(Verb) $(LLVMTableGen) -gen-opt-parser-defs -o $(call SYSPATH, $@) $< + +$(ObjDir)/UniversalDriverOptions.inc.tmp : UniversalDriverOptions.td $(LLVM_TBLGEN) $(ObjDir)/.dir + $(Echo) "Building LLD Universal Driver Options tables with tblgen" + $(Verb) $(LLVMTableGen) -gen-opt-parser-defs -o $(call SYSPATH, $@) $< + +$(ObjDir)/DarwinLdOptions.inc.tmp : DarwinLdOptions.td $(LLVM_TBLGEN) $(ObjDir)/.dir + $(Echo) "Building LLD Darwin ld Option tables with tblgen" + $(Verb) $(LLVMTableGen) -gen-opt-parser-defs -o $(call SYSPATH, $@) $< + +$(ObjDir)/GnuLdOptions.inc.tmp : GnuLdOptions.td $(LLVM_TBLGEN) $(ObjDir)/.dir + $(Echo) "Building LLD Gnu ld Option tables with tblgen" + $(Verb) $(LLVMTableGen) -gen-opt-parser-defs -o $(call SYSPATH, $@) $< + +$(ObjDir)/WinLinkOptions.inc.tmp : WinLinkOptions.td $(LLVM_TBLGEN) $(ObjDir)/.dir + $(Echo) "Building LLD WinLinkOptions Option tables with tblgen" + $(Verb) $(LLVMTableGen) -gen-opt-parser-defs -o $(call SYSPATH, $@) $< diff --git a/lib/Driver/TODO.rst b/lib/Driver/TODO.rst new file mode 100644 index 000000000000..e03d829c232d --- /dev/null +++ b/lib/Driver/TODO.rst @@ -0,0 +1,101 @@ +GNU ld Driver +~~~~~~~~~~~~~ + +Missing Options +############### + +* --audit +* -A,--architecture +* -b,--format +* -d,-dc,-dp +* -P,--depaudit +* --exclude-libs +* --exclude-modules-for-implib +* -E,--export-dynamic,--no-export-dynamic +* -EB (We probably shouldn't support this) +* -EL (We probably shouldn't support this) +* -f,--auxiliary +* -F,--filter +* -G,--gpsize +* -h +* -i +* --library +* -M +* --print-map +* -output +* -O +* -q,--emit-relocs +* --force-dynamic +* --relocatable +* 
-R,--just-symbols +* -s,--strip-all +* -S,--strip-debug +* --trace +* -dT,--default-script +* -Ur +* --unique +* -v,--version,-V +* -x,--discard-all +* -X,--discard-locals +* -y,--trace-symbol +* -z (keywords need to be implemented) +* --accept-unknown-input-arch,--no-accept-unknown-input-arch +* -Bdynamic,-dy,-call_shared +* -Bgroup +* -dn,-non_shared +* -Bsymbolic +* -Bsymbolic-functions +* --dynamic-list +* --dynamic-list-data +* --dynamic-list-cpp-new +* --dynamic-list-cpp-typeinfo +* --check-sections,--no-check-sections +* --copy-dt-needed-entries,--no-copy-dt-needed-entries +* --cref +* --no-define-common +* --defsym (only absolute value supported now) +* --demangle,--no-demangle +* -I +* --fatal-warnings,--no-fatal-warnings +* --force-exe-suffix +* --gc-sections,--no-gc-sections +* --print-gc-sections,--no-print-gc-sections +* --print-output-format +* --target-help +* -Map +* --no-keep-memory +* --no-undefined,-z defs +* --allow-shlib-undefined,--no-allow-shlib-undefined +* --no-undefined-version +* --default-symver +* --default-imported-symver +* --no-warn-mismatch +* --no-warn-search-mismatch +* --oformat +* -pie,--pic-executable +* --relax,--no-relax +* --retain-symbols-file +* --sort-common +* --sort-section={name,alignment} +* --split-by-file +* --split-by-reloc +* --stats +* --section-start +* -T{bss,data,text,{text,rodata,data}-segment} +* --unresolved-symbols +* -dll-verbose,--verbose +* --version-script +* --warn-common +* --warn-constructors +* --warn-multiple-gp +* --warn-once +* --warn-section-align +* --warn-shared-textrel +* --warn-alternate-em +* --warn-unresolved-symbols +* --error-unresolved-symbols +* --wrap +* --no-ld-generated-unwind-info +* --hash-size +* --reduce-memory-overheads +* --build-id diff --git a/lib/Driver/UniversalDriver.cpp b/lib/Driver/UniversalDriver.cpp new file mode 100644 index 000000000000..7d42ad7b4bfc --- /dev/null +++ b/lib/Driver/UniversalDriver.cpp @@ -0,0 +1,218 @@ +//===- lib/Driver/UniversalDriver.cpp 
-------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// +/// Driver for "universal" lld tool which can mimic any linker command line +/// parsing once it figures out which command line flavor to use. +/// +//===----------------------------------------------------------------------===// + +#include "lld/Driver/Driver.h" +#include "lld/Config/Version.h" +#include "lld/Core/LLVM.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Option/Arg.h" +#include "llvm/Option/Option.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Host.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/raw_ostream.h" + +using namespace lld; + +namespace { + +// Create enum with OPT_xxx values for each option in GnuLdOptions.td +enum { + OPT_INVALID = 0, +#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ + HELP, META) \ + OPT_##ID, +#include "UniversalDriverOptions.inc" +#undef OPTION +}; + +// Create prefix string literals used in GnuLdOptions.td +#define PREFIX(NAME, VALUE) const char *const NAME[] = VALUE; +#include "UniversalDriverOptions.inc" +#undef PREFIX + +// Create table mapping all options defined in GnuLdOptions.td +static const llvm::opt::OptTable::Info infoTable[] = { +#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ + HELPTEXT, METAVAR) \ + { \ + PREFIX, NAME, HELPTEXT, METAVAR, OPT_##ID, llvm::opt::Option::KIND##Class, \ + PARAM, FLAGS, OPT_##GROUP, OPT_##ALIAS, ALIASARGS \ + } \ + , +#include "UniversalDriverOptions.inc" +#undef OPTION +}; + +// Create OptTable class for parsing actual command line arguments +class UniversalDriverOptTable : public llvm::opt::OptTable { +public: + 
UniversalDriverOptTable() + : OptTable(infoTable, llvm::array_lengthof(infoTable)) {} +}; + +enum class Flavor { + invalid, + gnu_ld, // -flavor gnu + win_link, // -flavor link + darwin_ld, // -flavor darwin + core // -flavor core OR -core +}; + +struct ProgramNameParts { + StringRef _target; + StringRef _flavor; +}; + +} // anonymous namespace + +static Flavor strToFlavor(StringRef str) { + return llvm::StringSwitch<Flavor>(str) + .Case("gnu", Flavor::gnu_ld) + .Case("link", Flavor::win_link) + .Case("lld-link", Flavor::win_link) + .Case("darwin", Flavor::darwin_ld) + .Case("core", Flavor::core) + .Case("ld", Flavor::gnu_ld) + .Default(Flavor::invalid); +} + +static ProgramNameParts parseProgramName(StringRef programName) { + SmallVector<StringRef, 3> components; + llvm::SplitString(programName, components, "-"); + ProgramNameParts ret; + + using std::begin; + using std::end; + + // Erase any lld components. + components.erase(std::remove(components.begin(), components.end(), "lld"), + components.end()); + + // Find the flavor component. + auto flIter = std::find_if(components.begin(), components.end(), + [](StringRef str) -> bool { + return strToFlavor(str) != Flavor::invalid; + }); + + if (flIter != components.end()) { + ret._flavor = *flIter; + components.erase(flIter); + } + + // Any remaining component must be the target. + if (components.size() == 1) + ret._target = components[0]; + + return ret; +} + +// Removes the argument from argv along with its value, if exists, and updates +// argc. 
+static void removeArg(llvm::opt::Arg *arg, int &argc, const char **&argv) { + unsigned int numToRemove = arg->getNumValues() + 1; + unsigned int argIndex = arg->getIndex() + 1; + + std::rotate(&argv[argIndex], &argv[argIndex + numToRemove], argv + argc); + argc -= numToRemove; +} + +static Flavor getFlavor(int &argc, const char **&argv, + std::unique_ptr<llvm::opt::InputArgList> &parsedArgs) { + if (llvm::opt::Arg *argCore = parsedArgs->getLastArg(OPT_core)) { + removeArg(argCore, argc, argv); + return Flavor::core; + } + if (llvm::opt::Arg *argFlavor = parsedArgs->getLastArg(OPT_flavor)) { + removeArg(argFlavor, argc, argv); + return strToFlavor(argFlavor->getValue()); + } + +#if LLVM_ON_UNIX + if (llvm::sys::path::filename(argv[0]).equals("ld")) { +#if __APPLE__ + // On a Darwin systems, if linker binary is named "ld", use Darwin driver. + return Flavor::darwin_ld; +#endif + // On a ELF based systems, if linker binary is named "ld", use gnu driver. + return Flavor::gnu_ld; + } +#endif + + StringRef name = llvm::sys::path::stem(argv[0]); + return strToFlavor(parseProgramName(name)._flavor); +} + +namespace lld { + +bool UniversalDriver::link(int argc, const char *argv[], + raw_ostream &diagnostics) { + // Parse command line options using GnuLdOptions.td + std::unique_ptr<llvm::opt::InputArgList> parsedArgs; + UniversalDriverOptTable table; + unsigned missingIndex; + unsigned missingCount; + + // Program name + StringRef programName = llvm::sys::path::stem(argv[0]); + + parsedArgs.reset( + table.ParseArgs(&argv[1], &argv[argc], missingIndex, missingCount)); + + if (missingCount) { + diagnostics << "error: missing arg value for '" + << parsedArgs->getArgString(missingIndex) << "' expected " + << missingCount << " argument(s).\n"; + return false; + } + + // Handle -help + if (parsedArgs->getLastArg(OPT_help)) { + table.PrintHelp(llvm::outs(), programName.data(), "LLVM Linker", false); + return true; + } + + // Handle -version + if 
(parsedArgs->getLastArg(OPT_version)) { + diagnostics << "LLVM Linker Version: " << getLLDVersion() + << getLLDRepositoryVersion() << "\n"; + return true; + } + + Flavor flavor = getFlavor(argc, argv, parsedArgs); + std::vector<const char *> args(argv, argv + argc); + + // Switch to appropriate driver. + switch (flavor) { + case Flavor::gnu_ld: + return GnuLdDriver::linkELF(args.size(), args.data(), diagnostics); + case Flavor::darwin_ld: + return DarwinLdDriver::linkMachO(args.size(), args.data(), diagnostics); + case Flavor::win_link: + return WinLinkDriver::linkPECOFF(args.size(), args.data(), diagnostics); + case Flavor::core: + return CoreDriver::link(args.size(), args.data(), diagnostics); + case Flavor::invalid: + diagnostics << "Select the appropriate flavor\n"; + table.PrintHelp(llvm::outs(), programName.data(), "LLVM Linker", false); + return false; + } + llvm_unreachable("Unrecognised flavor"); +} + +} // end namespace lld diff --git a/lib/Driver/UniversalDriverOptions.td b/lib/Driver/UniversalDriverOptions.td new file mode 100644 index 000000000000..14abc9ce9911 --- /dev/null +++ b/lib/Driver/UniversalDriverOptions.td @@ -0,0 +1,19 @@ +include "llvm/Option/OptParser.td" + +// Select an optional flavor +def flavor: Separate<["-"], "flavor">, + HelpText<"Flavor for linking, options are gnu/darwin/link">; + +// Select the core flavor +def core : Flag<["-"], "core">, + HelpText<"CORE linking">; + +def target: Separate<["-"], "target">, + HelpText<"Select the target">; + +def version: Flag<["-"], "version">, + HelpText<"Display the version">; + +// Help message +def help : Flag<["-"], "help">, + HelpText<"Display this help message">; diff --git a/lib/Driver/WinLinkDriver.cpp b/lib/Driver/WinLinkDriver.cpp new file mode 100644 index 000000000000..6ee7a5a004b5 --- /dev/null +++ b/lib/Driver/WinLinkDriver.cpp @@ -0,0 +1,1371 @@ +//===- lib/Driver/WinLinkDriver.cpp ---------------------------------------===// +// +// The LLVM Linker +// +// This file is 
distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// +/// Concrete instance of the Driver for Windows link.exe. +/// +//===----------------------------------------------------------------------===// + +#include "lld/Driver/Driver.h" +#include "lld/Driver/WinLinkModuleDef.h" +#include "lld/ReaderWriter/PECOFFLinkingContext.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Object/COFF.h" +#include "llvm/Option/Arg.h" +#include "llvm/Option/Option.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/Process.h" +#include "llvm/Support/Program.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cctype> +#include <map> +#include <memory> +#include <sstream> +#include <tuple> + +namespace lld { + +// +// Option definitions +// + +// Create enum with OPT_xxx values for each option in WinLinkOptions.td +enum { + OPT_INVALID = 0, +#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ + HELP, META) \ + OPT_##ID, +#include "WinLinkOptions.inc" +#undef OPTION +}; + +// Create prefix string literals used in WinLinkOptions.td +#define PREFIX(NAME, VALUE) const char *const NAME[] = VALUE; +#include "WinLinkOptions.inc" +#undef PREFIX + +// Create table mapping all options defined in WinLinkOptions.td +static const llvm::opt::OptTable::Info infoTable[] = { +#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ + HELPTEXT, METAVAR) \ + { PREFIX, NAME, HELPTEXT, METAVAR, OPT_##ID, llvm::opt::Option::KIND##Class, \ + PARAM, FLAGS, OPT_##GROUP, OPT_##ALIAS, ALIASARGS }, +#include "WinLinkOptions.inc" +#undef OPTION +}; + +namespace { + +// Create 
OptTable class for parsing actual command line arguments +class WinLinkOptTable : public llvm::opt::OptTable { +public: + // link.exe's command line options are case insensitive, unlike + // other driver's options for Unix. + WinLinkOptTable() + : OptTable(infoTable, llvm::array_lengthof(infoTable), + /* ignoreCase */ true) {} +}; + +} // anonymous namespace + +// +// Functions to parse each command line option +// + +// Split the given string with spaces. +static std::vector<std::string> splitArgList(const std::string &str) { + std::stringstream stream(str); + std::istream_iterator<std::string> begin(stream); + std::istream_iterator<std::string> end; + return std::vector<std::string>(begin, end); +} + +// Split the given string with the path separator. +static std::vector<StringRef> splitPathList(StringRef str) { + std::vector<StringRef> ret; + while (!str.empty()) { + StringRef path; + std::tie(path, str) = str.split(';'); + ret.push_back(path); + } + return ret; +} + +// Parse an argument for /alternatename. The expected string is +// "<string>=<string>". +static bool parseAlternateName(StringRef arg, StringRef &weak, StringRef &def, + raw_ostream &diag) { + std::tie(weak, def) = arg.split('='); + if (weak.empty() || def.empty()) { + diag << "Error: malformed /alternatename option: " << arg << "\n"; + return false; + } + return true; +} + +// Parse an argument for /base, /stack or /heap. The expected string +// is "<integer>[,<integer>]". +static bool parseMemoryOption(StringRef arg, uint64_t &reserve, + uint64_t &commit) { + StringRef reserveStr, commitStr; + std::tie(reserveStr, commitStr) = arg.split(','); + if (reserveStr.getAsInteger(0, reserve)) + return false; + if (!commitStr.empty() && commitStr.getAsInteger(0, commit)) + return false; + return true; +} + +// Parse an argument for /version or /subsystem. The expected string is +// "<integer>[.<integer>]". 
+static bool parseVersion(StringRef arg, uint32_t &major, uint32_t &minor) { + StringRef majorVersion, minorVersion; + std::tie(majorVersion, minorVersion) = arg.split('.'); + if (minorVersion.empty()) + minorVersion = "0"; + if (majorVersion.getAsInteger(0, major)) + return false; + if (minorVersion.getAsInteger(0, minor)) + return false; + return true; +} + +// Returns subsystem type for the given string. +static llvm::COFF::WindowsSubsystem stringToWinSubsystem(StringRef str) { + return llvm::StringSwitch<llvm::COFF::WindowsSubsystem>(str.lower()) + .Case("windows", llvm::COFF::IMAGE_SUBSYSTEM_WINDOWS_GUI) + .Case("console", llvm::COFF::IMAGE_SUBSYSTEM_WINDOWS_CUI) + .Case("boot_application", + llvm::COFF::IMAGE_SUBSYSTEM_WINDOWS_BOOT_APPLICATION) + .Case("efi_application", llvm::COFF::IMAGE_SUBSYSTEM_EFI_APPLICATION) + .Case("efi_boot_service_driver", + llvm::COFF::IMAGE_SUBSYSTEM_EFI_BOOT_SERVICE_DRIVER) + .Case("efi_rom", llvm::COFF::IMAGE_SUBSYSTEM_EFI_ROM) + .Case("efi_runtime_driver", + llvm::COFF::IMAGE_SUBSYSTEM_EFI_RUNTIME_DRIVER) + .Case("native", llvm::COFF::IMAGE_SUBSYSTEM_NATIVE) + .Case("posix", llvm::COFF::IMAGE_SUBSYSTEM_POSIX_CUI) + .Default(llvm::COFF::IMAGE_SUBSYSTEM_UNKNOWN); +} + +// Parse /subsystem command line option. The form of /subsystem is +// "subsystem_name[,majorOSVersion[.minorOSVersion]]". 
+static bool parseSubsystem(StringRef arg, + llvm::COFF::WindowsSubsystem &subsystem, + llvm::Optional<uint32_t> &major, + llvm::Optional<uint32_t> &minor, raw_ostream &diag) { + StringRef subsystemStr, osVersion; + std::tie(subsystemStr, osVersion) = arg.split(','); + if (!osVersion.empty()) { + uint32_t v1, v2; + if (!parseVersion(osVersion, v1, v2)) + return false; + major = v1; + minor = v2; + } + subsystem = stringToWinSubsystem(subsystemStr); + if (subsystem == llvm::COFF::IMAGE_SUBSYSTEM_UNKNOWN) { + diag << "error: unknown subsystem name: " << subsystemStr << "\n"; + return false; + } + return true; +} + +static llvm::COFF::MachineTypes stringToMachineType(StringRef str) { + // FIXME: we have no way to differentiate between ARM and ARMNT currently. + // However, given that LLVM only supports ARM NT, default to that for now. + return llvm::StringSwitch<llvm::COFF::MachineTypes>(str.lower()) + .Case("arm", llvm::COFF::IMAGE_FILE_MACHINE_ARMNT) + .Case("x64", llvm::COFF::IMAGE_FILE_MACHINE_AMD64) + .Case("x86", llvm::COFF::IMAGE_FILE_MACHINE_I386) + .Default(llvm::COFF::IMAGE_FILE_MACHINE_UNKNOWN); +} + +// Parse /section:name,[[!]{DEKPRSW}] +// +// /section option is to set non-default bits in the Characteristics fields of +// the section header. D, E, K, P, R, S, and W represent discardable, +// execute, not_cachable, not_pageable, read, shared, and write bits, +// respectively. You can specify multiple flags in one /section option. +// +// If the flag starts with "!", the flags represent a mask that should be turned +// off regardless of the default value. You can even create a section which is +// not readable, writable nor executable with this -- although it's probably +// useless. 
+static bool parseSection(StringRef option, std::string §ion, + llvm::Optional<uint32_t> &flags, + llvm::Optional<uint32_t> &mask) { + StringRef flagString; + std::tie(section, flagString) = option.split(","); + + bool negative = false; + if (flagString.startswith("!")) { + negative = true; + flagString = flagString.substr(1); + } + if (flagString.empty()) + return false; + + uint32_t attribs = 0; + for (size_t i = 0, e = flagString.size(); i < e; ++i) { + switch (tolower(flagString[i])) { +#define CASE(c, flag) \ + case c: \ + attribs |= flag; \ + break + CASE('d', llvm::COFF::IMAGE_SCN_MEM_DISCARDABLE); + CASE('e', llvm::COFF::IMAGE_SCN_MEM_EXECUTE); + CASE('k', llvm::COFF::IMAGE_SCN_MEM_NOT_CACHED); + CASE('p', llvm::COFF::IMAGE_SCN_MEM_NOT_PAGED); + CASE('r', llvm::COFF::IMAGE_SCN_MEM_READ); + CASE('s', llvm::COFF::IMAGE_SCN_MEM_SHARED); + CASE('w', llvm::COFF::IMAGE_SCN_MEM_WRITE); +#undef CASE + default: + return false; + } + } + + if (negative) { + mask = attribs; + } else { + flags = attribs; + } + return true; +} + +static bool readFile(PECOFFLinkingContext &ctx, StringRef path, + ArrayRef<uint8_t> &result) { + ErrorOr<std::unique_ptr<MemoryBuffer>> buf = MemoryBuffer::getFile(path); + if (!buf) + return false; + StringRef Data = buf.get()->getBuffer(); + result = ctx.allocate(ArrayRef<uint8_t>( + reinterpret_cast<const uint8_t *>(Data.begin()), Data.size())); + return true; +} + +// Parse /manifest:EMBED[,ID=#]|NO. 
+static bool parseManifest(StringRef option, bool &enable, bool &embed, + int &id) { + if (option.equals_lower("no")) { + enable = false; + return true; + } + if (!option.startswith_lower("embed")) + return false; + + embed = true; + option = option.substr(strlen("embed")); + if (option.empty()) + return true; + if (!option.startswith_lower(",id=")) + return false; + option = option.substr(strlen(",id=")); + if (option.getAsInteger(0, id)) + return false; + return true; +} + +static bool isLibraryFile(StringRef path) { + return path.endswith_lower(".lib") || path.endswith_lower(".imp"); +} + +static StringRef getObjectPath(PECOFFLinkingContext &ctx, StringRef path) { + std::string result; + if (isLibraryFile(path)) { + result = ctx.searchLibraryFile(path); + } else if (llvm::sys::path::extension(path).empty()) { + result = path.str() + ".obj"; + } else { + result = path; + } + return ctx.allocate(result); +} + +static StringRef getLibraryPath(PECOFFLinkingContext &ctx, StringRef path) { + std::string result = isLibraryFile(path) + ? ctx.searchLibraryFile(path) + : ctx.searchLibraryFile(path.str() + ".lib"); + return ctx.allocate(result); +} + +// Returns true if the given file is a Windows resource file. +static bool isResoruceFile(StringRef path) { + llvm::sys::fs::file_magic fileType; + if (llvm::sys::fs::identify_magic(path, fileType)) { + // If we cannot read the file, assume it's not a resource file. + // The further stage will raise an error on this unreadable file. + return false; + } + return fileType == llvm::sys::fs::file_magic::windows_resource; +} + +// Merge Windows resource files and convert them to a single COFF file. +// The temporary file path is set to result. +static bool convertResourceFiles(PECOFFLinkingContext &ctx, + std::vector<std::string> inFiles, + std::string &result) { + // Create an output file path. 
+ SmallString<128> outFile; + if (llvm::sys::fs::createTemporaryFile("resource", "obj", outFile)) + return false; + std::string outFileArg = ("/out:" + outFile).str(); + + // Construct CVTRES.EXE command line and execute it. + std::string program = "cvtres.exe"; + ErrorOr<std::string> programPathOrErr = llvm::sys::findProgramByName(program); + if (!programPathOrErr) { + llvm::errs() << "Unable to find " << program << " in PATH\n"; + return false; + } + const std::string &programPath = *programPathOrErr; + + std::vector<const char *> args; + args.push_back(programPath.c_str()); + args.push_back(ctx.is64Bit() ? "/machine:x64" : "/machine:x86"); + args.push_back("/readonly"); + args.push_back("/nologo"); + args.push_back(outFileArg.c_str()); + for (const std::string &path : inFiles) + args.push_back(path.c_str()); + args.push_back(nullptr); + + if (llvm::sys::ExecuteAndWait(programPath.c_str(), &args[0]) != 0) { + llvm::errs() << program << " failed\n"; + return false; + } + result = outFile.str(); + return true; +} + +// Parse /manifestuac:(level=<string>|uiAccess=<string>). +// +// The arguments will be embedded to the manifest XML file with no error check, +// so the values given via the command line must be valid as XML attributes. +// This may sound a bit odd, but that's how link.exe works, so we will follow. +static bool parseManifestUAC(StringRef option, + llvm::Optional<std::string> &level, + llvm::Optional<std::string> &uiAccess) { + for (;;) { + option = option.ltrim(); + if (option.empty()) + return true; + if (option.startswith_lower("level=")) { + option = option.substr(strlen("level=")); + StringRef value; + std::tie(value, option) = option.split(" "); + level = value.str(); + continue; + } + if (option.startswith_lower("uiaccess=")) { + option = option.substr(strlen("uiaccess=")); + StringRef value; + std::tie(value, option) = option.split(" "); + uiAccess = value.str(); + continue; + } + return false; + } +} + +// Returns the machine type (e.g. 
x86) of the given input file. +// If the file is not COFF, returns false. +static bool getMachineType(StringRef path, llvm::COFF::MachineTypes &result) { + llvm::sys::fs::file_magic fileType; + if (llvm::sys::fs::identify_magic(path, fileType)) + return false; + if (fileType != llvm::sys::fs::file_magic::coff_object) + return false; + ErrorOr<std::unique_ptr<MemoryBuffer>> buf = MemoryBuffer::getFile(path); + if (!buf) + return false; + std::error_code ec; + llvm::object::COFFObjectFile obj(buf.get()->getMemBufferRef(), ec); + if (ec) + return false; + result = static_cast<llvm::COFF::MachineTypes>(obj.getMachine()); + return true; +} + +// Parse /export:entryname[=internalname][,@ordinal[,NONAME]][,DATA][,PRIVATE]. +// +// MSDN doesn't say anything about /export:foo=bar style option or PRIVATE +// attribtute, but link.exe actually accepts them. +static bool parseExport(StringRef option, + PECOFFLinkingContext::ExportDesc &ret) { + StringRef name; + StringRef rest; + std::tie(name, rest) = option.split(","); + if (name.empty()) + return false; + if (name.find('=') == StringRef::npos) { + ret.name = name; + } else { + std::tie(ret.externalName, ret.name) = name.split("="); + if (ret.name.empty()) + return false; + } + + for (;;) { + if (rest.empty()) + return true; + StringRef arg; + std::tie(arg, rest) = rest.split(","); + if (arg.equals_lower("noname")) { + if (ret.ordinal < 0) + return false; + ret.noname = true; + continue; + } + if (arg.equals_lower("data")) { + ret.isData = true; + continue; + } + if (arg.equals_lower("private")) { + ret.isPrivate = true; + continue; + } + if (arg.startswith("@")) { + int ordinal; + if (arg.substr(1).getAsInteger(0, ordinal)) + return false; + if (ordinal <= 0 || 65535 < ordinal) + return false; + ret.ordinal = ordinal; + continue; + } + return false; + } +} + +// Read module-definition file. 
+static bool parseDef(StringRef option, llvm::BumpPtrAllocator &alloc, + std::vector<moduledef::Directive *> &result) { + ErrorOr<std::unique_ptr<MemoryBuffer>> buf = MemoryBuffer::getFile(option); + if (!buf) + return false; + moduledef::Lexer lexer(std::move(buf.get())); + moduledef::Parser parser(lexer, alloc); + return parser.parse(result); +} + +static StringRef replaceExtension(PECOFFLinkingContext &ctx, StringRef path, + StringRef extension) { + SmallString<128> val = path; + llvm::sys::path::replace_extension(val, extension); + return ctx.allocate(val.str()); +} + +// Create a manifest file contents. +static std::string createManifestXml(PECOFFLinkingContext &ctx) { + std::string ret; + llvm::raw_string_ostream out(ret); + // Emit the XML. Note that we do *not* verify that the XML attributes are + // syntactically correct. This is intentional for link.exe compatibility. + out << "<?xml version=\"1.0\" standalone=\"yes\"?>\n" + "<assembly xmlns=\"urn:schemas-microsoft-com:asm.v1\"\n" + " manifestVersion=\"1.0\">\n"; + if (ctx.getManifestUAC()) { + out << " <trustInfo>\n" + " <security>\n" + " <requestedPrivileges>\n" + " <requestedExecutionLevel level=" << ctx.getManifestLevel() + << " uiAccess=" << ctx.getManifestUiAccess() + << "/>\n" + " </requestedPrivileges>\n" + " </security>\n" + " </trustInfo>\n"; + const std::string &dependency = ctx.getManifestDependency(); + if (!dependency.empty()) { + out << " <dependency>\n" + " <dependentAssembly>\n" + " <assemblyIdentity " << dependency + << " />\n" + " </dependentAssembly>\n" + " </dependency>\n"; + } + } + out << "</assembly>\n"; + out.flush(); + return ret; +} + +// Convert one doublequote to two doublequotes, so that we can embed the string +// into a resource script file. 
+static void quoteAndPrintXml(raw_ostream &out, StringRef str) { + for (;;) { + if (str.empty()) + return; + StringRef line; + std::tie(line, str) = str.split("\n"); + if (line.empty()) + continue; + out << '\"'; + const char *p = line.data(); + for (int i = 0, size = line.size(); i < size; ++i) { + switch (p[i]) { + case '\"': + out << '\"'; + // fallthrough + default: + out << p[i]; + } + } + out << "\"\n"; + } +} + +// Create a resource file (.res file) containing the manifest XML. This is done +// in two steps: +// +// 1. Create a resource script file containing the XML as a literal string. +// 2. Run RC.EXE command to compile the script file to a resource file. +// +// The temporary file created in step 1 will be deleted on exit from this +// function. The file created in step 2 will have the same lifetime as the +// PECOFFLinkingContext. +static bool createManifestResourceFile(PECOFFLinkingContext &ctx, + raw_ostream &diag, + std::string &resFile) { + // Create a temporary file for the resource script file. + SmallString<128> rcFileSmallString; + if (llvm::sys::fs::createTemporaryFile("tmp", "rc", rcFileSmallString)) { + diag << "Cannot create a temporary file\n"; + return false; + } + StringRef rcFile(rcFileSmallString.str()); + llvm::FileRemover rcFileRemover((Twine(rcFile))); + + // Open the temporary file for writing. + std::error_code ec; + llvm::raw_fd_ostream out(rcFileSmallString, ec, llvm::sys::fs::F_Text); + if (ec) { + diag << "Failed to open " << ctx.getManifestOutputPath() << ": " + << ec.message() << "\n"; + return false; + } + + // Write resource script to the RC file. + out << "#define LANG_ENGLISH 9\n" + << "#define SUBLANG_DEFAULT 1\n" + << "#define APP_MANIFEST " << ctx.getManifestId() << "\n" + << "#define RT_MANIFEST 24\n" + << "LANGUAGE LANG_ENGLISH, SUBLANG_DEFAULT\n" + << "APP_MANIFEST RT_MANIFEST {\n"; + quoteAndPrintXml(out, createManifestXml(ctx)); + out << "}\n"; + out.close(); + + // Create output resource file. 
+ SmallString<128> resFileSmallString; + if (llvm::sys::fs::createTemporaryFile("tmp", "res", resFileSmallString)) { + diag << "Cannot create a temporary file"; + return false; + } + resFile = resFileSmallString.str(); + + // Register the resource file path so that the file will be deleted when the + // context's destructor is called. + ctx.registerTemporaryFile(resFile); + + // Run RC.EXE /fo tmp.res tmp.rc + std::string program = "rc.exe"; + ErrorOr<std::string> programPathOrErr = llvm::sys::findProgramByName(program); + if (!programPathOrErr) { + diag << "Unable to find " << program << " in PATH\n"; + return false; + } + const std::string &programPath = *programPathOrErr; + std::vector<const char *> args; + args.push_back(programPath.c_str()); + args.push_back("/fo"); + args.push_back(resFile.c_str()); + args.push_back("/nologo"); + args.push_back(rcFileSmallString.c_str()); + args.push_back(nullptr); + + if (llvm::sys::ExecuteAndWait(programPath.c_str(), &args[0]) != 0) { + diag << program << " failed\n"; + return false; + } + return true; +} + + +// Create the a side-by-side manifest file. +// +// The manifest file will convey some information to the linker, such as whether +// the binary needs to run as Administrator or not. Instead of being placed in +// the PE/COFF header, it's in XML format for some reason -- I guess it's +// probably because it's invented in the early dot-com era. +// +// The side-by-side manifest file is a separate XML file having ".manifest" +// extension. It will be created in the same directory as the resulting +// executable. +static bool createSideBySideManifestFile(PECOFFLinkingContext &ctx, + raw_ostream &diag) { + std::string path = ctx.getManifestOutputPath(); + if (path.empty()) { + // Default name of the manifest file is "foo.exe.manifest" where "foo.exe" is + // the output path. 
+ path = ctx.outputPath(); + path.append(".manifest"); + } + + std::error_code ec; + llvm::raw_fd_ostream out(path, ec, llvm::sys::fs::F_Text); + if (ec) { + diag << ec.message() << "\n"; + return false; + } + out << createManifestXml(ctx); + return true; +} + +// Handle /failifmismatch option. +static bool +handleFailIfMismatchOption(StringRef option, + std::map<StringRef, StringRef> &mustMatch, + raw_ostream &diag) { + StringRef key, value; + std::tie(key, value) = option.split('='); + if (key.empty() || value.empty()) { + diag << "error: malformed /failifmismatch option: " << option << "\n"; + return true; + } + auto it = mustMatch.find(key); + if (it != mustMatch.end() && it->second != value) { + diag << "error: mismatch detected: '" << it->second << "' and '" << value + << "' for key '" << key << "'\n"; + return true; + } + mustMatch[key] = value; + return false; +} + +// +// Environment variable +// + +// Process "LINK" environment variable. If defined, the value of the variable +// should be processed as command line arguments. +static std::vector<const char *> processLinkEnv(PECOFFLinkingContext &ctx, + int argc, const char **argv) { + std::vector<const char *> ret; + // The first argument is the name of the command. This should stay at the head + // of the argument list. + assert(argc > 0); + ret.push_back(argv[0]); + + // Add arguments specified by the LINK environment variable. + llvm::Optional<std::string> env = llvm::sys::Process::GetEnv("LINK"); + if (env.hasValue()) + for (std::string &arg : splitArgList(*env)) + ret.push_back(ctx.allocate(arg).data()); + + // Add the rest of arguments passed via the command line. + for (int i = 1; i < argc; ++i) + ret.push_back(argv[i]); + ret.push_back(nullptr); + return ret; +} + +// Process "LIB" environment variable. The variable contains a list of search +// paths separated by semicolons. 
+static void processLibEnv(PECOFFLinkingContext &ctx) { + llvm::Optional<std::string> env = llvm::sys::Process::GetEnv("LIB"); + if (env.hasValue()) + for (StringRef path : splitPathList(*env)) + ctx.appendInputSearchPath(ctx.allocate(path)); +} + +namespace { +class DriverStringSaver : public llvm::cl::StringSaver { +public: + DriverStringSaver(PECOFFLinkingContext &ctx) : _ctx(ctx) {} + + const char *SaveString(const char *s) override { + return _ctx.allocate(StringRef(s)).data(); + } + +private: + PECOFFLinkingContext &_ctx; +}; +} + +// Tokenize command line options in a given file and add them to result. +static bool readResponseFile(StringRef path, PECOFFLinkingContext &ctx, + std::vector<const char *> &result) { + ArrayRef<uint8_t> contents; + if (!readFile(ctx, path, contents)) + return false; + StringRef contentsStr(reinterpret_cast<const char *>(contents.data()), + contents.size()); + DriverStringSaver saver(ctx); + SmallVector<const char *, 0> args; + llvm::cl::TokenizeWindowsCommandLine(contentsStr, saver, args); + for (const char *s : args) + result.push_back(s); + return true; +} + +// Expand arguments starting with "@". It's an error if a specified file does +// not exist. Returns true on success. +static bool expandResponseFiles(int &argc, const char **&argv, + PECOFFLinkingContext &ctx, raw_ostream &diag, + bool &expanded) { + std::vector<const char *> newArgv; + for (int i = 0; i < argc; ++i) { + if (argv[i][0] != '@') { + newArgv.push_back(argv[i]); + continue; + } + StringRef filename = StringRef(argv[i] + 1); + if (!readResponseFile(filename, ctx, newArgv)) { + diag << "error: cannot read response file: " << filename << "\n"; + return false; + } + expanded = true; + } + if (!expanded) + return true; + argc = newArgv.size(); + newArgv.push_back(nullptr); + argv = &ctx.allocateCopy(newArgv)[0]; + return true; +} + +// Parses the given command line options and returns the result. Returns NULL if +// there's an error in the options. 
+static std::unique_ptr<llvm::opt::InputArgList> +parseArgs(int argc, const char **argv, PECOFFLinkingContext &ctx, + raw_ostream &diag, bool isReadingDirectiveSection) { + // Expand arguments starting with "@". + bool expanded = false; + if (!expandResponseFiles(argc, argv, ctx, diag, expanded)) + return nullptr; + + // Parse command line options using WinLinkOptions.td + std::unique_ptr<llvm::opt::InputArgList> parsedArgs; + WinLinkOptTable table; + unsigned missingIndex; + unsigned missingCount; + parsedArgs.reset(table.ParseArgs(&argv[1], &argv[argc], + missingIndex, missingCount)); + if (missingCount) { + diag << "error: missing arg value for '" + << parsedArgs->getArgString(missingIndex) << "' expected " + << missingCount << " argument(s).\n"; + return nullptr; + } + + // Show warning for unknown arguments. In .drectve section, unknown options + // starting with "-?" are silently ignored. This is a COFF's feature to embed a + // new linker option to an object file while keeping backward compatibility. + for (auto unknownArg : parsedArgs->filtered(OPT_UNKNOWN)) { + StringRef arg = unknownArg->getSpelling(); + if (isReadingDirectiveSection && arg.startswith("-?")) + continue; + diag << "warning: ignoring unknown argument: " << arg << "\n"; + } + + // Copy mllvm + for (auto arg : parsedArgs->filtered(OPT_mllvm)) + ctx.appendLLVMOption(arg->getValue()); + + // If we have expaneded response files and /verbose is given, print out the + // final command line. + if (!isReadingDirectiveSection && expanded && + parsedArgs->getLastArg(OPT_verbose)) { + diag << "Command line:"; + for (int i = 0; i < argc; ++i) + diag << " " << argv[i]; + diag << "\n\n"; + } + + return parsedArgs; +} + +// Returns true if the given file node has already been added to the input +// graph. 
+static bool hasLibrary(PECOFFLinkingContext &ctx, File *file) { + StringRef path = file->path(); + for (std::unique_ptr<Node> &p : ctx.getNodes()) + if (auto *f = dyn_cast<FileNode>(p.get())) + if (f->getFile()->path() == path) + return true; + return false; +} + +// If the first command line argument is "/lib", link.exe acts as if it's +// "lib.exe" command. This is for backward compatibility. +// http://msdn.microsoft.com/en-us/library/h34w59b3.aspx +static bool maybeRunLibCommand(int argc, const char **argv, raw_ostream &diag) { + if (argc <= 1) + return false; + if (!StringRef(argv[1]).equals_lower("/lib")) + return false; + ErrorOr<std::string> pathOrErr = llvm::sys::findProgramByName("lib.exe"); + if (!pathOrErr) { + diag << "Unable to find lib.exe in PATH\n"; + return true; + } + const std::string &path = *pathOrErr; + + // Run lib.exe + std::vector<const char *> vec; + vec.push_back(path.c_str()); + for (int i = 2; i < argc; ++i) + vec.push_back(argv[i]); + vec.push_back(nullptr); + + if (llvm::sys::ExecuteAndWait(path.c_str(), &vec[0]) != 0) + diag << "lib.exe failed\n"; + return true; +} + +/// \brief Parse the input file to lld::File. 
+void addFiles(PECOFFLinkingContext &ctx, StringRef path, raw_ostream &diag, + std::vector<std::unique_ptr<File>> &files) { + for (std::unique_ptr<File> &file : loadFile(ctx, path, false)) { + if (ctx.logInputFiles()) + diag << file->path() << "\n"; + files.push_back(std::move(file)); + } +} + +// +// Main driver +// + +bool WinLinkDriver::linkPECOFF(int argc, const char **argv, raw_ostream &diag) { + if (maybeRunLibCommand(argc, argv, diag)) + return true; + + PECOFFLinkingContext ctx; + ctx.setParseDirectives(parseDirectives); + ctx.registry().addSupportCOFFObjects(ctx); + ctx.registry().addSupportCOFFImportLibraries(ctx); + ctx.registry().addSupportArchives(ctx.logInputFiles()); + ctx.registry().addSupportNativeObjects(); + ctx.registry().addSupportYamlFiles(); + + std::vector<const char *> newargv = processLinkEnv(ctx, argc, argv); + processLibEnv(ctx); + if (!parse(newargv.size() - 1, &newargv[0], ctx, diag)) + return false; + + // Create the file if needed. + if (ctx.getCreateManifest() && !ctx.getEmbedManifest()) + if (!createSideBySideManifestFile(ctx, diag)) + return false; + + return link(ctx, diag); +} + +bool WinLinkDriver::parse(int argc, const char *argv[], + PECOFFLinkingContext &ctx, raw_ostream &diag, + bool isReadingDirectiveSection) { + // Parse may be called from multiple threads simultaneously to parse .drectve + // sections. This function is not thread-safe because it mutates the context + // object. So acquire the lock. + std::lock_guard<std::recursive_mutex> lock(ctx.getMutex()); + + std::map<StringRef, StringRef> failIfMismatchMap; + // Parse the options. + std::unique_ptr<llvm::opt::InputArgList> parsedArgs = + parseArgs(argc, argv, ctx, diag, isReadingDirectiveSection); + if (!parsedArgs) + return false; + + // The list of input files. 
+ std::vector<std::unique_ptr<File>> files; + std::vector<std::unique_ptr<File>> libraries; + + // Handle /help + if (parsedArgs->hasArg(OPT_help)) { + WinLinkOptTable table; + table.PrintHelp(llvm::outs(), argv[0], "LLVM Linker", false); + return false; + } + + // Handle /machine before parsing all the other options, as the target machine + // type affects how to handle other options. For example, x86 needs the + // leading underscore to mangle symbols, while x64 doesn't need it. + if (llvm::opt::Arg *inputArg = parsedArgs->getLastArg(OPT_machine)) { + StringRef arg = inputArg->getValue(); + llvm::COFF::MachineTypes type = stringToMachineType(arg); + if (type == llvm::COFF::IMAGE_FILE_MACHINE_UNKNOWN) { + diag << "error: unknown machine type: " << arg << "\n"; + return false; + } + ctx.setMachineType(type); + } else { + // If /machine option is missing, we need to take a look at + // the magic byte of the first object file to infer machine type. + std::vector<StringRef> filePaths; + for (auto arg : *parsedArgs) + if (arg->getOption().getID() == OPT_INPUT) + filePaths.push_back(arg->getValue()); + if (llvm::opt::Arg *arg = parsedArgs->getLastArg(OPT_DASH_DASH)) + filePaths.insert(filePaths.end(), arg->getValues().begin(), + arg->getValues().end()); + for (StringRef path : filePaths) { + llvm::COFF::MachineTypes type; + if (!getMachineType(path, type)) + continue; + if (type == llvm::COFF::IMAGE_FILE_MACHINE_UNKNOWN) + continue; + ctx.setMachineType(type); + break; + } + } + + // Handle /nodefaultlib:<lib>. The same option without argument is handled in + // the following for loop. + for (auto *arg : parsedArgs->filtered(OPT_nodefaultlib)) + ctx.addNoDefaultLib(arg->getValue()); + + // Handle /defaultlib. Argument of the option is added to the input file list + // unless it's blacklisted by /nodefaultlib. 
+ std::vector<StringRef> defaultLibs; + for (auto *arg : parsedArgs->filtered(OPT_defaultlib)) + defaultLibs.push_back(arg->getValue()); + + // -alternatename:<alias>=<symbol> + for (auto *arg : parsedArgs->filtered(OPT_alternatename)) { + StringRef weak, def; + if (!parseAlternateName(arg->getValue(), weak, def, diag)) + return false; + ctx.addAlternateName(weak, def); + } + + // Parse /base command line option. The argument for the parameter is in + // the form of "<address>[:<size>]". + if (auto *arg = parsedArgs->getLastArg(OPT_base)) { + uint64_t addr, size; + // Size should be set to SizeOfImage field in the COFF header, and if + // it's smaller than the actual size, the linker should warn about that. + // Currently we just ignore the value of size parameter. + if (!parseMemoryOption(arg->getValue(), addr, size)) + return false; + ctx.setBaseAddress(addr); + } + + // Parse /dll command line option + if (parsedArgs->hasArg(OPT_dll)) { + ctx.setIsDll(true); + // Default base address of a DLL is 0x10000000. 
+ if (!parsedArgs->hasArg(OPT_base)) + ctx.setBaseAddress(0x10000000); + } + + // Parse /stack command line option + if (auto *arg = parsedArgs->getLastArg(OPT_stack)) { + uint64_t reserve; + uint64_t commit = ctx.getStackCommit(); + if (!parseMemoryOption(arg->getValue(), reserve, commit)) + return false; + ctx.setStackReserve(reserve); + ctx.setStackCommit(commit); + } + + // Parse /heap command line option + if (auto *arg = parsedArgs->getLastArg(OPT_heap)) { + uint64_t reserve; + uint64_t commit = ctx.getHeapCommit(); + if (!parseMemoryOption(arg->getValue(), reserve, commit)) + return false; + ctx.setHeapReserve(reserve); + ctx.setHeapCommit(commit); + } + + if (auto *arg = parsedArgs->getLastArg(OPT_align)) { + uint32_t align; + StringRef val = arg->getValue(); + if (val.getAsInteger(10, align)) { + diag << "error: invalid value for /align: " << val << "\n"; + return false; + } + ctx.setSectionDefaultAlignment(align); + } + + if (auto *arg = parsedArgs->getLastArg(OPT_version)) { + uint32_t major, minor; + if (!parseVersion(arg->getValue(), major, minor)) + return false; + ctx.setImageVersion(PECOFFLinkingContext::Version(major, minor)); + } + + // Parse /merge:<from>=<to>. + for (auto *arg : parsedArgs->filtered(OPT_merge)) { + StringRef from, to; + std::tie(from, to) = StringRef(arg->getValue()).split('='); + if (from.empty() || to.empty()) { + diag << "error: malformed /merge option: " << arg->getValue() << "\n"; + return false; + } + if (!ctx.addSectionRenaming(diag, from, to)) + return false; + } + + // Parse /subsystem:<subsystem>[,<majorOSVersion>[.<minorOSVersion>]]. 
+ if (auto *arg = parsedArgs->getLastArg(OPT_subsystem)) { + llvm::COFF::WindowsSubsystem subsystem; + llvm::Optional<uint32_t> major, minor; + if (!parseSubsystem(arg->getValue(), subsystem, major, minor, diag)) + return false; + ctx.setSubsystem(subsystem); + if (major.hasValue()) + ctx.setMinOSVersion(PECOFFLinkingContext::Version(*major, *minor)); + } + + // Parse /section:name,[[!]{DEKPRSW}] + for (auto *arg : parsedArgs->filtered(OPT_section)) { + std::string section; + llvm::Optional<uint32_t> flags, mask; + if (!parseSection(arg->getValue(), section, flags, mask)) { + diag << "Unknown argument for /section: " << arg->getValue() << "\n"; + return false; + } + if (flags.hasValue()) + ctx.setSectionSetMask(section, *flags); + if (mask.hasValue()) + ctx.setSectionClearMask(section, *mask); + } + + // Parse /manifest:EMBED[,ID=#]|NO. + if (auto *arg = parsedArgs->getLastArg(OPT_manifest_colon)) { + bool enable = true; + bool embed = false; + int id = 1; + if (!parseManifest(arg->getValue(), enable, embed, id)) { + diag << "Unknown argument for /manifest: " << arg->getValue() << "\n"; + return false; + } + ctx.setCreateManifest(enable); + ctx.setEmbedManifest(embed); + ctx.setManifestId(id); + } + + // Parse /manifestuac. + if (auto *arg = parsedArgs->getLastArg(OPT_manifestuac)) { + if (StringRef(arg->getValue()).equals_lower("no")) { + ctx.setManifestUAC(false); + } else { + llvm::Optional<std::string> privilegeLevel; + llvm::Optional<std::string> uiAccess; + if (!parseManifestUAC(arg->getValue(), privilegeLevel, uiAccess)) { + diag << "Unknown argument for /manifestuac: " << arg->getValue() + << "\n"; + return false; + } + if (privilegeLevel.hasValue()) + ctx.setManifestLevel(privilegeLevel.getValue()); + if (uiAccess.hasValue()) + ctx.setManifestUiAccess(uiAccess.getValue()); + } + } + + if (auto *arg = parsedArgs->getLastArg(OPT_manifestfile)) + ctx.setManifestOutputPath(ctx.allocate(arg->getValue())); + + // /manifestdependency:<string> option. 
Note that the argument will be + // embedded to the manifest XML file with no error check, for link.exe + // compatibility. We do not gurantete that the resulting XML file is + // valid. + if (auto *arg = parsedArgs->getLastArg(OPT_manifestdependency)) + ctx.setManifestDependency(ctx.allocate(arg->getValue())); + + for (auto *arg : parsedArgs->filtered(OPT_failifmismatch)) + if (handleFailIfMismatchOption(arg->getValue(), failIfMismatchMap, diag)) + return false; + + if (auto *arg = parsedArgs->getLastArg(OPT_entry)) + ctx.setEntrySymbolName(ctx.allocate(arg->getValue())); + + for (auto *arg : parsedArgs->filtered(OPT_export)) { + PECOFFLinkingContext::ExportDesc desc; + if (!parseExport(arg->getValue(), desc)) { + diag << "Error: malformed /export option: " << arg->getValue() << "\n"; + return false; + } + + // Mangle the symbol name only if it is reading user-supplied command line + // arguments. Because the symbol name in the .drectve section is already + // mangled by the compiler, we shouldn't add a leading underscore in that + // case. It's odd that the command line option has different semantics in + // the .drectve section, but this behavior is needed for compatibility + // with MSVC's link.exe. 
+ if (!isReadingDirectiveSection) + desc.name = ctx.decorateSymbol(desc.name); + ctx.addDllExport(desc); + } + + for (auto *arg : parsedArgs->filtered(OPT_deffile)) { + llvm::BumpPtrAllocator alloc; + std::vector<moduledef::Directive *> dirs; + if (!parseDef(arg->getValue(), alloc, dirs)) { + diag << "Error: invalid module-definition file\n"; + return false; + } + for (moduledef::Directive *dir : dirs) { + if (auto *exp = dyn_cast<moduledef::Exports>(dir)) { + for (PECOFFLinkingContext::ExportDesc desc : exp->getExports()) { + desc.name = ctx.decorateSymbol(desc.name); + ctx.addDllExport(desc); + } + } else if (auto *hs = dyn_cast<moduledef::Heapsize>(dir)) { + ctx.setHeapReserve(hs->getReserve()); + ctx.setHeapCommit(hs->getCommit()); + } else if (auto *lib = dyn_cast<moduledef::Library>(dir)) { + ctx.setIsDll(true); + ctx.setOutputPath(ctx.allocate(lib->getName())); + if (lib->getBaseAddress() && !ctx.getBaseAddress()) + ctx.setBaseAddress(lib->getBaseAddress()); + } else if (auto *name = dyn_cast<moduledef::Name>(dir)) { + if (!name->getOutputPath().empty() && ctx.outputPath().empty()) + ctx.setOutputPath(ctx.allocate(name->getOutputPath())); + if (name->getBaseAddress() && ctx.getBaseAddress()) + ctx.setBaseAddress(name->getBaseAddress()); + } else if (auto *ver = dyn_cast<moduledef::Version>(dir)) { + ctx.setImageVersion(PECOFFLinkingContext::Version( + ver->getMajorVersion(), ver->getMinorVersion())); + } else { + llvm::dbgs() << static_cast<int>(dir->getKind()) << "\n"; + llvm_unreachable("Unknown module-definition directive.\n"); + } + } + } + + for (auto *arg : parsedArgs->filtered(OPT_libpath)) + ctx.appendInputSearchPath(ctx.allocate(arg->getValue())); + + for (auto *arg : parsedArgs->filtered(OPT_opt)) { + std::string val = StringRef(arg->getValue()).lower(); + if (val == "noref") { + ctx.setDeadStripping(false); + } else if (val != "ref" && val != "icf" && val != "noicf" && + val != "lbr" && val != "nolbr" && + !StringRef(val).startswith("icf=")) { + 
diag << "unknown option for /opt: " << val << "\n"; + return false; + } + } + + // LLD is not yet capable of creating a PDB file, so /debug does not have + // any effect. + // TODO: This should disable dead stripping. Currently we can't do that + // because removal of associative sections depends on dead stripping. + if (parsedArgs->hasArg(OPT_debug)) + ctx.setDebug(true); + + if (parsedArgs->hasArg(OPT_verbose)) + ctx.setLogInputFiles(true); + + // /force and /force:unresolved mean the same thing. We do not currently + // support /force:multiple. + if (parsedArgs->hasArg(OPT_force) || + parsedArgs->hasArg(OPT_force_unresolved)) { + ctx.setAllowRemainingUndefines(true); + } + + if (parsedArgs->hasArg(OPT_fixed)) { + // /fixed is not compatible with /dynamicbase. Check for it. + if (parsedArgs->hasArg(OPT_dynamicbase)) { + diag << "/dynamicbase must not be specified with /fixed\n"; + return false; + } + ctx.setBaseRelocationEnabled(false); + ctx.setDynamicBaseEnabled(false); + } + + // /swaprun:{cd,net} options set IMAGE_FILE_{REMOVABLE,NET}_RUN_FROM_SWAP + // bits in the COFF header, respectively. If one of the bits is on, the + // Windows loader will copy the entire file to swap area then execute it, + // so that the user can eject a CD or disconnect from the network. + if (parsedArgs->hasArg(OPT_swaprun_cd)) + ctx.setSwapRunFromCD(true); + + if (parsedArgs->hasArg(OPT_swaprun_net)) + ctx.setSwapRunFromNet(true); + + if (parsedArgs->hasArg(OPT_profile)) { + // /profile implies /opt:ref, /opt:noicf, /incremental:no and /fixed:no. 
+ ctx.setDeadStripping(true); + ctx.setBaseRelocationEnabled(true); + ctx.setDynamicBaseEnabled(true); + } + + for (auto *arg : parsedArgs->filtered(OPT_implib)) + ctx.setOutputImportLibraryPath(arg->getValue()); + + for (auto *arg : parsedArgs->filtered(OPT_delayload)) { + ctx.addInitialUndefinedSymbol(ctx.getDelayLoadHelperName()); + ctx.addDelayLoadDLL(arg->getValue()); + } + + if (auto *arg = parsedArgs->getLastArg(OPT_stub)) { + ArrayRef<uint8_t> contents; + if (!readFile(ctx, arg->getValue(), contents)) { + diag << "Failed to read DOS stub file " << arg->getValue() << "\n"; + return false; + } + ctx.setDosStub(contents); + } + + for (auto *arg : parsedArgs->filtered(OPT_incl)) + ctx.addInitialUndefinedSymbol(ctx.allocate(arg->getValue())); + + if (parsedArgs->hasArg(OPT_noentry)) + ctx.setHasEntry(false); + + if (parsedArgs->hasArg(OPT_nodefaultlib_all)) + ctx.setNoDefaultLibAll(true); + + if (auto *arg = parsedArgs->getLastArg(OPT_out)) + ctx.setOutputPath(ctx.allocate(arg->getValue())); + + if (auto *arg = parsedArgs->getLastArg(OPT_pdb)) + ctx.setPDBFilePath(arg->getValue()); + + if (auto *arg = parsedArgs->getLastArg(OPT_lldmoduledeffile)) + ctx.setModuleDefinitionFile(arg->getValue()); + + std::vector<StringRef> inputFiles; + for (auto *arg : parsedArgs->filtered(OPT_INPUT)) + inputFiles.push_back(ctx.allocate(arg->getValue())); + +#define BOOLEAN_FLAG(name, setter) \ + if (auto *arg = parsedArgs->getLastArg(OPT_##name, OPT_##name##_no)) \ + ctx.setter(arg->getOption().matches(OPT_##name)); + + BOOLEAN_FLAG(nxcompat, setNxCompat); + BOOLEAN_FLAG(largeaddressaware, setLargeAddressAware); + BOOLEAN_FLAG(allowbind, setAllowBind); + BOOLEAN_FLAG(allowisolation, setAllowIsolation); + BOOLEAN_FLAG(dynamicbase, setDynamicBaseEnabled); + BOOLEAN_FLAG(tsaware, setTerminalServerAware); + BOOLEAN_FLAG(highentropyva, setHighEntropyVA); + BOOLEAN_FLAG(safeseh, setSafeSEH); +#undef BOOLEAN_FLAG + + // Arguments after "--" are interpreted as filenames even if they + // 
start with a hypen or a slash. This is not compatible with link.exe + // but useful for us to test lld on Unix. + if (llvm::opt::Arg *dashdash = parsedArgs->getLastArg(OPT_DASH_DASH)) + for (const StringRef value : dashdash->getValues()) + inputFiles.push_back(value); + + // Compile Windows resource files to compiled resource file. + if (ctx.getCreateManifest() && ctx.getEmbedManifest() && + !isReadingDirectiveSection) { + std::string resFile; + if (!createManifestResourceFile(ctx, diag, resFile)) + return false; + inputFiles.push_back(ctx.allocate(resFile)); + } + + // A Windows Resource file is not an object file. It contains data, + // such as an icon image, and is not in COFF file format. If resource + // files are given, the linker merge them into one COFF file using + // CVTRES.EXE and then link the resulting file. + { + auto it = std::partition(inputFiles.begin(), inputFiles.end(), + isResoruceFile); + if (it != inputFiles.begin()) { + std::vector<std::string> resFiles(inputFiles.begin(), it); + std::string resObj; + if (!convertResourceFiles(ctx, resFiles, resObj)) { + diag << "Failed to convert resource files\n"; + return false; + } + inputFiles = std::vector<StringRef>(it, inputFiles.end()); + inputFiles.push_back(ctx.allocate(resObj)); + ctx.registerTemporaryFile(resObj); + } + } + + // Prepare objects to add them to the list of input files. + for (StringRef path : inputFiles) { + path = ctx.allocate(path); + if (isLibraryFile(path)) { + addFiles(ctx, getLibraryPath(ctx, path), diag, libraries); + } else { + addFiles(ctx, getObjectPath(ctx, path), diag, files); + } + } + + // If dead-stripping is enabled, we need to add the entry symbol and + // symbols given by /include to the dead strip root set, so that it + // won't be removed from the output. 
+ if (ctx.deadStrip()) + for (const StringRef symbolName : ctx.initialUndefinedSymbols()) + ctx.addDeadStripRoot(symbolName); + + // Add the libraries specified by /defaultlib unless they are already added + // nor blacklisted by /nodefaultlib. + if (!ctx.getNoDefaultLibAll()) + for (const StringRef path : defaultLibs) + if (!ctx.hasNoDefaultLib(path)) + addFiles(ctx, getLibraryPath(ctx, path.lower()), diag, libraries); + + if (files.empty() && !isReadingDirectiveSection) { + diag << "No input files\n"; + return false; + } + + // If /out option was not specified, the default output file name is + // constructed by replacing an extension of the first input file + // with ".exe". + if (ctx.outputPath().empty()) { + StringRef path = files[0]->path(); + ctx.setOutputPath(replaceExtension(ctx, path, ".exe")); + } + + // Add the input files to the linking context. + for (std::unique_ptr<File> &file : files) { + if (isReadingDirectiveSection) { + File *f = file.get(); + ctx.getTaskGroup().spawn([f] { f->parse(); }); + } + ctx.getNodes().push_back(llvm::make_unique<FileNode>(std::move(file))); + } + + // Add the library group to the linking context. + if (!isReadingDirectiveSection) { + // Add a group-end marker. + ctx.getNodes().push_back(llvm::make_unique<GroupEnd>(0)); + } + + // Add the library files to the library group. + for (std::unique_ptr<File> &file : libraries) { + if (!hasLibrary(ctx, file.get())) { + if (isReadingDirectiveSection) { + File *f = file.get(); + ctx.getTaskGroup().spawn([f] { f->parse(); }); + } + ctx.addLibraryFile(llvm::make_unique<FileNode>(std::move(file))); + } + } + + // Validate the combination of options used. 
+ return ctx.validate(diag); +} + +} // namespace lld diff --git a/lib/Driver/WinLinkModuleDef.cpp b/lib/Driver/WinLinkModuleDef.cpp new file mode 100644 index 000000000000..e55a0bc5fe64 --- /dev/null +++ b/lib/Driver/WinLinkModuleDef.cpp @@ -0,0 +1,295 @@ +//===- lib/Driver/WinLinkModuleDef.cpp ------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief Windows module definition file parser. +/// +//===----------------------------------------------------------------------===// + +#include "lld/Driver/WinLinkModuleDef.h" +#include "llvm/ADT/StringSwitch.h" + +namespace lld { +namespace moduledef { + +Token Lexer::lex() { + for (;;) { + _buffer = _buffer.trim(); + if (_buffer.empty() || _buffer[0] == '\0') + return Token(Kind::eof, _buffer); + + switch (_buffer[0]) { + case ';': { + size_t end = _buffer.find('\n'); + _buffer = (end == _buffer.npos) ? 
"" : _buffer.drop_front(end); + continue; + } + case '=': + _buffer = _buffer.drop_front(); + return Token(Kind::equal, "="); + case ',': + _buffer = _buffer.drop_front(); + return Token(Kind::comma, ","); + case '"': { + size_t end = _buffer.find('"', 1); + Token ret; + if (end == _buffer.npos) { + ret = Token(Kind::identifier, _buffer.substr(1, end)); + _buffer = ""; + } else { + ret = Token(Kind::identifier, _buffer.substr(1, end - 1)); + _buffer = _buffer.drop_front(end + 1); + } + return ret; + } + default: { + size_t end = _buffer.find_first_not_of( + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" + "0123456789_.*~+!@#$%^&*()/"); + StringRef word = _buffer.substr(0, end); + Kind kind = llvm::StringSwitch<Kind>(word) + .Case("BASE", Kind::kw_base) + .Case("DATA", Kind::kw_data) + .Case("EXPORTS", Kind::kw_exports) + .Case("HEAPSIZE", Kind::kw_heapsize) + .Case("LIBRARY", Kind::kw_library) + .Case("NAME", Kind::kw_name) + .Case("NONAME", Kind::kw_noname) + .Case("PRIVATE", Kind::kw_private) + .Case("STACKSIZE", Kind::kw_stacksize) + .Case("VERSION", Kind::kw_version) + .Default(Kind::identifier); + _buffer = (end == _buffer.npos) ? 
"" : _buffer.drop_front(end); + return Token(kind, word); + } + } + } +} + +void Parser::consumeToken() { + if (_tokBuf.empty()) { + _tok = _lex.lex(); + return; + } + _tok = _tokBuf.back(); + _tokBuf.pop_back(); +} + +bool Parser::consumeTokenAsInt(uint64_t &result) { + consumeToken(); + if (_tok._kind != Kind::identifier) { + ungetToken(); + error(_tok, "Integer expected"); + return false; + } + if (_tok._range.getAsInteger(10, result)) { + error(_tok, "Integer expected"); + return false; + } + return true; +} + +bool Parser::expectAndConsume(Kind kind, Twine msg) { + consumeToken(); + if (_tok._kind != kind) { + error(_tok, msg); + return false; + } + return true; +} + +void Parser::ungetToken() { _tokBuf.push_back(_tok); } + +void Parser::error(const Token &tok, Twine msg) { + _lex.getSourceMgr().PrintMessage( + llvm::SMLoc::getFromPointer(tok._range.data()), llvm::SourceMgr::DK_Error, + msg); +} + +bool Parser::parse(std::vector<Directive *> &ret) { + for (;;) { + Directive *dir = nullptr; + if (!parseOne(dir)) + return false; + if (!dir) + return true; + ret.push_back(dir); + } +} + +bool Parser::parseOne(Directive *&ret) { + consumeToken(); + switch (_tok._kind) { + case Kind::eof: + return true; + case Kind::kw_exports: { + // EXPORTS + std::vector<PECOFFLinkingContext::ExportDesc> exports; + for (;;) { + PECOFFLinkingContext::ExportDesc desc; + if (!parseExport(desc)) + break; + exports.push_back(desc); + } + ret = new (_alloc) Exports(exports); + return true; + } + case Kind::kw_heapsize: { + // HEAPSIZE + uint64_t reserve, commit; + if (!parseMemorySize(reserve, commit)) + return false; + ret = new (_alloc) Heapsize(reserve, commit); + return true; + } + case Kind::kw_library: { + // LIBRARY + std::string name; + uint64_t baseaddr; + if (!parseName(name, baseaddr)) + return false; + if (!StringRef(name).endswith_lower(".dll")) + name.append(".dll"); + ret = new (_alloc) Library(name, baseaddr); + return true; + } + case Kind::kw_stacksize: { + // 
STACKSIZE + uint64_t reserve, commit; + if (!parseMemorySize(reserve, commit)) + return false; + ret = new (_alloc) Stacksize(reserve, commit); + return true; + } + case Kind::kw_name: { + // NAME + std::string outputPath; + uint64_t baseaddr; + if (!parseName(outputPath, baseaddr)) + return false; + ret = new (_alloc) Name(outputPath, baseaddr); + return true; + } + case Kind::kw_version: { + // VERSION + int major, minor; + if (!parseVersion(major, minor)) + return false; + ret = new (_alloc) Version(major, minor); + return true; + } + default: + error(_tok, Twine("Unknown directive: ") + _tok._range); + return false; + } +} + +bool Parser::parseExport(PECOFFLinkingContext::ExportDesc &result) { + consumeToken(); + if (_tok._kind != Kind::identifier) { + ungetToken(); + return false; + } + result.name = _tok._range; + + consumeToken(); + if (_tok._kind == Kind::equal) { + consumeToken(); + if (_tok._kind != Kind::identifier) + return false; + result.externalName = result.name; + result.name = _tok._range; + } else { + ungetToken(); + } + + for (;;) { + consumeToken(); + if (_tok._kind == Kind::identifier && _tok._range[0] == '@') { + _tok._range.drop_front().getAsInteger(10, result.ordinal); + consumeToken(); + if (_tok._kind == Kind::kw_noname) { + result.noname = true; + } else { + ungetToken(); + } + continue; + } + if (_tok._kind == Kind::kw_data) { + result.isData = true; + continue; + } + if (_tok._kind == Kind::kw_private) { + result.isPrivate = true; + continue; + } + ungetToken(); + return true; + } +} + +// HEAPSIZE/STACKSIZE reserve[,commit] +bool Parser::parseMemorySize(uint64_t &reserve, uint64_t &commit) { + if (!consumeTokenAsInt(reserve)) + return false; + + consumeToken(); + if (_tok._kind != Kind::comma) { + ungetToken(); + commit = 0; + return true; + } + + if (!consumeTokenAsInt(commit)) + return false; + return true; +} + +// NAME [outputPath] [BASE=address] +bool Parser::parseName(std::string &outputPath, uint64_t &baseaddr) { + 
consumeToken(); + if (_tok._kind == Kind::identifier) { + outputPath = _tok._range; + } else { + outputPath = ""; + ungetToken(); + return true; + } + consumeToken(); + if (_tok._kind == Kind::kw_base) { + if (!expectAndConsume(Kind::equal, "'=' expected")) + return false; + if (!consumeTokenAsInt(baseaddr)) + return false; + } else { + ungetToken(); + baseaddr = 0; + } + return true; +} + +// VERSION major[.minor] +bool Parser::parseVersion(int &major, int &minor) { + consumeToken(); + if (_tok._kind != Kind::identifier) + return false; + StringRef v1, v2; + std::tie(v1, v2) = _tok._range.split('.'); + if (v1.getAsInteger(10, major)) + return false; + if (v2.empty()) { + minor = 0; + } else if (v2.getAsInteger(10, minor)) { + return false; + } + return true; +} + +} // moddef +} // namespace lld diff --git a/lib/Driver/WinLinkOptions.td b/lib/Driver/WinLinkOptions.td new file mode 100644 index 000000000000..a545639b5bb2 --- /dev/null +++ b/lib/Driver/WinLinkOptions.td @@ -0,0 +1,120 @@ +include "llvm/Option/OptParser.td" + +// link.exe accepts options starting with either a dash or a slash. + +// Flag that takes no arguments. +class F<string name> : Flag<["/", "-", "-?"], name>; + +// Flag that takes one argument after ":". +class P<string name, string help> : + Joined<["/", "-", "-?"], name#":">, HelpText<help>; + +// Boolean flag suffixed by ":no". +multiclass B<string name, string help> { + def "" : F<name>; + def _no : F<name#":no">, HelpText<help>; +} + +def alternatename : P<"alternatename", "Define weak alias">; +def base : P<"base", "Base address of the program">; +def defaultlib : P<"defaultlib", "Add the library to the list of input files">; +def nodefaultlib : P<"nodefaultlib", "Remove a default library">; +def disallowlib : Joined<["/", "-", "-?"], "disallowlib:">, Alias<nodefaultlib>; +def entry : P<"entry", "Name of entry point symbol">; +// No help text because /failifmismatch is not intended to be used by the user. 
+def export : P<"export", "Export a function">; +def failifmismatch : P<"failifmismatch", "">; +def heap : P<"heap", "Size of the heap">; +def align : P<"align", "Section alignment">; +def libpath : P<"libpath", "Additional library search path">; +def mllvm : P<"mllvm", "Options to pass to LLVM">; +def out : P<"out", "Path to file to write output">; +def stack : P<"stack", "Size of the stack">; +def machine : P<"machine", "Specify target platform">; +def version : P<"version", "Specify a version number in the PE header">; +def merge : P<"merge", "Combine sections">; +def section : P<"section", "Specify section attributes">; +def subsystem : P<"subsystem", "Specify subsystem">; +def stub : P<"stub", "Specify DOS stub file">; +def opt : P<"opt", "Control optimizations">; +def implib : P<"implib", "Import library name">; +def delayload : P<"delayload", "Delay loaded DLL name">; +def pdb : P<"pdb", "PDB file path">; + +def manifest : F<"manifest">; +def manifest_colon : P<"manifest", "Create manifest file">; +def manifestuac : P<"manifestuac", "User access control">; +def manifestfile : P<"manifestfile", "Manifest file path">; +def manifestdependency : P<"manifestdependency", + "Attributes for <dependency> in manifest file">; + +// We cannot use multiclass P because class name "incl" is different +// from its command line option name. We do this because "include" is +// a reserved keyword in tablegen. +def incl : Joined<["/", "-"], "include:">, + HelpText<"Force symbol to be added to symbol table as undefined one">; + +// "def" is also a keyword. 
// Defines /nxcompat and /nxcompat:no; the help text is attached to the ":no"
// form.
defm nxcompat : B<"nxcompat", "Disable data execution prevention">;
+//============================================================================== + +class QF<string name> : Joined<["/", "-", "-?"], name#":">; + +multiclass QB<string name> { + def "" : F<name>; + def _no : F<name#":no">; +} + +def functionpadmin : F<"functionpadmin">; +def ignoreidl : F<"ignoreidl">; +def incremental : F<"incremental">; +def no_incremental : F<"incremental:no">; +def nologo : F<"nologo">; + +def delay : QF<"delay">; +def errorreport : QF<"errorreport">; +def idlout : QF<"idlout">; +def ignore : QF<"ignore">; +def maxilksize : QF<"maxilksize">; +def pdbaltpath : QF<"pdbaltpath">; +def tlbid : QF<"tlbid">; +def tlbout : QF<"tlbout">; +def verbose_all : QF<"verbose">; + +defm wx : QB<"wx">; diff --git a/lib/Makefile b/lib/Makefile new file mode 100644 index 000000000000..83112eaf972a --- /dev/null +++ b/lib/Makefile @@ -0,0 +1,16 @@ +##===- lib/Makefile ----------------------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LLD_LEVEL := .. + +# lld library subdirectories, listed in PARALLEL_DIRS so they can be built in parallel. +PARALLEL_DIRS = Config Core Driver ReaderWriter + +include $(LLD_LEVEL)/../../Makefile.config + +include $(LLD_LEVEL)/Makefile diff --git a/lib/ReaderWriter/CMakeLists.txt b/lib/ReaderWriter/CMakeLists.txt new file mode 100644 index 000000000000..1fd19eb73a75 --- /dev/null +++ b/lib/ReaderWriter/CMakeLists.txt @@ -0,0 +1,20 @@ +add_subdirectory(ELF) +add_subdirectory(MachO) +add_subdirectory(Native) +add_subdirectory(PECOFF) +add_subdirectory(YAML) + +if (MSVC) + add_definitions(-wd4062) # Suppress 'warning C4062: Enumerator has no associated handler in a switch statement.' 
+endif() + +add_llvm_library(lldReaderWriter + CoreLinkingContext.cpp + FileArchive.cpp + LinkerScript.cpp + LINK_LIBS + lldCore + lldYAML + LLVMObject + LLVMSupport + ) diff --git a/lib/ReaderWriter/CoreLinkingContext.cpp b/lib/ReaderWriter/CoreLinkingContext.cpp new file mode 100644 index 000000000000..86fad4f6e77d --- /dev/null +++ b/lib/ReaderWriter/CoreLinkingContext.cpp @@ -0,0 +1,171 @@ +//===- lib/ReaderWriter/CoreLinkingContext.cpp ----------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "lld/Core/DefinedAtom.h" +#include "lld/Core/File.h" +#include "lld/Core/Pass.h" +#include "lld/Core/PassManager.h" +#include "lld/Core/Simple.h" +#include "lld/ReaderWriter/CoreLinkingContext.h" +#include "llvm/ADT/ArrayRef.h" + +using namespace lld; + +namespace { + +/// \brief Simple atom created by the stubs pass. 
+class TestingStubAtom : public DefinedAtom { +public: + TestingStubAtom(const File &F, const Atom &) : _file(F) { + static uint32_t lastOrdinal = 0; + _ordinal = lastOrdinal++; + } + + const File &file() const override { return _file; } + + StringRef name() const override { return StringRef(); } + + uint64_t ordinal() const override { return _ordinal; } + + uint64_t size() const override { return 0; } + + Scope scope() const override { return DefinedAtom::scopeLinkageUnit; } + + Interposable interposable() const override { return DefinedAtom::interposeNo; } + + Merge merge() const override { return DefinedAtom::mergeNo; } + + ContentType contentType() const override { return DefinedAtom::typeStub; } + + Alignment alignment() const override { return Alignment(0, 0); } + + SectionChoice sectionChoice() const override { + return DefinedAtom::sectionBasedOnContent; + } + + StringRef customSectionName() const override { return StringRef(); } + + DeadStripKind deadStrip() const override { + return DefinedAtom::deadStripNormal; + } + + ContentPermissions permissions() const override { + return DefinedAtom::permR_X; + } + + ArrayRef<uint8_t> rawContent() const override { return ArrayRef<uint8_t>(); } + + reference_iterator begin() const override { + return reference_iterator(*this, nullptr); + } + + reference_iterator end() const override { + return reference_iterator(*this, nullptr); + } + + const Reference *derefIterator(const void *iter) const override { + return nullptr; + } + + void incrementIterator(const void *&iter) const override {} + +private: + const File &_file; + uint32_t _ordinal; +}; + +/// \brief Simple atom created by the GOT pass. 
+class TestingGOTAtom : public DefinedAtom { +public: + TestingGOTAtom(const File &F, const Atom &) : _file(F) { + static uint32_t lastOrdinal = 0; + _ordinal = lastOrdinal++; + } + + const File &file() const override { return _file; } + + StringRef name() const override { return StringRef(); } + + uint64_t ordinal() const override { return _ordinal; } + + uint64_t size() const override { return 0; } + + Scope scope() const override { return DefinedAtom::scopeLinkageUnit; } + + Interposable interposable() const override { return DefinedAtom::interposeNo; } + + Merge merge() const override { return DefinedAtom::mergeNo; } + + ContentType contentType() const override { return DefinedAtom::typeGOT; } + + Alignment alignment() const override { return Alignment(3, 0); } + + SectionChoice sectionChoice() const override { + return DefinedAtom::sectionBasedOnContent; + } + + StringRef customSectionName() const override { return StringRef(); } + + DeadStripKind deadStrip() const override { + return DefinedAtom::deadStripNormal; + } + + ContentPermissions permissions() const override { + return DefinedAtom::permRW_; + } + + ArrayRef<uint8_t> rawContent() const override { return ArrayRef<uint8_t>(); } + + reference_iterator begin() const override { + return reference_iterator(*this, nullptr); + } + + reference_iterator end() const override { + return reference_iterator(*this, nullptr); + } + + const Reference *derefIterator(const void *iter) const override { + return nullptr; + } + + void incrementIterator(const void *&iter) const override {} + +private: + const File &_file; + uint32_t _ordinal; +}; + +class OrderPass : public Pass { +public: + /// Sorts atoms by position + void perform(std::unique_ptr<MutableFile> &file) override { + MutableFile::DefinedAtomRange defined = file->definedAtoms(); + std::sort(defined.begin(), defined.end(), DefinedAtom::compareByPosition); + } +}; + +} // anonymous namespace + +CoreLinkingContext::CoreLinkingContext() {} + +bool 
CoreLinkingContext::validateImpl(raw_ostream &) { + _writer = createWriterYAML(*this); + return true; +} + +void CoreLinkingContext::addPasses(PassManager &pm) { + for (StringRef name : _passNames) { + if (name.equals("order")) + pm.add(std::unique_ptr<Pass>(new OrderPass())); + else + llvm_unreachable("bad pass name"); + } +} + +Writer &CoreLinkingContext::writer() const { return *_writer; } diff --git a/lib/ReaderWriter/ELF/AArch64/AArch64DynamicLibraryWriter.h b/lib/ReaderWriter/ELF/AArch64/AArch64DynamicLibraryWriter.h new file mode 100644 index 000000000000..12ba52a38f38 --- /dev/null +++ b/lib/ReaderWriter/ELF/AArch64/AArch64DynamicLibraryWriter.h @@ -0,0 +1,69 @@ +//===- lib/ReaderWriter/ELF/AArch64/AArch64DynamicLibraryWriter.h ---------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +#ifndef AARCH64_DYNAMIC_LIBRARY_WRITER_H +#define AARCH64_DYNAMIC_LIBRARY_WRITER_H + +#include "AArch64LinkingContext.h" +#include "AArch64TargetHandler.h" +#include "DynamicLibraryWriter.h" + +namespace lld { +namespace elf { + +template <class ELFT> +class AArch64DynamicLibraryWriter : public DynamicLibraryWriter<ELFT> { +public: + AArch64DynamicLibraryWriter(AArch64LinkingContext &context, + AArch64TargetLayout<ELFT> &layout); + +protected: + // Add any runtime files and their atoms to the output + virtual bool createImplicitFiles(std::vector<std::unique_ptr<File>> &); + + virtual void finalizeDefaultAtomValues() { + return DynamicLibraryWriter<ELFT>::finalizeDefaultAtomValues(); + } + + virtual void addDefaultAtoms() { + return DynamicLibraryWriter<ELFT>::addDefaultAtoms(); + } + +private: + class GOTFile : public SimpleFile { + public: + GOTFile(const ELFLinkingContext &eti) : SimpleFile("GOTFile") {} + llvm::BumpPtrAllocator _alloc; + }; + + std::unique_ptr<GOTFile> _gotFile; + 
AArch64LinkingContext &_context; + AArch64TargetLayout<ELFT> &_AArch64Layout; +}; + +template <class ELFT> +AArch64DynamicLibraryWriter<ELFT>::AArch64DynamicLibraryWriter( + AArch64LinkingContext &context, AArch64TargetLayout<ELFT> &layout) + : DynamicLibraryWriter<ELFT>(context, layout), + _gotFile(new GOTFile(context)), _context(context), + _AArch64Layout(layout) {} + +template <class ELFT> +bool AArch64DynamicLibraryWriter<ELFT>::createImplicitFiles( + std::vector<std::unique_ptr<File>> &result) { + DynamicLibraryWriter<ELFT>::createImplicitFiles(result); + _gotFile->addAtom(*new (_gotFile->_alloc) GLOBAL_OFFSET_TABLEAtom(*_gotFile)); + _gotFile->addAtom(*new (_gotFile->_alloc) DYNAMICAtom(*_gotFile)); + result.push_back(std::move(_gotFile)); + return true; +} + +} // namespace elf +} // namespace lld + +#endif diff --git a/lib/ReaderWriter/ELF/AArch64/AArch64ELFFile.h b/lib/ReaderWriter/ELF/AArch64/AArch64ELFFile.h new file mode 100644 index 000000000000..9d5207c1c4b4 --- /dev/null +++ b/lib/ReaderWriter/ELF/AArch64/AArch64ELFFile.h @@ -0,0 +1,41 @@ +//===- lib/ReaderWriter/ELF/AArch64/AArch64ELFFile.h ----------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_ELF_AARCH64_AARCH64_ELF_FILE_H +#define LLD_READER_WRITER_ELF_AARCH64_AARCH64_ELF_FILE_H + +#include "ELFReader.h" + +namespace lld { +namespace elf { + +class AArch64LinkingContext; + +template <class ELFT> class AArch64ELFFile : public ELFFile<ELFT> { +public: + AArch64ELFFile(std::unique_ptr<MemoryBuffer> mb, AArch64LinkingContext &ctx) + : ELFFile<ELFT>(std::move(mb), ctx) {} + + static ErrorOr<std::unique_ptr<AArch64ELFFile>> + create(std::unique_ptr<MemoryBuffer> mb, AArch64LinkingContext &ctx) { + return std::unique_ptr<AArch64ELFFile<ELFT>>( + new AArch64ELFFile<ELFT>(std::move(mb), ctx)); + } +}; + +template <class ELFT> class AArch64DynamicFile : public DynamicFile<ELFT> { +public: + AArch64DynamicFile(const AArch64LinkingContext &context, StringRef name) + : DynamicFile<ELFT>(context, name) {} +}; + +} // elf +} // lld + +#endif // LLD_READER_WRITER_ELF_AARCH64_AARCH64_ELF_FILE_H diff --git a/lib/ReaderWriter/ELF/AArch64/AArch64ELFReader.h b/lib/ReaderWriter/ELF/AArch64/AArch64ELFReader.h new file mode 100644 index 000000000000..05f312db3e7b --- /dev/null +++ b/lib/ReaderWriter/ELF/AArch64/AArch64ELFReader.h @@ -0,0 +1,62 @@ +//===- lib/ReaderWriter/ELF/AArch64/AArch64ELFReader.h --------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_AARCH64_AARCH64_ELF_READER_H +#define LLD_READER_WRITER_AARCH64_AARCH64_ELF_READER_H + +#include "AArch64ELFFile.h" +#include "ELFReader.h" + +namespace lld { +namespace elf { + +typedef llvm::object::ELFType<llvm::support::little, 2, true> AArch64ELFType; + +struct AArch64DynamicFileCreateELFTraits { + typedef llvm::ErrorOr<std::unique_ptr<lld::SharedLibraryFile>> result_type; + + template <class ELFT> + static result_type create(std::unique_ptr<llvm::MemoryBuffer> mb, + AArch64LinkingContext &ctx) { + return lld::elf::AArch64DynamicFile<ELFT>::create(std::move(mb), ctx); + } +}; + +struct AArch64ELFFileCreateELFTraits { + typedef llvm::ErrorOr<std::unique_ptr<lld::File>> result_type; + + template <class ELFT> + static result_type create(std::unique_ptr<llvm::MemoryBuffer> mb, + AArch64LinkingContext &ctx) { + return lld::elf::AArch64ELFFile<ELFT>::create(std::move(mb), ctx); + } +}; + +class AArch64ELFObjectReader + : public ELFObjectReader<AArch64ELFType, AArch64ELFFileCreateELFTraits, + AArch64LinkingContext> { +public: + AArch64ELFObjectReader(AArch64LinkingContext &ctx) + : ELFObjectReader<AArch64ELFType, AArch64ELFFileCreateELFTraits, + AArch64LinkingContext>(ctx, llvm::ELF::EM_AARCH64) {} +}; + +class AArch64ELFDSOReader + : public ELFDSOReader<AArch64ELFType, AArch64DynamicFileCreateELFTraits, + AArch64LinkingContext> { +public: + AArch64ELFDSOReader(AArch64LinkingContext &ctx) + : ELFDSOReader<AArch64ELFType, AArch64DynamicFileCreateELFTraits, + AArch64LinkingContext>(ctx, llvm::ELF::EM_AARCH64) {} +}; + +} // namespace elf +} // namespace lld + +#endif // LLD_READER_WRITER_AARCH64_AARCH64_ELF_READER_H diff --git a/lib/ReaderWriter/ELF/AArch64/AArch64ExecutableWriter.h b/lib/ReaderWriter/ELF/AArch64/AArch64ExecutableWriter.h new file mode 100644 index 000000000000..73963f56ef70 --- /dev/null +++ 
b/lib/ReaderWriter/ELF/AArch64/AArch64ExecutableWriter.h @@ -0,0 +1,68 @@ +//===- lib/ReaderWriter/ELF/AArch64/AArch64ExecutableWriter.h -------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +#ifndef AARCH64_EXECUTABLE_WRITER_H +#define AARCH64_EXECUTABLE_WRITER_H + +#include "AArch64LinkingContext.h" +#include "ExecutableWriter.h" + +namespace lld { +namespace elf { + +template <class ELFT> +class AArch64ExecutableWriter : public ExecutableWriter<ELFT> { +public: + AArch64ExecutableWriter(AArch64LinkingContext &context, + AArch64TargetLayout<ELFT> &layout); + +protected: + // Add any runtime files and their atoms to the output + bool createImplicitFiles(std::vector<std::unique_ptr<File>> &) override; + + void finalizeDefaultAtomValues() override { + return ExecutableWriter<ELFT>::finalizeDefaultAtomValues(); + } + + void addDefaultAtoms() override{ + return ExecutableWriter<ELFT>::addDefaultAtoms(); + } + +private: + class GOTFile : public SimpleFile { + public: + GOTFile(const ELFLinkingContext &eti) : SimpleFile("GOTFile") {} + llvm::BumpPtrAllocator _alloc; + }; + + std::unique_ptr<GOTFile> _gotFile; + AArch64LinkingContext &_context; + AArch64TargetLayout<ELFT> &_AArch64Layout; +}; + +template <class ELFT> +AArch64ExecutableWriter<ELFT>::AArch64ExecutableWriter( + AArch64LinkingContext &context, AArch64TargetLayout<ELFT> &layout) + : ExecutableWriter<ELFT>(context, layout), _gotFile(new GOTFile(context)), + _context(context), _AArch64Layout(layout) {} + +template <class ELFT> +bool AArch64ExecutableWriter<ELFT>::createImplicitFiles( + std::vector<std::unique_ptr<File>> &result) { + ExecutableWriter<ELFT>::createImplicitFiles(result); + _gotFile->addAtom(*new (_gotFile->_alloc) GLOBAL_OFFSET_TABLEAtom(*_gotFile)); + if (_context.isDynamic()) + _gotFile->addAtom(*new 
(_gotFile->_alloc) DYNAMICAtom(*_gotFile)); + result.push_back(std::move(_gotFile)); + return true; +} + +} // namespace elf +} // namespace lld + +#endif diff --git a/lib/ReaderWriter/ELF/AArch64/AArch64LinkingContext.cpp b/lib/ReaderWriter/ELF/AArch64/AArch64LinkingContext.cpp new file mode 100644 index 000000000000..9eb98f447709 --- /dev/null +++ b/lib/ReaderWriter/ELF/AArch64/AArch64LinkingContext.cpp @@ -0,0 +1,33 @@ +//===- lib/ReaderWriter/ELF/AArch64/AArch64LinkingContext.cpp -------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "AArch64LinkingContext.h" +#include "AArch64RelocationPass.h" +#include "AArch64TargetHandler.h" + +using namespace lld; + +std::unique_ptr<ELFLinkingContext> +elf::AArch64LinkingContext::create(llvm::Triple triple) { + if (triple.getArch() == llvm::Triple::aarch64) + return std::unique_ptr<ELFLinkingContext>( + new elf::AArch64LinkingContext(triple)); + return nullptr; +} + +elf::AArch64LinkingContext::AArch64LinkingContext(llvm::Triple triple) + : ELFLinkingContext(triple, std::unique_ptr<TargetHandlerBase>( + new AArch64TargetHandler(*this))) {} + +void elf::AArch64LinkingContext::addPasses(PassManager &pm) { + auto pass = createAArch64RelocationPass(*this); + if (pass) + pm.add(std::move(pass)); + ELFLinkingContext::addPasses(pm); +} diff --git a/lib/ReaderWriter/ELF/AArch64/AArch64LinkingContext.h b/lib/ReaderWriter/ELF/AArch64/AArch64LinkingContext.h new file mode 100644 index 000000000000..ebd91fe0a95b --- /dev/null +++ b/lib/ReaderWriter/ELF/AArch64/AArch64LinkingContext.h @@ -0,0 +1,95 @@ +//===- lib/ReaderWriter/ELF/AArch64/AArch64LinkingContext.h ---------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_ELF_AARCH64_AARCH64_LINKING_CONTEXT_H +#define LLD_READER_WRITER_ELF_AARCH64_AARCH64_LINKING_CONTEXT_H + +#include "lld/ReaderWriter/ELFLinkingContext.h" +#include "llvm/Object/ELF.h" +#include "llvm/Support/ELF.h" + +namespace lld { +namespace elf { + +enum { + /// \brief The offset to add operation for a R_AARCH64_ADR_GOT_PAGE + ADD_AARCH64_GOTRELINDEX = 0xE000, +}; + +class AArch64LinkingContext final : public ELFLinkingContext { +public: + static std::unique_ptr<ELFLinkingContext> create(llvm::Triple); + AArch64LinkingContext(llvm::Triple); + + void addPasses(PassManager &) override; + + uint64_t getBaseAddress() const override { + if (_baseAddress == 0) + return 0x400000; + return _baseAddress; + } + + bool isDynamicRelocation(const Reference &r) const override { + if (r.kindNamespace() != Reference::KindNamespace::ELF) + return false; + assert(r.kindArch() == Reference::KindArch::AArch64); + switch (r.kindValue()) { + case llvm::ELF::R_AARCH64_COPY: + case llvm::ELF::R_AARCH64_GLOB_DAT: + case llvm::ELF::R_AARCH64_RELATIVE: + case llvm::ELF::R_AARCH64_TLS_DTPREL64: + case llvm::ELF::R_AARCH64_TLS_DTPMOD64: + case llvm::ELF::R_AARCH64_TLS_TPREL64: + case llvm::ELF::R_AARCH64_TLSDESC: + case llvm::ELF::R_AARCH64_IRELATIVE: + return true; + default: + return false; + } + } + + bool isCopyRelocation(const Reference &r) const override { + if (r.kindNamespace() != Reference::KindNamespace::ELF) + return false; + assert(r.kindArch() == Reference::KindArch::AArch64); + if (r.kindValue() == llvm::ELF::R_AARCH64_COPY) + return true; + return false; + } + + bool isPLTRelocation(const Reference &r) const override { + if (r.kindNamespace() != Reference::KindNamespace::ELF) + return false; + assert(r.kindArch() == Reference::KindArch::AArch64); + switch (r.kindValue()) { + case llvm::ELF::R_AARCH64_JUMP_SLOT: + case llvm::ELF::R_AARCH64_IRELATIVE: + return 
/// \brief Check that X is in the half-open interval [-2^(bits-1), 2^bits),
/// i.e. that X fits in \p bits bits when read as either a signed or an
/// unsigned value.
///
/// \p bits is expected to be in [1, 64]; smaller values yield false.
static bool withinSignedUnsignedRange(int64_t X, int bits) {
  if (bits < 1)
    return false;
  // Every int64_t fits in 64 signed bits.
  if (bits >= 64)
    return true;

  const int64_t lowest = -(INT64_C(1) << (bits - 1));
  if (X < lowest)
    return false;

  // 2^63 is not representable in int64_t, and no int64_t reaches it, so the
  // upper bound cannot fail for bits == 63.
  if (bits == 63)
    return true;
  return X < (INT64_C(1) << bits);
}
Twine::utohexstr(result) << "\n"); + write64le(location, result | read64le(location)); +} + +/// \brief R_AARCH64_PREL32 - word32: S + A - P +static void relocR_AARCH64_PREL32(uint8_t *location, uint64_t P, uint64_t S, + int64_t A) { + int32_t result = (int32_t)((S + A) - P); + write32le(location, result + (int32_t)read32le(location)); +} + +/// \brief R_AARCH64_ABS32 - word32: S + A +static std::error_code relocR_AARCH64_ABS32(uint8_t *location, uint64_t P, + uint64_t S, int64_t A) { + int64_t result = S + A; + if (!withinSignedUnsignedRange(result, 32)) + return make_out_of_range_reloc_error(); + DEBUG_WITH_TYPE( + "AArch64", llvm::dbgs() << "\t\tHandle " << LLVM_FUNCTION_NAME << " -"; + llvm::dbgs() << " S: 0x" << Twine::utohexstr(S); + llvm::dbgs() << " A: 0x" << Twine::utohexstr(A); + llvm::dbgs() << " P: 0x" << Twine::utohexstr(P); + llvm::dbgs() << " result: 0x" << Twine::utohexstr(result) << "\n"); + write32le(location, result | read32le(location)); + return std::error_code(); +} + +/// \brief R_AARCH64_ADR_PREL_PG_HI21 - Page(S+A) - Page(P) +static void relocR_AARCH64_ADR_PREL_PG_HI21(uint8_t *location, uint64_t P, + uint64_t S, int64_t A) { + uint64_t result = (PAGE(S + A) - PAGE(P)); + result = result >> 12; + uint32_t immlo = result & 0x3; + uint32_t immhi = result & 0x1FFFFC; + immlo = immlo << 29; + immhi = immhi << 3; + DEBUG_WITH_TYPE( + "AArch64", llvm::dbgs() << "\t\tHandle " << LLVM_FUNCTION_NAME << " -"; + llvm::dbgs() << " S: " << Twine::utohexstr(S); + llvm::dbgs() << " A: " << Twine::utohexstr(A); + llvm::dbgs() << " P: " << Twine::utohexstr(P); + llvm::dbgs() << " immhi: " << Twine::utohexstr(immhi); + llvm::dbgs() << " immlo: " << Twine::utohexstr(immlo); + llvm::dbgs() << " result: " << Twine::utohexstr(result) << "\n"); + write32le(location, immlo | immhi | read32le(location)); + // TODO: Make sure this is correct! 
+} + +/// \brief R_AARCH64_ADR_PREL_LO21 - S + A - P +static void relocR_AARCH64_ADR_PREL_LO21(uint8_t *location, uint64_t P, + uint64_t S, int64_t A) { + uint64_t result = (S + A) - P; + uint32_t immlo = result & 0x3; + uint32_t immhi = result & 0x1FFFFC; + immlo = immlo << 29; + immhi = immhi << 3; + DEBUG_WITH_TYPE( + "AArch64", llvm::dbgs() << "\t\tHandle " << LLVM_FUNCTION_NAME << " -"; + llvm::dbgs() << " S: " << Twine::utohexstr(S); + llvm::dbgs() << " A: " << Twine::utohexstr(A); + llvm::dbgs() << " P: " << Twine::utohexstr(P); + llvm::dbgs() << " immhi: " << Twine::utohexstr(immhi); + llvm::dbgs() << " immlo: " << Twine::utohexstr(immlo); + llvm::dbgs() << " result: " << Twine::utohexstr(result) << "\n"); + write32le(location, immlo | immhi | read32le(location)); + // TODO: Make sure this is correct! +} + +/// \brief R_AARCH64_ADD_ABS_LO12_NC +static void relocR_AARCH64_ADD_ABS_LO12_NC(uint8_t *location, uint64_t P, + uint64_t S, int64_t A) { + int32_t result = (int32_t)((S + A) & 0xFFF); + result <<= 10; + DEBUG_WITH_TYPE( + "AArch64", llvm::dbgs() << "\t\tHandle " << LLVM_FUNCTION_NAME << " -"; + llvm::dbgs() << " S: " << Twine::utohexstr(S); + llvm::dbgs() << " A: " << Twine::utohexstr(A); + llvm::dbgs() << " P: " << Twine::utohexstr(P); + llvm::dbgs() << " result: " << Twine::utohexstr(result) << "\n"); + write32le(location, result | read32le(location)); +} + +static void relocJump26(uint8_t *location, uint64_t P, uint64_t S, int64_t A) { + int32_t result = (int32_t)((S + A) - P); + result &= 0x0FFFFFFC; + result >>= 2; + DEBUG_WITH_TYPE( + "AArch64", llvm::dbgs() << "\t\tHandle " << LLVM_FUNCTION_NAME << " -"; + llvm::dbgs() << " S: " << Twine::utohexstr(S); + llvm::dbgs() << " A: " << Twine::utohexstr(A); + llvm::dbgs() << " P: " << Twine::utohexstr(P); + llvm::dbgs() << " result: " << Twine::utohexstr(result) << "\n"); + write32le(location, result | read32le(location)); +} + +/// \brief R_AARCH64_CONDBR19 +static void relocR_AARCH64_CONDBR19(uint8_t 
*location, uint64_t P, uint64_t S, + int64_t A) { + int32_t result = (int32_t)((S + A) - P); + result &= 0x01FFFFC; + result <<= 3; + DEBUG_WITH_TYPE( + "AArch64", llvm::dbgs() << "\t\tHandle " << LLVM_FUNCTION_NAME << " -"; + llvm::dbgs() << " S: " << Twine::utohexstr(S); + llvm::dbgs() << " A: " << Twine::utohexstr(A); + llvm::dbgs() << " P: " << Twine::utohexstr(P); + llvm::dbgs() << " result: " << Twine::utohexstr(result) << "\n"); + write32le(location, result | read32le(location)); +} + +/// \brief R_AARCH64_LDST8_ABS_LO12_NC - S + A +static void relocR_AARCH64_LDST8_ABS_LO12_NC(uint8_t *location, uint64_t P, + uint64_t S, int64_t A) { + int32_t result = (int32_t)((S + A) & 0xFFF); + result <<= 10; + DEBUG_WITH_TYPE( + "AArch64", llvm::dbgs() << "\t\tHandle " << LLVM_FUNCTION_NAME << " -"; + llvm::dbgs() << " S: " << Twine::utohexstr(S); + llvm::dbgs() << " A: " << Twine::utohexstr(A); + llvm::dbgs() << " P: " << Twine::utohexstr(P); + llvm::dbgs() << " result: " << Twine::utohexstr(result) << "\n"); + write32le(location, result | read32le(location)); +} + +/// \brief R_AARCH64_LDST16_ABS_LO12_NC +static void relocR_AARCH64_LDST16_ABS_LO12_NC(uint8_t *location, uint64_t P, + uint64_t S, int64_t A) { + int32_t result = (int32_t)(S + A); + result &= 0x0FFC; + result <<= 9; + DEBUG_WITH_TYPE( + "AArch64", llvm::dbgs() << "\t\tHandle " << LLVM_FUNCTION_NAME << " -"; + llvm::dbgs() << " S: " << Twine::utohexstr(S); + llvm::dbgs() << " A: " << Twine::utohexstr(A); + llvm::dbgs() << " P: " << Twine::utohexstr(P); + llvm::dbgs() << " result: " << Twine::utohexstr(result) << "\n"); + write32le(location, result | read32le(location)); +} + +/// \brief R_AARCH64_LDST32_ABS_LO12_NC +static void relocR_AARCH64_LDST32_ABS_LO12_NC(uint8_t *location, uint64_t P, + uint64_t S, int64_t A) { + int32_t result = (int32_t)(S + A); + result &= 0x0FFC; + result <<= 8; + DEBUG_WITH_TYPE( + "AArch64", llvm::dbgs() << "\t\tHandle " << LLVM_FUNCTION_NAME << " -"; + llvm::dbgs() << " S: " 
<< Twine::utohexstr(S); + llvm::dbgs() << " A: " << Twine::utohexstr(A); + llvm::dbgs() << " P: " << Twine::utohexstr(P); + llvm::dbgs() << " result: " << Twine::utohexstr(result) << "\n"); + write32le(location, result | read32le(location)); +} + +/// \brief R_AARCH64_LDST64_ABS_LO12_NC +static void relocR_AARCH64_LDST64_ABS_LO12_NC(uint8_t *location, uint64_t P, + uint64_t S, int64_t A) { + int32_t result = (int32_t)(S + A); + result &= 0x0FF8; + result <<= 7; + DEBUG_WITH_TYPE( + "AArch64", llvm::dbgs() << "\t\tHandle " << LLVM_FUNCTION_NAME << " -"; + llvm::dbgs() << " S: " << Twine::utohexstr(S); + llvm::dbgs() << " A: " << Twine::utohexstr(A); + llvm::dbgs() << " P: " << Twine::utohexstr(P); + llvm::dbgs() << " result: " << Twine::utohexstr(result) << "\n"); + write32le(location, result | read32le(location)); +} + +/// \brief R_AARCH64_LDST128_ABS_LO12_NC +static void relocR_AARCH64_LDST128_ABS_LO12_NC(uint8_t *location, uint64_t P, + uint64_t S, int64_t A) { + int32_t result = (int32_t)(S + A); + result &= 0x0FF8; + result <<= 6; + DEBUG_WITH_TYPE( + "AArch64", llvm::dbgs() << "\t\tHandle " << LLVM_FUNCTION_NAME << " -"; + llvm::dbgs() << " S: " << Twine::utohexstr(S); + llvm::dbgs() << " A: " << Twine::utohexstr(A); + llvm::dbgs() << " P: " << Twine::utohexstr(P); + llvm::dbgs() << " result: " << Twine::utohexstr(result) << "\n"); + write32le(location, result | read32le(location)); +} + +static void relocR_AARCH64_ADR_GOT_PAGE(uint8_t *location, uint64_t P, + uint64_t S, int64_t A) { + uint64_t result = PAGE(S + A) - PAGE(P); + result >>= 12; + uint32_t immlo = result & 0x3; + uint32_t immhi = result & 0x1FFFFC; + immlo = immlo << 29; + immhi = immhi << 3; + DEBUG_WITH_TYPE( + "AArch64", llvm::dbgs() << "\t\tHandle " << LLVM_FUNCTION_NAME << " -"; + llvm::dbgs() << " S: " << Twine::utohexstr(S); + llvm::dbgs() << " A: " << Twine::utohexstr(A); + llvm::dbgs() << " P: " << Twine::utohexstr(P); + llvm::dbgs() << " immhi: " << Twine::utohexstr(immhi); + 
llvm::dbgs() << " immlo: " << Twine::utohexstr(immlo); + llvm::dbgs() << " result: " << Twine::utohexstr(result) << "\n"); + write32le(location, result | read32le(location)); +} + +// R_AARCH64_LD64_GOT_LO12_NC +static void relocR_AARCH64_LD64_GOT_LO12_NC(uint8_t *location, uint64_t P, + uint64_t S, int64_t A) { + int32_t result = S + A; + DEBUG_WITH_TYPE( + "AArch64", llvm::dbgs() << "\t\tHandle " << LLVM_FUNCTION_NAME << " -"; + llvm::dbgs() << " S: " << Twine::utohexstr(S); + llvm::dbgs() << " A: " << Twine::utohexstr(A); + llvm::dbgs() << " P: " << Twine::utohexstr(P); + llvm::dbgs() << " result: " << Twine::utohexstr(result) << "\n"); + result &= 0xFF8; + result <<= 7; + write32le(location, result | read32le(location)); +} + +// ADD_AARCH64_GOTRELINDEX +static void relocADD_AARCH64_GOTRELINDEX(uint8_t *location, uint64_t P, + uint64_t S, int64_t A) { + int32_t result = S + A; + DEBUG_WITH_TYPE( + "AArch64", llvm::dbgs() << "\t\tHandle " << LLVM_FUNCTION_NAME << " -"; + llvm::dbgs() << " S: " << Twine::utohexstr(S); + llvm::dbgs() << " A: " << Twine::utohexstr(A); + llvm::dbgs() << " P: " << Twine::utohexstr(P); + llvm::dbgs() << " result: " << Twine::utohexstr(result) << "\n"); + result &= 0xFFF; + result <<= 10; + write32le(location, result | read32le(location)); +} + +// R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21 +static void relocR_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21(uint8_t *location, + uint64_t P, uint64_t S, + int64_t A) { + int64_t result = PAGE(S + A) - PAGE(P); + result >>= 12; + uint32_t immlo = result & 0x3; + uint32_t immhi = result & 0x1FFFFC; + immlo = immlo << 29; + immhi = immhi << 3; + DEBUG_WITH_TYPE( + "AArch64", llvm::dbgs() << "\t\tHandle " << LLVM_FUNCTION_NAME << " -"; + llvm::dbgs() << " S: " << Twine::utohexstr(S); + llvm::dbgs() << " A: " << Twine::utohexstr(A); + llvm::dbgs() << " P: " << Twine::utohexstr(P); + llvm::dbgs() << " immhi: " << Twine::utohexstr(immhi); + llvm::dbgs() << " immlo: " << Twine::utohexstr(immlo); + llvm::dbgs() << " 
result: " << Twine::utohexstr(result) << "\n"); + write32le(location, immlo | immhi | read32le(location)); +} + +// R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC +static void relocR_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC(uint8_t *location, + uint64_t P, uint64_t S, + int64_t A) { + int32_t result = S + A; + result &= 0xFF8; + result <<= 7; + DEBUG_WITH_TYPE( + "AArch64", llvm::dbgs() << "\t\tHandle " << LLVM_FUNCTION_NAME << " -"; + llvm::dbgs() << " S: " << Twine::utohexstr(S); + llvm::dbgs() << " A: " << Twine::utohexstr(A); + llvm::dbgs() << " P: " << Twine::utohexstr(P); + llvm::dbgs() << " result: " << Twine::utohexstr(result) << "\n"); + write32le(location, result | read32le(location)); +} + +/// \brief R_AARCH64_TLSLE_ADD_TPREL_HI12 +static void relocR_AARCH64_TLSLE_ADD_TPREL_HI12(uint8_t *location, uint64_t P, + uint64_t S, int64_t A) { + int32_t result = S + A; + result &= 0x0FFF000; + result >>= 2; + DEBUG_WITH_TYPE( + "AArch64", llvm::dbgs() << "\t\tHandle " << LLVM_FUNCTION_NAME << " -"; + llvm::dbgs() << " S: " << Twine::utohexstr(S); + llvm::dbgs() << " A: " << Twine::utohexstr(A); + llvm::dbgs() << " P: " << Twine::utohexstr(P); + llvm::dbgs() << " result: " << Twine::utohexstr(result) << "\n"); + write32le(location, result | read32le(location)); +} + +/// \brief R_AARCH64_TLSLE_ADD_TPREL_LO12_NC +static void relocR_AARCH64_TLSLE_ADD_TPREL_LO12_NC(uint8_t *location, + uint64_t P, uint64_t S, + int64_t A) { + int32_t result = S + A; + result &= 0x0FFF; + result <<= 10; + DEBUG_WITH_TYPE( + "AArch64", llvm::dbgs() << "\t\tHandle " << LLVM_FUNCTION_NAME << " -"; + llvm::dbgs() << " S: " << Twine::utohexstr(S); + llvm::dbgs() << " A: " << Twine::utohexstr(A); + llvm::dbgs() << " P: " << Twine::utohexstr(P); + llvm::dbgs() << " result: " << Twine::utohexstr(result) << "\n"); + write32le(location, result | read32le(location)); +} + +std::error_code AArch64TargetRelocationHandler::applyRelocation( + ELFWriter &writer, llvm::FileOutputBuffer &buf, const lld::AtomLayout 
&atom, + const Reference &ref) const { + uint8_t *atomContent = buf.getBufferStart() + atom._fileOffset; + uint8_t *location = atomContent + ref.offsetInAtom(); + uint64_t targetVAddress = writer.addressOfAtom(ref.target()); + uint64_t relocVAddress = atom._virtualAddr + ref.offsetInAtom(); + + if (ref.kindNamespace() != Reference::KindNamespace::ELF) + return std::error_code(); + assert(ref.kindArch() == Reference::KindArch::AArch64); + switch (ref.kindValue()) { + case R_AARCH64_NONE: + break; + case R_AARCH64_ABS64: + relocR_AARCH64_ABS64(location, relocVAddress, targetVAddress, ref.addend()); + break; + case R_AARCH64_PREL32: + relocR_AARCH64_PREL32(location, relocVAddress, targetVAddress, + ref.addend()); + break; + case R_AARCH64_ABS32: + return relocR_AARCH64_ABS32(location, relocVAddress, targetVAddress, + ref.addend()); + // Runtime only relocations. Ignore here. + case R_AARCH64_RELATIVE: + case R_AARCH64_IRELATIVE: + case R_AARCH64_JUMP_SLOT: + case R_AARCH64_GLOB_DAT: + break; + case R_AARCH64_ADR_PREL_PG_HI21: + relocR_AARCH64_ADR_PREL_PG_HI21(location, relocVAddress, targetVAddress, + ref.addend()); + break; + case R_AARCH64_ADR_PREL_LO21: + relocR_AARCH64_ADR_PREL_LO21(location, relocVAddress, targetVAddress, + ref.addend()); + break; + case R_AARCH64_ADD_ABS_LO12_NC: + relocR_AARCH64_ADD_ABS_LO12_NC(location, relocVAddress, targetVAddress, + ref.addend()); + break; + case R_AARCH64_CALL26: + case R_AARCH64_JUMP26: + relocJump26(location, relocVAddress, targetVAddress, ref.addend()); + break; + case R_AARCH64_CONDBR19: + relocR_AARCH64_CONDBR19(location, relocVAddress, targetVAddress, + ref.addend()); + break; + case R_AARCH64_ADR_GOT_PAGE: + relocR_AARCH64_ADR_GOT_PAGE(location, relocVAddress, targetVAddress, + ref.addend()); + break; + case R_AARCH64_LD64_GOT_LO12_NC: + relocR_AARCH64_LD64_GOT_LO12_NC(location, relocVAddress, targetVAddress, + ref.addend()); + break; + case R_AARCH64_LDST8_ABS_LO12_NC: + relocR_AARCH64_LDST8_ABS_LO12_NC(location, 
relocVAddress, targetVAddress, + ref.addend()); + break; + case R_AARCH64_LDST16_ABS_LO12_NC: + relocR_AARCH64_LDST16_ABS_LO12_NC(location, relocVAddress, targetVAddress, + ref.addend()); + break; + case R_AARCH64_LDST32_ABS_LO12_NC: + relocR_AARCH64_LDST32_ABS_LO12_NC(location, relocVAddress, targetVAddress, + ref.addend()); + break; + case R_AARCH64_LDST64_ABS_LO12_NC: + relocR_AARCH64_LDST64_ABS_LO12_NC(location, relocVAddress, targetVAddress, + ref.addend()); + break; + case R_AARCH64_LDST128_ABS_LO12_NC: + relocR_AARCH64_LDST128_ABS_LO12_NC(location, relocVAddress, targetVAddress, + ref.addend()); + break; + case ADD_AARCH64_GOTRELINDEX: + relocADD_AARCH64_GOTRELINDEX(location, relocVAddress, targetVAddress, + ref.addend()); + break; + case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21: + relocR_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21(location, relocVAddress, + targetVAddress, ref.addend()); + break; + case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC: + relocR_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC(location, relocVAddress, + targetVAddress, ref.addend()); + break; + case R_AARCH64_TLSLE_ADD_TPREL_HI12: + relocR_AARCH64_TLSLE_ADD_TPREL_HI12(location, relocVAddress, targetVAddress, + ref.addend()); + break; + case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC: + relocR_AARCH64_TLSLE_ADD_TPREL_LO12_NC(location, relocVAddress, + targetVAddress, ref.addend()); + break; + default: + return make_unhandled_reloc_error(); + } + + return std::error_code(); +} diff --git a/lib/ReaderWriter/ELF/AArch64/AArch64RelocationHandler.h b/lib/ReaderWriter/ELF/AArch64/AArch64RelocationHandler.h new file mode 100644 index 000000000000..b1d3c09dc936 --- /dev/null +++ b/lib/ReaderWriter/ELF/AArch64/AArch64RelocationHandler.h @@ -0,0 +1,33 @@ +//===- lib/ReaderWriter/ELF/AArch64/AArch64RelocationHandler.h ------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
//
//===----------------------------------------------------------------------===//

#ifndef AARCH64_RELOCATION_HANDLER_H
#define AARCH64_RELOCATION_HANDLER_H

#include "AArch64TargetHandler.h"

namespace lld {
namespace elf {
/// ELF type used throughout the AArch64 target: little-endian. The remaining
/// two arguments are forwarded to llvm::object::ELFType unchanged.
typedef llvm::object::ELFType<llvm::support::little, 2, true> AArch64ELFType;

template <class ELFT> class AArch64TargetLayout;

/// \brief Relocation handler for the AArch64 ELF target.
class AArch64TargetRelocationHandler final : public TargetRelocationHandler {
public:
  /// \brief Apply a single reference of an atom to the output buffer.
  ///
  /// The parameters are, in order: the writer, the output buffer, the layout
  /// of the atom that holds the reference, and the reference itself.
  std::error_code applyRelocation(ELFWriter &, llvm::FileOutputBuffer &,
                                  const lld::AtomLayout &,
                                  const Reference &) const override;

  /// Table of relocation kind names.
  static const Registry::KindStrings kindStrings[];
};

} // end namespace elf
} // end namespace lld

#endif // AARCH64_RELOCATION_HANDLER_H
diff --git a/lib/ReaderWriter/ELF/AArch64/AArch64RelocationPass.cpp b/lib/ReaderWriter/ELF/AArch64/AArch64RelocationPass.cpp
new file mode 100644
index 000000000000..0bd12958b27b
--- /dev/null
+++ b/lib/ReaderWriter/ELF/AArch64/AArch64RelocationPass.cpp
@@ -0,0 +1,527 @@
//===- lib/ReaderWriter/ELF/AArch64/AArch64RelocationPass.cpp -------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Defines the relocation processing pass for AArch64. This includes
/// GOT and PLT entries, TLS, COPY, and ifunc.
///
/// This also includes additional behavior that gnu-ld and gold implement but
/// which is not specified anywhere.
///
//===----------------------------------------------------------------------===//

#include "AArch64RelocationPass.h"
#include "AArch64LinkingContext.h"
#include "Atoms.h"
#include "lld/Core/Simple.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Debug.h"

using namespace lld;
using namespace lld::elf;
using namespace llvm::ELF;

namespace {
// .got values: one zero-filled 8-byte slot.
const uint8_t AArch64GotAtomContent[8] = {0};

// .plt value (entry 0): eight 4-byte little-endian instruction words.
const uint8_t AArch64Plt0AtomContent[32] = {
    0xf0, 0x7b, 0xbf, 0xa9, // stp x16, x30, [sp,#-16]!
    0x10, 0x00, 0x00, 0x90, // adrp x16, Page(eh_frame)
    0x11, 0x02, 0x40, 0xf9, // ldr x17, [x16,#offset]
    0x10, 0x02, 0x00, 0x91, // add x16, x16, #offset
    0x20, 0x02, 0x1f, 0xd6, // br x17
    0x1f, 0x20, 0x03, 0xd5, // nop
    0x1f, 0x20, 0x03, 0xd5, // nop
    0x1f, 0x20, 0x03, 0xd5  // nop
};

// .plt values (other entries): four 4-byte little-endian instruction words.
const uint8_t AArch64PltAtomContent[16] = {
    0x10, 0x00, 0x00, 0x90, // adrp x16, PAGE(<GLOBAL_OFFSET_TABLE>)
    0x11, 0x02, 0x40, 0xf9, // ldr x17, [x16,#offset]
    0x10, 0x02, 0x00, 0x91, // add x16, x16, #offset
    0x20, 0x02, 0x1f, 0xd6  // br x17
};

/// \brief Atoms that are used by AArch64 dynamic linking
///
/// GOT slot atom; its raw content is the 8 zero bytes above.
class AArch64GOTAtom : public GOTAtom {
public:
  AArch64GOTAtom(const File &f, StringRef secName) : GOTAtom(f, secName) {}

  ArrayRef<uint8_t> rawContent() const override {
    return ArrayRef<uint8_t>(AArch64GotAtomContent, 8);
  }
};

/// \brief First PLT entry; its raw content is the 32-byte PLT0 template.
class AArch64PLT0Atom : public PLT0Atom {
public:
  AArch64PLT0Atom(const File &f) : PLT0Atom(f) {}
  ArrayRef<uint8_t> rawContent() const override {
    return ArrayRef<uint8_t>(AArch64Plt0AtomContent, 32);
  }
};

/// \brief Regular PLT entry; its raw content is the 16-byte PLT template.
class AArch64PLTAtom : public PLTAtom {
public:
  AArch64PLTAtom(const File &f, StringRef secName) : PLTAtom(f, secName) {}

  ArrayRef<uint8_t> rawContent() const override {
    return ArrayRef<uint8_t>(AArch64PltAtomContent, 16);
  }
};

/// \brief File that owns the atoms created by the pass.
///
/// Takes its ordinal from the linking context and exposes the allocator the
/// pass uses for placement-new of GOT/PLT/object atoms.
class ELFPassFile : public SimpleFile {
public:
  ELFPassFile(const ELFLinkingContext &eti) : SimpleFile("ELFPassFile") {
    setOrdinal(eti.getNextOrdinalAndIncrement());
  }

  llvm::BumpPtrAllocator _alloc;
};

/// \brief CRTP base for handling relocations.
///
/// Derived must provide handlePlain(const Reference &) and
/// handleGOT(const Reference &); handleReference() dispatches to them.
template <class Derived> class AArch64RelocationPass : public Pass {
  /// \brief Handle a specific reference.
  ///
  /// References outside the ELF kind namespace are ignored, as are relocation
  /// kinds without a case below. The std::error_code returned by the Derived
  /// handlers is discarded here.
  void handleReference(const DefinedAtom &atom, const Reference &ref) {
    DEBUG_WITH_TYPE(
        "AArch64", llvm::dbgs()
                       << "\t" << LLVM_FUNCTION_NAME << "()"
                       << ": Name of Defined Atom: " << atom.name().str();
        llvm::dbgs() << " kindValue: " << ref.kindValue() << "\n");
    if (ref.kindNamespace() != Reference::KindNamespace::ELF)
      return;
    assert(ref.kindArch() == Reference::KindArch::AArch64);
    switch (ref.kindValue()) {
    case R_AARCH64_ABS32:
    case R_AARCH64_ABS16:
    case R_AARCH64_ABS64:
    case R_AARCH64_PREL16:
    case R_AARCH64_PREL32:
    case R_AARCH64_PREL64:
      static_cast<Derived *>(this)->handlePlain(ref);
      break;
    case R_AARCH64_GOTREL32:
    case R_AARCH64_GOTREL64:
      static_cast<Derived *>(this)->handleGOT(ref);
      break;
    case R_AARCH64_ADR_PREL_PG_HI21:
      static_cast<Derived *>(this)->handlePlain(ref);
      break;
    case R_AARCH64_LDST8_ABS_LO12_NC:
    case R_AARCH64_LDST16_ABS_LO12_NC:
    case R_AARCH64_LDST32_ABS_LO12_NC:
    case R_AARCH64_LDST64_ABS_LO12_NC:
    case R_AARCH64_LDST128_ABS_LO12_NC:
      static_cast<Derived *>(this)->handlePlain(ref);
      break;
    case R_AARCH64_ADD_ABS_LO12_NC:
      static_cast<Derived *>(this)->handlePlain(ref);
      break;
    case R_AARCH64_CALL26:
    case R_AARCH64_JUMP26:
    case R_AARCH64_CONDBR19:
      static_cast<Derived *>(this)->handlePlain(ref);
      break;
    case R_AARCH64_TLSLE_ADD_TPREL_HI12:
    case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC:
      static_cast<Derived *>(this)->handlePlain(ref);
      break;
    case R_AARCH64_ADR_GOT_PAGE:
    case R_AARCH64_LD64_GOT_LO12_NC:
    case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21:
    case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
      static_cast<Derived *>(this)->handleGOT(ref);
      break;
    }
  }

protected:
  /// \brief Get the PLT entry for a given IFUNC atom.
  ///
  /// If the entry does not exist yet, both the GOT and the PLT entry are
  /// created, recorded in the maps and queued for emission.
  const PLTAtom *getIFUNCPLTEntry(const DefinedAtom *da) {
    auto plt = _pltMap.find(da);
    if (plt != _pltMap.end())
      return plt->second;
    auto ga = new (_file._alloc) AArch64GOTAtom(_file, ".got.plt");
    ga->addReferenceELF_AArch64(R_AARCH64_IRELATIVE, 0, da, 0);
    auto pa = new (_file._alloc) AArch64PLTAtom(_file, ".plt");
    // NOTE(review): a PREL32 at offset 2 with addend -4 does not line up with
    // the adrp/ldr/add/br template above (getPLTEntry in the dynamic pass uses
    // ADR_GOT_PAGE / LD64_GOT_LO12_NC / GOTRELINDEX instead) - confirm.
    pa->addReferenceELF_AArch64(R_AARCH64_PREL32, 2, ga, -4);
#ifndef NDEBUG
    ga->_name = "__got_ifunc_";
    ga->_name += da->name();
    pa->_name = "__plt_ifunc_";
    pa->_name += da->name();
#endif
    _gotMap[da] = ga;
    _pltMap[da] = pa;
    _gotVector.push_back(ga);
    _pltVector.push_back(pa);
    return pa;
  }

  /// \brief Redirect the call to the PLT stub for the target IFUNC.
  ///
  /// This creates a PLT and GOT entry for the IFUNC if one does not exist. The
  /// GOT entry carries an IRELATIVE relocation to the original target
  /// resolver. References whose target is null or is not a resolver are left
  /// untouched. Always returns a success error code.
  std::error_code handleIFUNC(const Reference &ref) {
    auto target = dyn_cast_or_null<const DefinedAtom>(ref.target());
    if (target && target->contentType() == DefinedAtom::typeResolver)
      const_cast<Reference &>(ref).setTarget(getIFUNCPLTEntry(target));
    return std::error_code();
  }

  /// \brief Create a GOT entry for the TP offset of a TLS atom.
  ///
  /// Returns the cached entry when one already exists for \p atom.
  const GOTAtom *getGOTTPOFF(const Atom *atom) {
    auto got = _gotMap.find(atom);
    if (got == _gotMap.end()) {
      auto g = new (_file._alloc) AArch64GOTAtom(_file, ".got");
      g->addReferenceELF_AArch64(R_AARCH64_GOTREL64, 0, atom, 0);
#ifndef NDEBUG
      g->_name = "__got_tls_";
      g->_name += atom->name();
#endif
      _gotMap[atom] = g;
      _gotVector.push_back(g);
      return g;
    }
    return got->second;
  }

  /// \brief Create a TPOFF64 GOT entry and change the relocation to a PC32 to
  /// the GOT.
  ///
  /// Concretely: retargets \p ref at the entry from getGOTTPOFF() and rewrites
  /// its kind to R_AARCH64_PREL32.
  void handleGOTTPOFF(const Reference &ref) {
    const_cast<Reference &>(ref).setTarget(getGOTTPOFF(ref.target()));
    const_cast<Reference &>(ref).setKindValue(R_AARCH64_PREL32);
  }

  /// \brief Create a GOT entry containing 0.
  ///
  /// The entry is created on first use and shared afterwards.
  const GOTAtom *getNullGOT() {
    if (!_null) {
      _null = new (_file._alloc) AArch64GOTAtom(_file, ".got.plt");
#ifndef NDEBUG
      _null->_name = "__got_null";
#endif
    }
    return _null;
  }

  /// \brief Get (creating on first use) the .got entry for a defined atom.
  ///
  /// A new entry carries an R_AARCH64_ABS64 reference to \p da.
  const GOTAtom *getGOT(const DefinedAtom *da) {
    auto got = _gotMap.find(da);
    if (got == _gotMap.end()) {
      auto g = new (_file._alloc) AArch64GOTAtom(_file, ".got");
      g->addReferenceELF_AArch64(R_AARCH64_ABS64, 0, da, 0);
#ifndef NDEBUG
      g->_name = "__got_";
      g->_name += da->name();
#endif
      _gotMap[da] = g;
      _gotVector.push_back(g);
      return g;
    }
    return got->second;
  }

public:
  AArch64RelocationPass(const ELFLinkingContext &ctx)
      : _file(ctx), _ctx(ctx), _null(nullptr), _PLT0(nullptr), _got0(nullptr),
        _got1(nullptr) {}

  /// \brief Do the pass.
  ///
  /// The goal here is to first process each reference individually. Each call
  /// to handleReference may modify the reference itself and/or create new
  /// atoms which must be stored in one of the maps below.
  ///
  /// After all references are handled, the atoms created during that are all
  /// added to mf.
  void perform(std::unique_ptr<MutableFile> &mf) override {
    ScopedTask task(getDefaultDomain(), "AArch64 GOT/PLT Pass");
    // Debug-only dump of the atom names, grouped by atom class.
    DEBUG_WITH_TYPE(
        "AArch64", llvm::dbgs() << "Undefined Atoms"
                                << "\n";
        for (const auto &atom : mf->undefined()) {
          llvm::dbgs() << " Name of Atom: " << atom->name().str() << "\n";
        }
        llvm::dbgs() << "Shared Library Atoms"
                     << "\n";
        for (const auto &atom : mf->sharedLibrary()) {
          llvm::dbgs() << " Name of Atom: " << atom->name().str() << "\n";
        }
        llvm::dbgs() << "Absolute Atoms"
                     << "\n";
        for (const auto &atom : mf->absolute()) {
          llvm::dbgs() << " Name of Atom: " << atom->name().str() << "\n";
        }
        llvm::dbgs() << "Defined Atoms"
                     << "\n");
    // Process all references.
    for (const auto &atom : mf->defined()) {
      for (const auto &ref : *atom) {
        handleReference(*atom, *ref);
      }
    }

    // Add all created atoms to the link. Ordinals are handed out in emission
    // order: PLT0, PLT entries, the null GOT slot, got0/got1, the remaining
    // GOT entries, then object atoms.
    uint64_t ordinal = 0;
    if (_PLT0) {
      _PLT0->setOrdinal(ordinal++);
      mf->addAtom(*_PLT0);
    }
    for (auto &plt : _pltVector) {
      plt->setOrdinal(ordinal++);
      mf->addAtom(*plt);
    }
    if (_null) {
      _null->setOrdinal(ordinal++);
      mf->addAtom(*_null);
    }
    // _got0 and _got1 are only allocated together with _PLT0 (see getPLT0 in
    // the dynamic pass), so _PLT0 doubles as the guard for them.
    if (_PLT0) {
      _got0->setOrdinal(ordinal++);
      _got1->setOrdinal(ordinal++);
      mf->addAtom(*_got0);
      mf->addAtom(*_got1);
    }
    for (auto &got : _gotVector) {
      got->setOrdinal(ordinal++);
      mf->addAtom(*got);
    }
    for (auto obj : _objectVector) {
      obj->setOrdinal(ordinal++);
      mf->addAtom(*obj);
    }
  }

protected:
  /// \brief Owner of all the Atoms created by this pass.
  ELFPassFile _file;
  const ELFLinkingContext &_ctx;

  /// \brief Map Atoms to their GOT entries.
  llvm::DenseMap<const Atom *, GOTAtom *> _gotMap;

  /// \brief Map Atoms to their PLT entries.
  llvm::DenseMap<const Atom *, PLTAtom *> _pltMap;

  /// \brief Map Atoms to their Object entries.
  llvm::DenseMap<const Atom *, ObjectAtom *> _objectMap;

  /// \brief the list of GOT/PLT atoms
  std::vector<GOTAtom *> _gotVector;
  std::vector<PLTAtom *> _pltVector;
  std::vector<ObjectAtom *> _objectVector;

  /// \brief GOT entry that is always 0. Used for undefined weaks.
  GOTAtom *_null;

  /// \brief The got and plt entries for .PLT0. This is used to call into the
  /// dynamic linker for symbol resolution.
  /// @{
  PLT0Atom *_PLT0;
  GOTAtom *_got0;
  GOTAtom *_got1;
  /// @}
};

/// This implements the static relocation model. Meaning GOT and PLT entries are
/// not created for references that can be directly resolved. These are
/// converted to a direct relocation. For entries that do require a GOT or PLT
/// entry, that entry is statically bound.
///
/// TLS always assumes module 1 and attempts to remove indirection.
class AArch64StaticRelocationPass final
    : public AArch64RelocationPass<AArch64StaticRelocationPass> {
public:
  AArch64StaticRelocationPass(const elf::AArch64LinkingContext &ctx)
      : AArch64RelocationPass(ctx) {}

  /// Plain references only need the IFUNC redirection.
  std::error_code handlePlain(const Reference &ref) { return handleIFUNC(ref); }

  /// Rewrites the reference to R_AARCH64_PREL32 (or R_AARCH64_NONE when the
  /// target is __tls_get_addr) and applies the IFUNC redirection for resolver
  /// targets.
  std::error_code handlePLT32(const Reference &ref) {
    // __tls_get_addr is handled elsewhere.
    if (ref.target() && ref.target()->name() == "__tls_get_addr") {
      const_cast<Reference &>(ref).setKindValue(R_AARCH64_NONE);
      return std::error_code();
    }
    // Static code doesn't need PLTs.
    const_cast<Reference &>(ref).setKindValue(R_AARCH64_PREL32);
    // Handle IFUNC.
    if (const DefinedAtom *da =
            dyn_cast_or_null<const DefinedAtom>(ref.target()))
      if (da->contentType() == DefinedAtom::typeResolver)
        return handleIFUNC(ref);
    return std::error_code();
  }

  /// Retargets the reference at the null GOT slot for undefined atoms, or at
  /// the atom's own GOT entry for defined atoms; other targets are left as is.
  // NOTE(review): ref.target() is passed to isa<> without a null check, unlike
  // the dyn_cast_or_null uses above - confirm targets are never null here.
  std::error_code handleGOT(const Reference &ref) {
    if (isa<UndefinedAtom>(ref.target()))
      const_cast<Reference &>(ref).setTarget(getNullGOT());
    else if (const DefinedAtom *da = dyn_cast<const DefinedAtom>(ref.target()))
      const_cast<Reference &>(ref).setTarget(getGOT(da));
    return std::error_code();
  }
};

/// \brief Dynamic relocation model: creates PLT0, per-symbol PLT/GOT entries
/// and object atoms for references to shared-library atoms.
class AArch64DynamicRelocationPass final
    : public AArch64RelocationPass<AArch64DynamicRelocationPass> {
public:
  AArch64DynamicRelocationPass(const elf::AArch64LinkingContext &ctx)
      : AArch64RelocationPass(ctx) {}

  /// \brief Get PLT0, creating it together with the null GOT slot, got0 and
  /// got1 on first use.
  const PLT0Atom *getPLT0() {
    if (_PLT0)
      return _PLT0;
    // Fill in the null entry.
    getNullGOT();
    _PLT0 = new (_file._alloc) AArch64PLT0Atom(_file);
    _got0 = new (_file._alloc) AArch64GOTAtom(_file, ".got.plt");
    _got1 = new (_file._alloc) AArch64GOTAtom(_file, ".got.plt");
    // Offsets 4, 8 and 12 are the adrp, ldr and add words of the PLT0
    // template.
    _PLT0->addReferenceELF_AArch64(R_AARCH64_ADR_GOT_PAGE, 4, _got0, 0);
    _PLT0->addReferenceELF_AArch64(R_AARCH64_LD64_GOT_LO12_NC, 8, _got1, 0);
    _PLT0->addReferenceELF_AArch64(ADD_AARCH64_GOTRELINDEX, 12, _got1, 0);
#ifndef NDEBUG
    _PLT0->_name = "__PLT0";
    _got0->_name = "__got0";
    _got1->_name = "__got1";
#endif
    return _PLT0;
  }

  /// \brief Get the PLT entry for \p a, creating the PLT entry and its
  /// .got.plt slot on first use.
  const PLTAtom *getPLTEntry(const Atom *a) {
    auto plt = _pltMap.find(a);
    if (plt != _pltMap.end())
      return plt->second;
    auto ga = new (_file._alloc) AArch64GOTAtom(_file, ".got.plt");
    ga->addReferenceELF_AArch64(R_AARCH64_JUMP_SLOT, 0, a, 0);
    auto pa = new (_file._alloc) AArch64PLTAtom(_file, ".plt");
    // Offsets 0, 4 and 8 are the adrp, ldr and add words of the PLT template.
    pa->addReferenceELF_AArch64(R_AARCH64_ADR_GOT_PAGE, 0, ga, 0);
    pa->addReferenceELF_AArch64(R_AARCH64_LD64_GOT_LO12_NC, 4, ga, 0);
    pa->addReferenceELF_AArch64(ADD_AARCH64_GOTRELINDEX, 8, ga, 0);
    pa->addReferenceELF_AArch64(R_AARCH64_NONE, 12, getPLT0(), 0);
    // Set the starting address of the got entry to the first instruction in
    // the plt0 entry.
    ga->addReferenceELF_AArch64(R_AARCH64_ABS32, 0, getPLT0(), 0);
#ifndef NDEBUG
    ga->_name = "__got_";
    ga->_name += a->name();
    pa->_name = "__plt_";
    pa->_name += a->name();
#endif
    _gotMap[a] = ga;
    _pltMap[a] = pa;
    _gotVector.push_back(ga);
    _pltVector.push_back(pa);
    return pa;
  }

  /// \brief Get the object atom for a shared-library data symbol, creating it
  /// (with the symbol's name and size and an R_AARCH64_COPY reference) on
  /// first use.
  const ObjectAtom *getObjectEntry(const SharedLibraryAtom *a) {
    auto obj = _objectMap.find(a);
    if (obj != _objectMap.end())
      return obj->second;

    auto oa = new (_file._alloc) ObjectAtom(_file);
    // This needs to point to the atom that we just created.
    oa->addReferenceELF_AArch64(R_AARCH64_COPY, 0, oa, 0);

    oa->_name = a->name();
    oa->_size = a->size();

    _objectMap[a] = oa;
    _objectVector.push_back(oa);
    return oa;
  }

  /// Shared-library data targets are redirected to an object atom and
  /// shared-library code targets to a PLT entry; every other non-null target
  /// goes through the IFUNC redirection.
  std::error_code handlePlain(const Reference &ref) {
    if (!ref.target())
      return std::error_code();
    if (auto sla = dyn_cast<SharedLibraryAtom>(ref.target())) {
      if (sla->type() == SharedLibraryAtom::Type::Data)
        const_cast<Reference &>(ref).setTarget(getObjectEntry(sla));
      else if (sla->type() == SharedLibraryAtom::Type::Code)
        const_cast<Reference &>(ref).setTarget(getPLTEntry(sla));
    } else
      return handleIFUNC(ref);
    return std::error_code();
  }

  std::error_code handlePLT32(const Reference &ref) {
    // Turn this into a PC32 to the PLT entry.
    const_cast<Reference &>(ref).setKindValue(R_AARCH64_PREL32);
    // Handle IFUNC.
    if (const DefinedAtom *da =
            dyn_cast_or_null<const DefinedAtom>(ref.target()))
      if (da->contentType() == DefinedAtom::typeResolver)
        return handleIFUNC(ref);
    if (isa<const SharedLibraryAtom>(ref.target()))
      const_cast<Reference &>(ref).setTarget(getPLTEntry(ref.target()));
    return std::error_code();
  }

  /// \brief Get (creating on first use) the .got entry for a shared-library
  /// atom; a new entry carries an R_AARCH64_GLOB_DAT reference to \p sla.
  const GOTAtom *getSharedGOT(const SharedLibraryAtom *sla) {
    auto got = _gotMap.find(sla);
    if (got == _gotMap.end()) {
      auto g = new (_file._alloc) AArch64GOTAtom(_file, ".got");
      g->addReferenceELF_AArch64(R_AARCH64_GLOB_DAT, 0, sla, 0);
#ifndef NDEBUG
      g->_name = "__got_";
      g->_name += sla->name();
#endif
      _gotMap[sla] = g;
      _gotVector.push_back(g);
      return g;
    }
    return got->second;
  }

  /// Same as the static variant, plus shared-library atoms, which are
  /// retargeted at their GLOB_DAT GOT entry.
  std::error_code handleGOT(const Reference &ref) {
    if (isa<UndefinedAtom>(ref.target()))
      const_cast<Reference &>(ref).setTarget(getNullGOT());
    else if (const DefinedAtom *da = dyn_cast<const DefinedAtom>(ref.target()))
      const_cast<Reference &>(ref).setTarget(getGOT(da));
    else if (const auto sla = dyn_cast<const SharedLibraryAtom>(ref.target()))
      const_cast<Reference &>(ref).setTarget(getSharedGOT(sla));
    return std::error_code();
  }
};
} // end anon namespace

/// \brief Pick the relocation pass for the output type: the dynamic pass for
/// shared objects and dynamic executables, the static pass for other
/// executables, and no pass (nullptr) for relocatable output.
std::unique_ptr<Pass>
lld::elf::createAArch64RelocationPass(const AArch64LinkingContext &ctx) {
  switch (ctx.getOutputELFType()) {
  case llvm::ELF::ET_EXEC:
    if (ctx.isDynamic())
      return llvm::make_unique<AArch64DynamicRelocationPass>(ctx);
    return llvm::make_unique<AArch64StaticRelocationPass>(ctx);
  case llvm::ELF::ET_DYN:
    return llvm::make_unique<AArch64DynamicRelocationPass>(ctx);
  case llvm::ELF::ET_REL:
    return nullptr;
  default:
    llvm_unreachable("Unhandled output file type");
  }
}
diff --git a/lib/ReaderWriter/ELF/AArch64/AArch64RelocationPass.h b/lib/ReaderWriter/ELF/AArch64/AArch64RelocationPass.h
new file mode 100644
index 000000000000..73d784e3b52d
--- /dev/null
+++ b/lib/ReaderWriter/ELF/AArch64/AArch64RelocationPass.h
@@ -0,0 +1,32 @@
//===-
// lib/ReaderWriter/ELF/AArch64/AArch64RelocationPass.h ---------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Declares the relocation processing pass for AArch64. This includes
/// GOT and PLT entries, TLS, COPY, and ifunc.
///
//===----------------------------------------------------------------------===//

#ifndef LLD_READER_WRITER_ELF_AARCH64_AARCH64_RELOCATION_PASS_H
#define LLD_READER_WRITER_ELF_AARCH64_AARCH64_RELOCATION_PASS_H

#include <memory>

namespace lld {
class Pass;
namespace elf {
class AArch64LinkingContext;

/// \brief Create AArch64 relocation pass for the given linking context.
///
/// \returns the pass matching the context's output type; the definition
/// returns nullptr for relocatable (ET_REL) output.
std::unique_ptr<Pass>
createAArch64RelocationPass(const AArch64LinkingContext &);
} // end namespace elf
} // end namespace lld

#endif // LLD_READER_WRITER_ELF_AARCH64_AARCH64_RELOCATION_PASS_H
diff --git a/lib/ReaderWriter/ELF/AArch64/AArch64TargetHandler.cpp b/lib/ReaderWriter/ELF/AArch64/AArch64TargetHandler.cpp
new file mode 100644
index 000000000000..607f767f8b8a
--- /dev/null
+++ b/lib/ReaderWriter/ELF/AArch64/AArch64TargetHandler.cpp
@@ -0,0 +1,52 @@
//===- lib/ReaderWriter/ELF/AArch64/AArch64TargetHandler.cpp --------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// + +#include "Atoms.h" +#include "AArch64DynamicLibraryWriter.h" +#include "AArch64ExecutableWriter.h" +#include "AArch64LinkingContext.h" +#include "AArch64TargetHandler.h" + +using namespace lld; +using namespace elf; + +AArch64TargetHandler::AArch64TargetHandler(AArch64LinkingContext &context) + : _context(context), + _AArch64TargetLayout(new AArch64TargetLayout<AArch64ELFType>(context)), + _AArch64RelocationHandler(new AArch64TargetRelocationHandler()) {} + +void AArch64TargetHandler::registerRelocationNames(Registry ®istry) { + registry.addKindTable(Reference::KindNamespace::ELF, + Reference::KindArch::AArch64, kindStrings); +} + +std::unique_ptr<Writer> AArch64TargetHandler::getWriter() { + switch (this->_context.getOutputELFType()) { + case llvm::ELF::ET_EXEC: + return std::unique_ptr<Writer>(new AArch64ExecutableWriter<AArch64ELFType>( + _context, *_AArch64TargetLayout.get())); + case llvm::ELF::ET_DYN: + return std::unique_ptr<Writer>( + new AArch64DynamicLibraryWriter<AArch64ELFType>( + _context, *_AArch64TargetLayout.get())); + case llvm::ELF::ET_REL: + llvm_unreachable("TODO: support -r mode"); + default: + llvm_unreachable("unsupported output type"); + } +} + +#define ELF_RELOC(name, value) LLD_KIND_STRING_ENTRY(name), + +const Registry::KindStrings AArch64TargetHandler::kindStrings[] = { +#include "llvm/Support/ELFRelocs/AArch64.def" + LLD_KIND_STRING_END +}; + +#undef ELF_RELOC diff --git a/lib/ReaderWriter/ELF/AArch64/AArch64TargetHandler.h b/lib/ReaderWriter/ELF/AArch64/AArch64TargetHandler.h new file mode 100644 index 000000000000..4eb6786cdf1f --- /dev/null +++ b/lib/ReaderWriter/ELF/AArch64/AArch64TargetHandler.h @@ -0,0 +1,64 @@ +//===- lib/ReaderWriter/ELF/AArch64/AArch64TargetHandler.h ----------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_ELF_AARCH64_AARCH64_TARGET_HANDLER_H +#define LLD_READER_WRITER_ELF_AARCH64_AARCH64_TARGET_HANDLER_H + +#include "AArch64ELFFile.h" +#include "AArch64ELFReader.h" +#include "AArch64RelocationHandler.h" +#include "DefaultTargetHandler.h" +#include "TargetLayout.h" +#include "lld/Core/Simple.h" + +namespace lld { +namespace elf { +class AArch64LinkingContext; + +template <class ELFT> class AArch64TargetLayout : public TargetLayout<ELFT> { +public: + AArch64TargetLayout(AArch64LinkingContext &context) + : TargetLayout<ELFT>(context) {} +}; + +class AArch64TargetHandler final : public DefaultTargetHandler<AArch64ELFType> { +public: + AArch64TargetHandler(AArch64LinkingContext &context); + + AArch64TargetLayout<AArch64ELFType> &getTargetLayout() override { + return *(_AArch64TargetLayout.get()); + } + + void registerRelocationNames(Registry ®istry) override; + + const AArch64TargetRelocationHandler &getRelocationHandler() const override { + return *(_AArch64RelocationHandler.get()); + } + + std::unique_ptr<Reader> getObjReader() override { + return std::unique_ptr<Reader>(new AArch64ELFObjectReader(_context)); + } + + std::unique_ptr<Reader> getDSOReader() override { + return std::unique_ptr<Reader>(new AArch64ELFDSOReader(_context)); + } + + std::unique_ptr<Writer> getWriter() override; + +private: + static const Registry::KindStrings kindStrings[]; + AArch64LinkingContext &_context; + std::unique_ptr<AArch64TargetLayout<AArch64ELFType>> _AArch64TargetLayout; + std::unique_ptr<AArch64TargetRelocationHandler> _AArch64RelocationHandler; +}; + +} // end namespace elf +} // end namespace lld + +#endif diff --git a/lib/ReaderWriter/ELF/AArch64/CMakeLists.txt b/lib/ReaderWriter/ELF/AArch64/CMakeLists.txt new file mode 100644 index 000000000000..de94a4df5078 --- /dev/null +++ b/lib/ReaderWriter/ELF/AArch64/CMakeLists.txt @@ -0,0 +1,12 @@ 
+add_llvm_library(lldAArch64ELFTarget + AArch64LinkingContext.cpp + AArch64TargetHandler.cpp + AArch64RelocationHandler.cpp + AArch64RelocationPass.cpp + LINK_LIBS + lldELF + lldReaderWriter + lldCore + LLVMObject + LLVMSupport + ) diff --git a/lib/ReaderWriter/ELF/AArch64/Makefile b/lib/ReaderWriter/ELF/AArch64/Makefile new file mode 100644 index 000000000000..02cff4747d0d --- /dev/null +++ b/lib/ReaderWriter/ELF/AArch64/Makefile @@ -0,0 +1,15 @@ +##===- lld/lib/ReaderWriter/ELF/AArch64/Makefile ----------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LLD_LEVEL := ../../../.. +LIBRARYNAME := lldAArch64ELFTarget +USEDLIBS = lldCore.a +CPP.Flags += -I$(PROJ_SRC_DIR)/$(LLD_LEVEL)/lib/ReaderWriter/ELF/AArch64 -I$(PROJ_SRC_DIR)/$(LLD_LEVEL)/lib/ReaderWriter/ELF + +include $(LLD_LEVEL)/Makefile diff --git a/lib/ReaderWriter/ELF/AArch64/TODO.rst b/lib/ReaderWriter/ELF/AArch64/TODO.rst new file mode 100644 index 000000000000..aa6f616ff33f --- /dev/null +++ b/lib/ReaderWriter/ELF/AArch64/TODO.rst @@ -0,0 +1,15 @@ +ELF AArch64 +~~~~~~~~~~~ + +Unimplemented Features +###################### + +* Just about everything! + +Unimplemented Relocations +######################### + +All of these relocations are defined in: +http://infocenter.arm.com/help/topic/com.arm.doc.ihi0056b/IHI0056B_aaelf64.pdf + + diff --git a/lib/ReaderWriter/ELF/ARM/ARMELFFile.h b/lib/ReaderWriter/ELF/ARM/ARMELFFile.h new file mode 100644 index 000000000000..bc5ee35b8213 --- /dev/null +++ b/lib/ReaderWriter/ELF/ARM/ARMELFFile.h @@ -0,0 +1,97 @@ +//===--------- lib/ReaderWriter/ELF/ARM/ARMELFFile.h ----------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_ELF_ARM_ARM_ELF_FILE_H +#define LLD_READER_WRITER_ELF_ARM_ARM_ELF_FILE_H + +#include "ELFReader.h" + +namespace lld { +namespace elf { + +class ARMLinkingContext; + +template <class ELFT> class ARMELFDefinedAtom : public ELFDefinedAtom<ELFT> { + typedef llvm::object::Elf_Sym_Impl<ELFT> Elf_Sym; + typedef llvm::object::Elf_Shdr_Impl<ELFT> Elf_Shdr; + +public: + ARMELFDefinedAtom(const ELFFile<ELFT> &file, StringRef symbolName, + StringRef sectionName, const Elf_Sym *symbol, + const Elf_Shdr *section, ArrayRef<uint8_t> contentData, + unsigned int referenceStart, unsigned int referenceEnd, + std::vector<ELFReference<ELFT> *> &referenceList) + : ELFDefinedAtom<ELFT>(file, symbolName, sectionName, symbol, section, + contentData, referenceStart, referenceEnd, + referenceList) {} + + bool isThumbFunc(const Elf_Sym *symbol) const { + return symbol->getType() == llvm::ELF::STT_FUNC && + (static_cast<uint64_t>(symbol->st_value) & 0x1); + } + + /// Correct st_value for symbols addressing Thumb instructions + /// by removing its zero bit. + uint64_t getSymbolValue(const Elf_Sym *symbol) const override { + const auto value = static_cast<uint64_t>(symbol->st_value); + return isThumbFunc(symbol) ? 
value & ~0x1 : value; + } + + DefinedAtom::CodeModel codeModel() const override { + if (isThumbFunc(this->_symbol)) + return DefinedAtom::codeARMThumb; + return DefinedAtom::codeNA; + } +}; + +template <class ELFT> class ARMELFFile : public ELFFile<ELFT> { +public: + ARMELFFile(std::unique_ptr<MemoryBuffer> mb, ARMLinkingContext &ctx) + : ELFFile<ELFT>(std::move(mb), ctx) {} + + static ErrorOr<std::unique_ptr<ARMELFFile>> + create(std::unique_ptr<MemoryBuffer> mb, ARMLinkingContext &ctx) { + return std::unique_ptr<ARMELFFile<ELFT>>( + new ARMELFFile<ELFT>(std::move(mb), ctx)); + } + +private: + typedef llvm::object::Elf_Sym_Impl<ELFT> Elf_Sym; + typedef llvm::object::Elf_Shdr_Impl<ELFT> Elf_Shdr; + + /// Correct st_value for symbols addressing Thumb instructions + /// by removing its zero bit. + uint64_t getSymbolValue(const Elf_Sym *symbol) const override { + const auto value = static_cast<uint64_t>(symbol->st_value); + return symbol->getType() == llvm::ELF::STT_FUNC ? value & ~0x1 : value; + } + + /// Process the Defined symbol and create an atom for it. 
+ ErrorOr<ELFDefinedAtom<ELFT> *> handleDefinedSymbol(StringRef symName, + StringRef sectionName, + const Elf_Sym *sym, const Elf_Shdr *sectionHdr, + ArrayRef<uint8_t> contentData, + unsigned int referenceStart, unsigned int referenceEnd, + std::vector<ELFReference<ELFT> *> &referenceList) override { + return new (this->_readerStorage) ARMELFDefinedAtom<ELFT>( + *this, symName, sectionName, sym, sectionHdr, contentData, + referenceStart, referenceEnd, referenceList); + } +}; + +template <class ELFT> class ARMDynamicFile : public DynamicFile<ELFT> { +public: + ARMDynamicFile(const ARMLinkingContext &context, StringRef name) + : DynamicFile<ELFT>(context, name) {} +}; + +} // elf +} // lld + +#endif // LLD_READER_WRITER_ELF_ARM_ARM_ELF_FILE_H diff --git a/lib/ReaderWriter/ELF/ARM/ARMELFReader.h b/lib/ReaderWriter/ELF/ARM/ARMELFReader.h new file mode 100644 index 000000000000..31af531563ea --- /dev/null +++ b/lib/ReaderWriter/ELF/ARM/ARMELFReader.h @@ -0,0 +1,62 @@ +//===--------- lib/ReaderWriter/ELF/ARM/ARMELFReader.h --------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_ARM_ARM_ELF_READER_H +#define LLD_READER_WRITER_ARM_ARM_ELF_READER_H + +#include "ARMELFFile.h" +#include "ELFReader.h" + +namespace lld { +namespace elf { + +typedef llvm::object::ELFType<llvm::support::little, 2, false> ARMELFType; + +struct ARMDynamicFileCreateELFTraits { + typedef llvm::ErrorOr<std::unique_ptr<lld::SharedLibraryFile>> result_type; + + template <class ELFT> + static result_type create(std::unique_ptr<llvm::MemoryBuffer> mb, + ARMLinkingContext &ctx) { + return lld::elf::ARMDynamicFile<ELFT>::create(std::move(mb), ctx); + } +}; + +struct ARMELFFileCreateELFTraits { + typedef llvm::ErrorOr<std::unique_ptr<lld::File>> result_type; + + template <class ELFT> + static result_type create(std::unique_ptr<llvm::MemoryBuffer> mb, + ARMLinkingContext &ctx) { + return lld::elf::ARMELFFile<ELFT>::create(std::move(mb), ctx); + } +}; + +class ARMELFObjectReader + : public ELFObjectReader<ARMELFType, ARMELFFileCreateELFTraits, + ARMLinkingContext> { +public: + ARMELFObjectReader(ARMLinkingContext &ctx) + : ELFObjectReader<ARMELFType, ARMELFFileCreateELFTraits, + ARMLinkingContext>(ctx, llvm::ELF::EM_ARM) {} +}; + +class ARMELFDSOReader + : public ELFDSOReader<ARMELFType, ARMDynamicFileCreateELFTraits, + ARMLinkingContext> { +public: + ARMELFDSOReader(ARMLinkingContext &ctx) + : ELFDSOReader<ARMELFType, ARMDynamicFileCreateELFTraits, + ARMLinkingContext>(ctx, llvm::ELF::EM_ARM) {} +}; + +} // namespace elf +} // namespace lld + +#endif // LLD_READER_WRITER_ARM_ARM_ELF_READER_H diff --git a/lib/ReaderWriter/ELF/ARM/ARMExecutableWriter.h b/lib/ReaderWriter/ELF/ARM/ARMExecutableWriter.h new file mode 100644 index 000000000000..19311d516e4d --- /dev/null +++ b/lib/ReaderWriter/ELF/ARM/ARMExecutableWriter.h @@ -0,0 +1,121 @@ +//===--------- lib/ReaderWriter/ELF/ARM/ARMExecutableWriter.h -------------===// +// +// The LLVM Linker +// +// This file is 
distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +#ifndef LLD_READER_WRITER_ELF_ARM_ARM_EXECUTABLE_WRITER_H +#define LLD_READER_WRITER_ELF_ARM_ARM_EXECUTABLE_WRITER_H + +#include "ExecutableWriter.h" +#include "ARMLinkingContext.h" +#include "ARMTargetHandler.h" +#include "ARMSymbolTable.h" + +namespace { +const char *gotSymbol = "_GLOBAL_OFFSET_TABLE_"; +} + +namespace lld { +namespace elf { + +template <class ELFT> +class ARMExecutableWriter : public ExecutableWriter<ELFT> { +public: + ARMExecutableWriter(ARMLinkingContext &context, + ARMTargetLayout<ELFT> &layout); + +protected: + // Add any runtime files and their atoms to the output + bool createImplicitFiles(std::vector<std::unique_ptr<File>> &) override; + + void finalizeDefaultAtomValues() override; + + void addDefaultAtoms() override { + ExecutableWriter<ELFT>::addDefaultAtoms(); + } + + /// \brief Create symbol table. + unique_bump_ptr<SymbolTable<ELFT>> createSymbolTable() override; + + void processUndefinedSymbol(StringRef symName, + RuntimeFile<ELFT> &file) const override; + + // Setup the ELF header. + std::error_code setELFHeader() override; + +private: + ARMLinkingContext &_context; + ARMTargetLayout<ELFT> &_armLayout; +}; + +template <class ELFT> +ARMExecutableWriter<ELFT>::ARMExecutableWriter(ARMLinkingContext &context, + ARMTargetLayout<ELFT> &layout) + : ExecutableWriter<ELFT>(context, layout), _context(context), + _armLayout(layout) {} + +template <class ELFT> +bool ARMExecutableWriter<ELFT>::createImplicitFiles( + std::vector<std::unique_ptr<File>> &result) { + ExecutableWriter<ELFT>::createImplicitFiles(result); + return true; +} + +template <class ELFT> +void ARMExecutableWriter<ELFT>::finalizeDefaultAtomValues() { + // Finalize the atom values that are part of the parent. 
+ ExecutableWriter<ELFT>::finalizeDefaultAtomValues(); + auto gotAtomIter = _armLayout.findAbsoluteAtom(gotSymbol); + if (gotAtomIter != _armLayout.absoluteAtoms().end()) { + auto *gotAtom = *gotAtomIter; + if (auto gotpltSection = _armLayout.findOutputSection(".got.plt")) + gotAtom->_virtualAddr = gotpltSection->virtualAddr(); + else if (auto gotSection = _armLayout.findOutputSection(".got")) + gotAtom->_virtualAddr = gotSection->virtualAddr(); + else + gotAtom->_virtualAddr = 0; + } + // TODO: resolve addresses of __exidx_start/_end atoms +} + +template <class ELFT> +unique_bump_ptr<SymbolTable<ELFT>> + ARMExecutableWriter<ELFT>::createSymbolTable() { + return unique_bump_ptr<SymbolTable<ELFT>>( + new (this->_alloc) ARMSymbolTable<ELFT>(this->_context)); +} + +template <class ELFT> +void ARMExecutableWriter<ELFT>::processUndefinedSymbol( + StringRef symName, RuntimeFile<ELFT> &file) const { + if (symName == gotSymbol) { + file.addAbsoluteAtom(gotSymbol); + } else if (symName.startswith("__exidx")) { + file.addAbsoluteAtom("__exidx_start"); + file.addAbsoluteAtom("__exidx_end"); + } +} + +template <class ELFT> +std::error_code ARMExecutableWriter<ELFT>::setELFHeader() { + if (std::error_code ec = ExecutableWriter<ELFT>::setELFHeader()) + return ec; + + // Fixup entry point for Thumb code. 
+ StringRef entryName = _context.entrySymbolName(); + if (const AtomLayout *al = _armLayout.findAtomLayoutByName(entryName)) { + const auto *ea = dyn_cast<DefinedAtom>(al->_atom); + if (ea && ea->codeModel() == DefinedAtom::codeARMThumb) + this->_elfHeader->e_entry(al->_virtualAddr | 0x1); + } + + return std::error_code(); +} + +} // namespace elf +} // namespace lld + +#endif // LLD_READER_WRITER_ELF_ARM_ARM_EXECUTABLE_WRITER_H diff --git a/lib/ReaderWriter/ELF/ARM/ARMLinkingContext.cpp b/lib/ReaderWriter/ELF/ARM/ARMLinkingContext.cpp new file mode 100644 index 000000000000..5f2436674268 --- /dev/null +++ b/lib/ReaderWriter/ELF/ARM/ARMLinkingContext.cpp @@ -0,0 +1,34 @@ +//===--------- lib/ReaderWriter/ELF/ARM/ARMLinkingContext.cpp -------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "ARMLinkingContext.h" +#include "ARMRelocationPass.h" +#include "ARMTargetHandler.h" + +using namespace lld; +using namespace lld::elf; + +std::unique_ptr<ELFLinkingContext> +elf::ARMLinkingContext::create(llvm::Triple triple) { + if (triple.getArch() == llvm::Triple::arm) + return std::unique_ptr<ELFLinkingContext>( + new elf::ARMLinkingContext(triple)); + return nullptr; +} + +elf::ARMLinkingContext::ARMLinkingContext(llvm::Triple triple) + : ELFLinkingContext(triple, std::unique_ptr<TargetHandlerBase>( + new ARMTargetHandler(*this))) {} + +void elf::ARMLinkingContext::addPasses(PassManager &pm) { + auto pass = createARMRelocationPass(*this); + if (pass) + pm.add(std::move(pass)); + ELFLinkingContext::addPasses(pm); +} diff --git a/lib/ReaderWriter/ELF/ARM/ARMLinkingContext.h b/lib/ReaderWriter/ELF/ARM/ARMLinkingContext.h new file mode 100644 index 000000000000..249b79c4f07d --- /dev/null +++ b/lib/ReaderWriter/ELF/ARM/ARMLinkingContext.h @@ -0,0 +1,36 @@ +//===--------- 
lib/ReaderWriter/ELF/ARM/ARMLinkingContext.h ---------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_ELF_ARM_ARM_LINKING_CONTEXT_H +#define LLD_READER_WRITER_ELF_ARM_ARM_LINKING_CONTEXT_H + +#include "lld/ReaderWriter/ELFLinkingContext.h" +#include "llvm/Object/ELF.h" +#include "llvm/Support/ELF.h" + +namespace lld { +namespace elf { + +class ARMLinkingContext final : public ELFLinkingContext { +public: + static std::unique_ptr<ELFLinkingContext> create(llvm::Triple); + ARMLinkingContext(llvm::Triple); + + void addPasses(PassManager &) override; + + uint64_t getBaseAddress() const override { + if (_baseAddress == 0) + return 0x400000; + return _baseAddress; + } +}; +} // end namespace elf +} // end namespace lld + +#endif diff --git a/lib/ReaderWriter/ELF/ARM/ARMRelocationHandler.cpp b/lib/ReaderWriter/ELF/ARM/ARMRelocationHandler.cpp new file mode 100644 index 000000000000..d24fdf0fa410 --- /dev/null +++ b/lib/ReaderWriter/ELF/ARM/ARMRelocationHandler.cpp @@ -0,0 +1,500 @@ +//===--------- lib/ReaderWriter/ELF/ARM/ARMRelocationHandler.cpp ----------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "ARMTargetHandler.h" +#include "ARMLinkingContext.h" + +#include "llvm/Support/Debug.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/MathExtras.h" + +using namespace lld; +using namespace lld::elf; +using namespace llvm::support::endian; + +static Reference::Addend readAddend_THM_MOV(const uint8_t *location) { + const uint16_t halfHi = read16le(location); + const uint16_t halfLo = read16le(location + 2); + + const uint16_t imm8 = halfLo & 0xFF; + const uint16_t imm3 = (halfLo >> 12) & 0x7; + + const uint16_t imm4 = halfHi & 0xF; + const uint16_t bitI = (halfHi >> 10) & 0x1; + + const auto result = int16_t((imm4 << 12) | (bitI << 11) | (imm3 << 8) | imm8); + return result; +} + +static Reference::Addend readAddend_ARM_MOV(const uint8_t *location) { + const uint32_t value = read32le(location); + + const uint32_t imm12 = value & 0xFFF; + const uint32_t imm4 = (value >> 16) & 0xF; + + const auto result = int32_t((imm4 << 12) | imm12); + return result; +} + +static Reference::Addend readAddend_THM_CALL(const uint8_t *location) { + const uint16_t halfHi = read16le(location); + const uint16_t halfLo = read16le(location + 2); + + const uint16_t imm10 = halfHi & 0x3FF; + const uint16_t bitS = (halfHi >> 10) & 0x1; + + const uint16_t imm11 = halfLo & 0x7FF; + const uint16_t bitJ2 = (halfLo >> 11) & 0x1; + const uint16_t bitI2 = (~(bitJ2 ^ bitS)) & 0x1; + const uint16_t bitJ1 = (halfLo >> 13) & 0x1; + const uint16_t bitI1 = (~(bitJ1 ^ bitS)) & 0x1; + + const auto result = int32_t((bitS << 24) | (bitI1 << 23) | (bitI2 << 22) | + (imm10 << 12) | (imm11 << 1)); + return llvm::SignExtend64<25>(result); +} + +static Reference::Addend readAddend_ARM_CALL(const uint8_t *location) { + const uint32_t value = read32le(location); + + const bool isBLX = (value & 0xF0000000) == 0xF0000000; + const uint32_t bitH = isBLX ? 
((value & 0x1000000) >> 24) : 0; + + const auto result = int32_t(((value & 0xFFFFFF) << 2) | (bitH << 1)); + return llvm::SignExtend64<26>(result); +} + +static Reference::Addend readAddend_THM_JUMP11(const uint8_t *location) { + const auto value = read16le(location); + const uint16_t imm11 = value & 0x7FF; + + return llvm::SignExtend32<12>(imm11 << 1); +} + +static Reference::Addend readAddend(const uint8_t *location, + Reference::KindValue kindValue) { + switch (kindValue) { + case R_ARM_ABS32: + case R_ARM_REL32: + case R_ARM_TLS_IE32: + case R_ARM_TLS_LE32: + return (int32_t)read32le(location); + case R_ARM_PREL31: + return (int32_t)(read32le(location) & 0x7FFFFFFF); + case R_ARM_THM_CALL: + case R_ARM_THM_JUMP24: + return readAddend_THM_CALL(location); + case R_ARM_THM_JUMP11: + return readAddend_THM_JUMP11(location); + case R_ARM_CALL: + case R_ARM_JUMP24: + return readAddend_ARM_CALL(location); + case R_ARM_MOVW_ABS_NC: + case R_ARM_MOVT_ABS: + return readAddend_ARM_MOV(location); + case R_ARM_THM_MOVW_ABS_NC: + case R_ARM_THM_MOVT_ABS: + return readAddend_THM_MOV(location); + default: + return 0; + } +} + +static inline void applyArmReloc(uint8_t *location, uint32_t result, + uint32_t mask = 0xFFFFFFFF) { + assert(!(result & ~mask)); + write32le(location, (read32le(location) & ~mask) | (result & mask)); +} + +static inline void applyThmReloc(uint8_t *location, uint16_t resHi, + uint16_t resLo, uint16_t maskHi, + uint16_t maskLo = 0xFFFF) { + assert(!(resHi & ~maskHi) && !(resLo & ~maskLo)); + write16le(location, (read16le(location) & ~maskHi) | (resHi & maskHi)); + location += 2; + write16le(location, (read16le(location) & ~maskLo) | (resLo & maskLo)); +} + +static inline void applyThumb16Reloc(uint8_t *location, uint16_t result, + uint16_t mask = 0xFFFF) { + assert(!(result & ~mask)); + write16le(location, (read16le(location) & ~mask) | (result & mask)); +} + +/// \brief R_ARM_ABS32 - (S + A) | T +static void relocR_ARM_ABS32(uint8_t *location, uint64_t P, 
uint64_t S, + int64_t A, bool addressesThumb) { + uint64_t T = addressesThumb; + uint32_t result = (uint32_t)((S + A) | T); + + DEBUG_WITH_TYPE( + "ARM", llvm::dbgs() << "\t\tHandle " << LLVM_FUNCTION_NAME << " -"; + llvm::dbgs() << " S: 0x" << Twine::utohexstr(S); + llvm::dbgs() << " A: 0x" << Twine::utohexstr(A); + llvm::dbgs() << " P: 0x" << Twine::utohexstr(P); + llvm::dbgs() << " T: 0x" << Twine::utohexstr(T); + llvm::dbgs() << " result: 0x" << Twine::utohexstr(result) << "\n"); + applyArmReloc(location, result); +} + +/// \brief R_ARM_REL32 - ((S + A) | T) - P +static void relocR_ARM_REL32(uint8_t *location, uint64_t P, uint64_t S, + int64_t A, bool addressesThumb) { + uint64_t T = addressesThumb; + uint32_t result = (uint32_t)(((S + A) | T) - P); + + DEBUG_WITH_TYPE( + "ARM", llvm::dbgs() << "\t\tHandle " << LLVM_FUNCTION_NAME << " -"; + llvm::dbgs() << " S: 0x" << Twine::utohexstr(S); + llvm::dbgs() << " A: 0x" << Twine::utohexstr(A); + llvm::dbgs() << " P: 0x" << Twine::utohexstr(P); + llvm::dbgs() << " T: 0x" << Twine::utohexstr(T); + llvm::dbgs() << " result: 0x" << Twine::utohexstr(result) << "\n"); + applyArmReloc(location, result); +} + +/// \brief R_ARM_PREL31 - ((S + A) | T) - P +static void relocR_ARM_PREL31(uint8_t *location, uint64_t P, uint64_t S, + int64_t A, bool addressesThumb) { + uint64_t T = addressesThumb; + uint32_t result = (uint32_t)(((S + A) | T) - P); + const uint32_t mask = 0x7FFFFFFF; + uint32_t rel31 = result & mask; + + DEBUG_WITH_TYPE( + "ARM", llvm::dbgs() << "\t\tHandle " << LLVM_FUNCTION_NAME << " -"; + llvm::dbgs() << " S: 0x" << Twine::utohexstr(S); + llvm::dbgs() << " A: 0x" << Twine::utohexstr(A); + llvm::dbgs() << " P: 0x" << Twine::utohexstr(P); + llvm::dbgs() << " T: 0x" << Twine::utohexstr(T); + llvm::dbgs() << " result: 0x" << Twine::utohexstr(result); + llvm::dbgs() << " rel31: 0x" << Twine::utohexstr(rel31) << "\n"); + + applyArmReloc(location, rel31, mask); +} + +/// \brief Relocate B/BL instructions. 
useJs defines whether J1 & J2 are used +static void relocR_ARM_THM_B_L(uint8_t *location, uint32_t result, bool useJs) { + result = (result & 0x01FFFFFE) >> 1; + + const uint16_t imm10 = (result >> 11) & 0x3FF; + const uint16_t bitS = (result >> 23) & 0x1; + const uint16_t resHi = (bitS << 10) | imm10; + + const uint16_t imm11 = result & 0x7FF; + const uint16_t bitJ2 = useJs ? ((result >> 21) & 0x1) : bitS; + const uint16_t bitI2 = (~(bitJ2 ^ bitS)) & 0x1; + const uint16_t bitJ1 = useJs ? ((result >> 22) & 0x1) : bitS; + const uint16_t bitI1 = (~(bitJ1 ^ bitS)) & 0x1; + const uint16_t resLo = (bitI1 << 13) | (bitI2 << 11) | imm11; + + applyThmReloc(location, resHi, resLo, 0x7FF, 0x2FFF); +} + +/// \brief R_ARM_THM_CALL - ((S + A) | T) - P +static void relocR_ARM_THM_CALL(uint8_t *location, uint64_t P, uint64_t S, + int64_t A, bool useJs, bool addressesThumb) { + uint64_t T = addressesThumb; + const bool switchMode = !addressesThumb; + + if (switchMode) { + P &= ~0x3; // Align(P, 4) by rounding down + } + + uint32_t result = (uint32_t)(((S + A) | T) - P); + + DEBUG_WITH_TYPE( + "ARM", llvm::dbgs() << "\t\tHandle " << LLVM_FUNCTION_NAME << " -"; + llvm::dbgs() << " S: 0x" << Twine::utohexstr(S); + llvm::dbgs() << " A: 0x" << Twine::utohexstr(A); + llvm::dbgs() << " P: 0x" << Twine::utohexstr(P); + llvm::dbgs() << " T: 0x" << Twine::utohexstr(T); + llvm::dbgs() << " result: 0x" << Twine::utohexstr(result) << "\n"); + relocR_ARM_THM_B_L(location, result, useJs); + + if (switchMode) { + applyThmReloc(location, 0, 0, 0, 0x1001); + } +} + +/// \brief R_ARM_THM_JUMP24 - ((S + A) | T) - P +static void relocR_ARM_THM_JUMP24(uint8_t *location, uint64_t P, uint64_t S, + int64_t A, bool addressesThumb) { + uint64_t T = addressesThumb; + uint32_t result = (uint32_t)(((S + A) | T) - P); + + DEBUG_WITH_TYPE( + "ARM", llvm::dbgs() << "\t\tHandle " << LLVM_FUNCTION_NAME << " -"; + llvm::dbgs() << " S: 0x" << Twine::utohexstr(S); + llvm::dbgs() << " A: 0x" << Twine::utohexstr(A); + 
llvm::dbgs() << " P: 0x" << Twine::utohexstr(P); + llvm::dbgs() << " T: 0x" << Twine::utohexstr(T); + llvm::dbgs() << " result: 0x" << Twine::utohexstr(result) << "\n"); + relocR_ARM_THM_B_L(location, result, true); +} + +/// \brief R_ARM_THM_JUMP11 - S + A - P +static void relocR_ARM_THM_JUMP11(uint8_t *location, uint64_t P, uint64_t S, + int64_t A) { + uint32_t result = (uint32_t)(S + A - P); + + DEBUG_WITH_TYPE( + "ARM", llvm::dbgs() << "\t\tHandle " << LLVM_FUNCTION_NAME << " -"; + llvm::dbgs() << " S: 0x" << Twine::utohexstr(S); + llvm::dbgs() << " A: 0x" << Twine::utohexstr(A); + llvm::dbgs() << " P: 0x" << Twine::utohexstr(P); + llvm::dbgs() << " result: 0x" << Twine::utohexstr(result) << "\n"); + + //we cut off first bit because it is always 1 according to p. 4.5.3 + result = (result & 0x0FFE) >> 1; + + applyThumb16Reloc(location, result, 0x7FF); +} + +/// \brief R_ARM_CALL - ((S + A) | T) - P +static void relocR_ARM_CALL(uint8_t *location, uint64_t P, uint64_t S, + int64_t A, bool addressesThumb) { + uint64_t T = addressesThumb; + const bool switchMode = addressesThumb; + + uint32_t result = (uint32_t)(((S + A) | T) - P); + const uint32_t imm24 = (result & 0x03FFFFFC) >> 2; + + DEBUG_WITH_TYPE( + "ARM", llvm::dbgs() << "\t\tHandle " << LLVM_FUNCTION_NAME << " -"; + llvm::dbgs() << " S: 0x" << Twine::utohexstr(S); + llvm::dbgs() << " A: 0x" << Twine::utohexstr(A); + llvm::dbgs() << " P: 0x" << Twine::utohexstr(P); + llvm::dbgs() << " T: 0x" << Twine::utohexstr(T); + llvm::dbgs() << " result: 0x" << Twine::utohexstr(result) << "\n"); + applyArmReloc(location, imm24, 0xFFFFFF); + + if (switchMode) { + const uint32_t bitH = (result & 0x2) >> 1; + applyArmReloc(location, (0xFA | bitH) << 24, 0xFF000000); + } +} + +/// \brief R_ARM_JUMP24 - ((S + A) | T) - P +static void relocR_ARM_JUMP24(uint8_t *location, uint64_t P, uint64_t S, + int64_t A, bool addressesThumb) { + uint64_t T = addressesThumb; + uint32_t result = (uint32_t)(((S + A) | T) - P); + const 
uint32_t imm24 = (result & 0x03FFFFFC) >> 2; + + DEBUG_WITH_TYPE( + "ARM", llvm::dbgs() << "\t\tHandle " << LLVM_FUNCTION_NAME << " -"; + llvm::dbgs() << " S: 0x" << Twine::utohexstr(S); + llvm::dbgs() << " A: 0x" << Twine::utohexstr(A); + llvm::dbgs() << " P: 0x" << Twine::utohexstr(P); + llvm::dbgs() << " T: 0x" << Twine::utohexstr(T); + llvm::dbgs() << " result: 0x" << Twine::utohexstr(result) << "\n"); + applyArmReloc(location, imm24, 0xFFFFFF); +} + +/// \brief Relocate ARM MOVW/MOVT instructions +static void relocR_ARM_MOV(uint8_t *location, uint32_t result) { + const uint32_t imm12 = result & 0xFFF; + const uint32_t imm4 = (result >> 12) & 0xF; + + applyArmReloc(location, (imm4 << 16) | imm12, 0xF0FFF); +} + +/// \brief R_ARM_MOVW_ABS_NC - (S + A) | T +static void relocR_ARM_MOVW_ABS_NC(uint8_t *location, uint64_t P, uint64_t S, + int64_t A, bool addressesThumb) { + uint64_t T = addressesThumb; + uint32_t result = (uint32_t)((S + A) | T); + const uint32_t arg = result & 0x0000FFFF; + + DEBUG_WITH_TYPE( + "ARM", llvm::dbgs() << "\t\tHandle " << LLVM_FUNCTION_NAME << " -"; + llvm::dbgs() << " S: 0x" << Twine::utohexstr(S); + llvm::dbgs() << " A: 0x" << Twine::utohexstr(A); + llvm::dbgs() << " P: 0x" << Twine::utohexstr(P); + llvm::dbgs() << " T: 0x" << Twine::utohexstr(T); + llvm::dbgs() << " result: 0x" << Twine::utohexstr(result) << "\n"); + return relocR_ARM_MOV(location, arg); +} + +/// \brief R_ARM_MOVT_ABS - S + A +static void relocR_ARM_MOVT_ABS(uint8_t *location, uint64_t P, uint64_t S, + int64_t A) { + uint32_t result = (uint32_t)(S + A); + const uint32_t arg = (result & 0xFFFF0000) >> 16; + + DEBUG_WITH_TYPE( + "ARM", llvm::dbgs() << "\t\tHandle " << LLVM_FUNCTION_NAME << " -"; + llvm::dbgs() << " S: 0x" << Twine::utohexstr(S); + llvm::dbgs() << " A: 0x" << Twine::utohexstr(A); + llvm::dbgs() << " P: 0x" << Twine::utohexstr(P); + llvm::dbgs() << " result: 0x" << Twine::utohexstr(result) << "\n"); + return relocR_ARM_MOV(location, arg); +} + +/// 
\brief Relocate Thumb MOVW/MOVT instructions +static void relocR_ARM_THM_MOV(uint8_t *location, uint32_t result) { + const uint16_t imm8 = result & 0xFF; + const uint16_t imm3 = (result >> 8) & 0x7; + const uint16_t resLo = (imm3 << 12) | imm8; + + const uint16_t imm4 = (result >> 12) & 0xF; + const uint16_t bitI = (result >> 11) & 0x1; + const uint16_t resHi = (bitI << 10) | imm4; + + applyThmReloc(location, resHi, resLo, 0x40F, 0x70FF); +} + +/// \brief R_ARM_THM_MOVW_ABS_NC - (S + A) | T +static void relocR_ARM_THM_MOVW_ABS_NC(uint8_t *location, uint64_t P, + uint64_t S, int64_t A, + bool addressesThumb) { + uint64_t T = addressesThumb; + uint32_t result = (uint32_t)((S + A) | T); + const uint32_t arg = result & 0x0000FFFF; + + DEBUG_WITH_TYPE( + "ARM", llvm::dbgs() << "\t\tHandle " << LLVM_FUNCTION_NAME << " -"; + llvm::dbgs() << " S: 0x" << Twine::utohexstr(S); + llvm::dbgs() << " A: 0x" << Twine::utohexstr(A); + llvm::dbgs() << " P: 0x" << Twine::utohexstr(P); + llvm::dbgs() << " T: 0x" << Twine::utohexstr(T); + llvm::dbgs() << " result: 0x" << Twine::utohexstr(result) << "\n"); + return relocR_ARM_THM_MOV(location, arg); +} + +/// \brief R_ARM_THM_MOVT_ABS - S + A +static void relocR_ARM_THM_MOVT_ABS(uint8_t *location, uint64_t P, uint64_t S, + int64_t A) { + uint32_t result = (uint32_t)(S + A); + const uint32_t arg = (result & 0xFFFF0000) >> 16; + + DEBUG_WITH_TYPE( + "ARM", llvm::dbgs() << "\t\tHandle " << LLVM_FUNCTION_NAME << " -"; + llvm::dbgs() << " S: 0x" << Twine::utohexstr(S); + llvm::dbgs() << " A: 0x" << Twine::utohexstr(A); + llvm::dbgs() << " P: 0x" << Twine::utohexstr(P); + llvm::dbgs() << " result: 0x" << Twine::utohexstr(result) << "\n"); + return relocR_ARM_THM_MOV(location, arg); +} + +/// \brief R_ARM_TLS_IE32 - GOT(S) + A - P => S + A - P +static void relocR_ARM_TLS_IE32(uint8_t *location, uint64_t P, uint64_t S, + int64_t A) { + uint32_t result = (uint32_t)(S + A - P); + + DEBUG_WITH_TYPE( + "ARM", llvm::dbgs() << "\t\tHandle " << 
LLVM_FUNCTION_NAME << " -"; + llvm::dbgs() << " S: 0x" << Twine::utohexstr(S); + llvm::dbgs() << " A: 0x" << Twine::utohexstr(A); + llvm::dbgs() << " P: 0x" << Twine::utohexstr(P); + llvm::dbgs() << " result: 0x" << Twine::utohexstr(result) << "\n"); + applyArmReloc(location, result); +} + +/// \brief R_ARM_TLS_LE32 - S + A - tp => S + A + tpoff +static void relocR_ARM_TLS_LE32(uint8_t *location, uint64_t P, uint64_t S, + int64_t A, uint64_t tpoff) { + uint32_t result = (uint32_t)(S + A + tpoff); + + DEBUG_WITH_TYPE( + "ARM", llvm::dbgs() << "\t\tHandle " << LLVM_FUNCTION_NAME << " -"; + llvm::dbgs() << " S: 0x" << Twine::utohexstr(S); + llvm::dbgs() << " A: 0x" << Twine::utohexstr(A); + llvm::dbgs() << " P: 0x" << Twine::utohexstr(P); + llvm::dbgs() << " result: 0x" << Twine::utohexstr(result) << "\n"); + applyArmReloc(location, result); +} + +std::error_code ARMTargetRelocationHandler::applyRelocation( + ELFWriter &writer, llvm::FileOutputBuffer &buf, const lld::AtomLayout &atom, + const Reference &ref) const { + uint8_t *atomContent = buf.getBufferStart() + atom._fileOffset; + uint8_t *location = atomContent + ref.offsetInAtom(); + uint64_t targetVAddress = writer.addressOfAtom(ref.target()); + uint64_t relocVAddress = atom._virtualAddr + ref.offsetInAtom(); + + if (ref.kindNamespace() != Reference::KindNamespace::ELF) + return std::error_code(); + assert(ref.kindArch() == Reference::KindArch::ARM); + + // Calculate proper initial addend for the relocation + const Reference::Addend addend = + readAddend(location, ref.kindValue()); + + // Flags that the relocation addresses Thumb instruction + bool addressesThumb = false; + + if (const auto *definedAtom = dyn_cast<DefinedAtom>(ref.target())) { + addressesThumb = (DefinedAtom::codeARMThumb == definedAtom->codeModel()); + } + + switch (ref.kindValue()) { + case R_ARM_NONE: + break; + case R_ARM_ABS32: + relocR_ARM_ABS32(location, relocVAddress, targetVAddress, addend, + addressesThumb); + break; + case R_ARM_REL32: 
+ relocR_ARM_REL32(location, relocVAddress, targetVAddress, addend, + addressesThumb); + break; + case R_ARM_THM_CALL: + // TODO: consider adding bool variable to disable J1 & J2 for archs + // before ARMv6 + relocR_ARM_THM_CALL(location, relocVAddress, targetVAddress, addend, true, + addressesThumb); + break; + case R_ARM_CALL: + relocR_ARM_CALL(location, relocVAddress, targetVAddress, addend, + addressesThumb); + break; + case R_ARM_JUMP24: + relocR_ARM_JUMP24(location, relocVAddress, targetVAddress, addend, + addressesThumb); + break; + case R_ARM_THM_JUMP24: + relocR_ARM_THM_JUMP24(location, relocVAddress, targetVAddress, addend, + addressesThumb); + break; + case R_ARM_THM_JUMP11: + relocR_ARM_THM_JUMP11(location, relocVAddress, targetVAddress, addend); + break; + case R_ARM_MOVW_ABS_NC: + relocR_ARM_MOVW_ABS_NC(location, relocVAddress, targetVAddress, addend, + addressesThumb); + break; + case R_ARM_MOVT_ABS: + relocR_ARM_MOVT_ABS(location, relocVAddress, targetVAddress, addend); + break; + case R_ARM_THM_MOVW_ABS_NC: + relocR_ARM_THM_MOVW_ABS_NC(location, relocVAddress, targetVAddress, addend, + addressesThumb); + break; + case R_ARM_THM_MOVT_ABS: + relocR_ARM_THM_MOVT_ABS(location, relocVAddress, targetVAddress, addend); + break; + case R_ARM_PREL31: + relocR_ARM_PREL31(location, relocVAddress, targetVAddress, addend, + addressesThumb); + break; + case R_ARM_TLS_IE32: + relocR_ARM_TLS_IE32(location, relocVAddress, targetVAddress, addend); + break; + case R_ARM_TLS_LE32: + relocR_ARM_TLS_LE32(location, relocVAddress, targetVAddress, addend, + _armLayout.getTPOffset()); + break; + default: + return make_unhandled_reloc_error(); + } + + return std::error_code(); +} diff --git a/lib/ReaderWriter/ELF/ARM/ARMRelocationHandler.h b/lib/ReaderWriter/ELF/ARM/ARMRelocationHandler.h new file mode 100644 index 000000000000..227d68617bf9 --- /dev/null +++ b/lib/ReaderWriter/ELF/ARM/ARMRelocationHandler.h @@ -0,0 +1,38 @@ +//===--------- 
lib/ReaderWriter/ELF/ARM/ARMRelocationHandler.h ------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_ELF_ARM_ARM_RELOCATION_HANDLER_H +#define LLD_READER_WRITER_ELF_ARM_ARM_RELOCATION_HANDLER_H + +#include "ARMTargetHandler.h" + +namespace lld { +namespace elf { +typedef llvm::object::ELFType<llvm::support::little, 2, false> ARMELFType; + +template <class ELFT> class ARMTargetLayout; + +class ARMTargetRelocationHandler final + : public TargetRelocationHandler { +public: + ARMTargetRelocationHandler(ARMTargetLayout<ARMELFType> &layout) + : _armLayout(layout) {} + + std::error_code applyRelocation(ELFWriter &, llvm::FileOutputBuffer &, + const lld::AtomLayout &, + const Reference &) const override; + +private: + ARMTargetLayout<ARMELFType> &_armLayout; +}; + +} // end namespace elf +} // end namespace lld + +#endif // LLD_READER_WRITER_ELF_ARM_ARM_RELOCATION_HANDLER_H diff --git a/lib/ReaderWriter/ELF/ARM/ARMRelocationPass.cpp b/lib/ReaderWriter/ELF/ARM/ARMRelocationPass.cpp new file mode 100644 index 000000000000..27ec66ac5557 --- /dev/null +++ b/lib/ReaderWriter/ELF/ARM/ARMRelocationPass.cpp @@ -0,0 +1,373 @@ +//===--------- lib/ReaderWriter/ELF/ARM/ARMRelocationPass.cpp -------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief Defines the relocation processing pass for ARM. This includes +/// GOT and PLT entries, TLS, COPY, and ifunc. +/// +/// This also includes additional behavior that gnu-ld and gold implement but +/// which is not specified anywhere. 
+/// +//===----------------------------------------------------------------------===// + +#include "ARMRelocationPass.h" +#include "ARMLinkingContext.h" +#include "Atoms.h" +#include "lld/Core/Simple.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/Debug.h" + +using namespace lld; +using namespace lld::elf; +using namespace llvm::ELF; + +// ARM B/BL instructions of static relocation veneer. +// TODO: consider different instruction set for archs below ARMv5 +// (one as for Thumb may be used though it's less optimal). +static const uint8_t Veneer_ARM_B_BL_StaticAtomContent[8] = { + 0x04, 0xf0, 0x1f, 0xe5, // ldr pc, [pc, #-4] + 0x00, 0x00, 0x00, 0x00 // <target_symbol_address> +}; + +// Thumb B/BL instructions of static relocation veneer. +// TODO: consider different instruction set for archs above ARMv5 +// (one as for ARM may be used since it's more optimal). +static const uint8_t Veneer_THM_B_BL_StaticAtomContent[8] = { + 0x78, 0x47, // bx pc + 0x00, 0x00, // nop + 0xfe, 0xff, 0xff, 0xea // b <target_symbol_address> +}; + +// .got values +static const uint8_t ARMGotAtomContent[4] = {0}; + +namespace { +/// \brief Atoms that hold veneer code. 
+class VeneerAtom : public SimpleELFDefinedAtom { + StringRef _section; + +public: + VeneerAtom(const File &f, StringRef secName) + : SimpleELFDefinedAtom(f), _section(secName) {} + + Scope scope() const override { return DefinedAtom::scopeTranslationUnit; } + + SectionChoice sectionChoice() const override { + return DefinedAtom::sectionBasedOnContent; + } + + StringRef customSectionName() const override { return _section; } + + ContentType contentType() const override { + return DefinedAtom::typeCode; + } + + uint64_t size() const override { return rawContent().size(); } + + ContentPermissions permissions() const override { return permR_X; } + + Alignment alignment() const override { return Alignment(2); } + + StringRef name() const override { return _name; } + std::string _name; +}; + +/// \brief Atoms that hold veneer for statically relocated +/// ARM B/BL instructions. +class Veneer_ARM_B_BL_StaticAtom : public VeneerAtom { +public: + Veneer_ARM_B_BL_StaticAtom(const File &f, StringRef secName) + : VeneerAtom(f, secName) {} + + ArrayRef<uint8_t> rawContent() const override { + return llvm::makeArrayRef(Veneer_ARM_B_BL_StaticAtomContent); + } +}; + +/// \brief Atoms that hold veneer for statically relocated +/// Thumb B/BL instructions. 
+class Veneer_THM_B_BL_StaticAtom : public VeneerAtom { +public: + Veneer_THM_B_BL_StaticAtom(const File &f, StringRef secName) + : VeneerAtom(f, secName) {} + + DefinedAtom::CodeModel codeModel() const override { + return DefinedAtom::codeARMThumb; + } + + ArrayRef<uint8_t> rawContent() const override { + return llvm::makeArrayRef(Veneer_THM_B_BL_StaticAtomContent); + } +}; + +/// \brief Atoms that are used by ARM dynamic linking +class ARMGOTAtom : public GOTAtom { +public: + ARMGOTAtom(const File &f, StringRef secName) : GOTAtom(f, secName) {} + + ArrayRef<uint8_t> rawContent() const override { + return llvm::makeArrayRef(ARMGotAtomContent); + } + + Alignment alignment() const override { return Alignment(2); } +}; + +class ELFPassFile : public SimpleFile { +public: + ELFPassFile(const ELFLinkingContext &eti) : SimpleFile("ELFPassFile") { + setOrdinal(eti.getNextOrdinalAndIncrement()); + } + + llvm::BumpPtrAllocator _alloc; +}; + +/// \brief CRTP base for handling relocations. +template <class Derived> class ARMRelocationPass : public Pass { + /// \brief Handle a specific reference. + void handleReference(const DefinedAtom &atom, const Reference &ref) { + DEBUG_WITH_TYPE( + "ARM", llvm::dbgs() << "\t" << LLVM_FUNCTION_NAME << "()" + << ": Name of Defined Atom: " << atom.name().str(); + llvm::dbgs() << " kindValue: " << ref.kindValue() << "\n"); + if (ref.kindNamespace() != Reference::KindNamespace::ELF) + return; + assert(ref.kindArch() == Reference::KindArch::ARM); + switch (ref.kindValue()) { + case R_ARM_JUMP24: + case R_ARM_THM_JUMP24: + static_cast<Derived *>(this)->handleVeneer(atom, ref); + break; + case R_ARM_TLS_IE32: + static_cast<Derived *>(this)->handleTLSIE32(ref); + break; + } + } + +protected: + std::error_code handleVeneer(const DefinedAtom &atom, const Reference &ref) { + // Target symbol and relocated place should have different + // instruction sets in order a veneer to be generated in between. 
+ const auto *target = dyn_cast<DefinedAtom>(ref.target()); + if (!target || target->codeModel() == atom.codeModel()) + return std::error_code(); + + // TODO: For unconditional jump instructions (R_ARM_CALL and R_ARM_THM_CALL) + // fixup isn't possible without veneer generation for archs below ARMv5. + + // Veneers may only be generated for STT_FUNC target symbols + // or for symbols located in sections different to the place of relocation. + const auto kindValue = ref.kindValue(); + StringRef secName = atom.customSectionName(); + if (DefinedAtom::typeCode != target->contentType() && + !target->customSectionName().equals(secName)) { + StringRef kindValStr; + if (!this->_ctx.registry().referenceKindToString( + ref.kindNamespace(), ref.kindArch(), kindValue, kindValStr)) { + kindValStr = "unknown"; + } + + std::string errStr = + (Twine("Reference of type ") + Twine(kindValue) + " (" + kindValStr + + ") from " + atom.name() + "+" + Twine(ref.offsetInAtom()) + " to " + + ref.target()->name() + "+" + Twine(ref.addend()) + + " cannot be effected without a veneer").str(); + + llvm_unreachable(errStr.c_str()); + } + + const Atom *veneer = nullptr; + switch (kindValue) { + case R_ARM_JUMP24: + veneer = static_cast<Derived *>(this) + ->getVeneer_ARM_B_BL(target, secName); + break; + case R_ARM_THM_JUMP24: + veneer = static_cast<Derived *>(this) + ->getVeneer_THM_B_BL(target, secName); + break; + default: + llvm_unreachable("Unhandled reference type for veneer generation"); + } + + assert(veneer && "The veneer is not set"); + const_cast<Reference &>(ref).setTarget(veneer); + return std::error_code(); + } + + std::error_code handleTLSIE32(const Reference &ref) { + if (const auto *target = dyn_cast<DefinedAtom>(ref.target())) { + const_cast<Reference &>(ref).setTarget( + static_cast<Derived *>(this)->getTLSTPOFF32(target)); + return std::error_code(); + } + llvm_unreachable("R_ARM_TLS_IE32 reloc targets wrong atom type"); + } + + /// \brief Create a GOT entry for TLS with reloc 
type and addend specified. + template <Reference::KindValue R_ARM_TLS, Reference::Addend A = 0> + const GOTAtom *getGOTTLSEntry(const DefinedAtom *da) { + auto got = _gotMap.find(da); + if (got != _gotMap.end()) + return got->second; + auto g = new (_file._alloc) ARMGOTAtom(_file, ".got"); + g->addReferenceELF_ARM(R_ARM_TLS, 0, da, A); +#ifndef NDEBUG + g->_name = "__got_tls_"; + g->_name += da->name(); +#endif + _gotMap[da] = g; + _gotVector.push_back(g); + return g; + } + +public: + ARMRelocationPass(const ELFLinkingContext &ctx) : _file(ctx), _ctx(ctx) {} + + /// \brief Do the pass. + /// + /// The goal here is to first process each reference individually. Each call + /// to handleReference may modify the reference itself and/or create new + /// atoms which must be stored in one of the maps below. + /// + /// After all references are handled, the atoms created during that are all + /// added to mf. + void perform(std::unique_ptr<MutableFile> &mf) override { + ScopedTask task(getDefaultDomain(), "ARM GOT/PLT Pass"); + DEBUG_WITH_TYPE( + "ARM", llvm::dbgs() << "Undefined Atoms" << "\n"; + for (const auto &atom + : mf->undefined()) { + llvm::dbgs() << " Name of Atom: " << atom->name().str() << "\n"; + } + + llvm::dbgs() << "Shared Library Atoms" << "\n"; + for (const auto &atom + : mf->sharedLibrary()) { + llvm::dbgs() << " Name of Atom: " << atom->name().str() << "\n"; + } + + llvm::dbgs() << "Absolute Atoms" << "\n"; + for (const auto &atom + : mf->absolute()) { + llvm::dbgs() << " Name of Atom: " << atom->name().str() << "\n"; + } + + llvm::dbgs() << "Defined Atoms" << "\n"; + for (const auto &atom + : mf->defined()) { + llvm::dbgs() << " Name of Atom: " << atom->name().str() << "\n"; + }); + + // Process all references. + for (const auto &atom : mf->defined()) { + for (const auto &ref : *atom) { + handleReference(*atom, *ref); + } + } + + // Add all created atoms to the link. 
+ uint64_t ordinal = 0; + for (auto &got : _gotVector) { + got->setOrdinal(ordinal++); + mf->addAtom(*got); + } + for (auto &veneer : _veneerVector) { + veneer->setOrdinal(ordinal++); + mf->addAtom(*veneer); + } + } + +protected: + /// \brief Owner of all the Atoms created by this pass. + ELFPassFile _file; + const ELFLinkingContext &_ctx; + + /// \brief Map Atoms to their GOT entries. + llvm::DenseMap<const Atom *, GOTAtom *> _gotMap; + + /// \brief Map Atoms to their veneers. + llvm::DenseMap<const Atom *, VeneerAtom *> _veneerMap; + + /// \brief the list of GOT/PLT atoms + std::vector<GOTAtom *> _gotVector; + + /// \brief the list of veneer atoms. + std::vector<VeneerAtom *> _veneerVector; +}; + +/// This implements the static relocation model. Meaning GOT and PLT entries are +/// not created for references that can be directly resolved. These are +/// converted to a direct relocation. For entries that do require a GOT or PLT +/// entry, that entry is statically bound. +/// +/// TLS always assumes module 1 and attempts to remove indirection. +class ARMStaticRelocationPass final + : public ARMRelocationPass<ARMStaticRelocationPass> { +public: + ARMStaticRelocationPass(const elf::ARMLinkingContext &ctx) + : ARMRelocationPass(ctx) {} + + /// \brief Get the veneer for ARM B/BL instructions. + const VeneerAtom *getVeneer_ARM_B_BL(const DefinedAtom *da, + StringRef secName) { + auto veneer = _veneerMap.find(da); + if (_veneerMap.end() != veneer) + return veneer->second; + + auto v = new (_file._alloc) Veneer_ARM_B_BL_StaticAtom(_file, secName); + v->addReferenceELF_ARM(R_ARM_ABS32, 4, da, 0); + + v->_name = "__"; + v->_name += da->name(); + v->_name += "_from_arm"; + + _veneerMap[da] = v; + _veneerVector.push_back(v); + return v; + } + + /// \brief Get the veneer for Thumb B/BL instructions. 
+ const VeneerAtom *getVeneer_THM_B_BL(const DefinedAtom *da, + StringRef secName) { + auto veneer = _veneerMap.find(da); + if (_veneerMap.end() != veneer) + return veneer->second; + + auto v = new (_file._alloc) Veneer_THM_B_BL_StaticAtom(_file, secName); + v->addReferenceELF_ARM(R_ARM_JUMP24, 4, da, 0); + + v->_name = "__"; + v->_name += da->name(); + v->_name += "_from_thumb"; + + _veneerMap[da] = v; + _veneerVector.push_back(v); + return v; + } + + /// \brief Create a GOT entry for R_ARM_TLS_TPOFF32 reloc. + const GOTAtom *getTLSTPOFF32(const DefinedAtom *da) { + return getGOTTLSEntry<R_ARM_TLS_LE32>(da); + } +}; + +} // end of anon namespace + +std::unique_ptr<Pass> +lld::elf::createARMRelocationPass(const ARMLinkingContext &ctx) { + switch (ctx.getOutputELFType()) { + case llvm::ELF::ET_EXEC: + if (ctx.isDynamic()) + llvm_unreachable("Unhandled output file type"); + return llvm::make_unique<ARMStaticRelocationPass>(ctx); + default: + llvm_unreachable("Unhandled output file type"); + } +} diff --git a/lib/ReaderWriter/ELF/ARM/ARMRelocationPass.h b/lib/ReaderWriter/ELF/ARM/ARMRelocationPass.h new file mode 100644 index 000000000000..651e798f33b1 --- /dev/null +++ b/lib/ReaderWriter/ELF/ARM/ARMRelocationPass.h @@ -0,0 +1,31 @@ +//===--------- lib/ReaderWriter/ELF/ARM/ARMRelocationPass.h ---------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief Declares the relocation processing pass for ARM. This includes +/// GOT and PLT entries, TLS, COPY, and ifunc. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_ELF_ARM_ARM_RELOCATION_PASS_H +#define LLD_READER_WRITER_ELF_ARM_ARM_RELOCATION_PASS_H + +#include <memory> + +namespace lld { +class Pass; +namespace elf { +class ARMLinkingContext; + +/// \brief Create ARM relocation pass for the given linking context. +std::unique_ptr<Pass> createARMRelocationPass(const ARMLinkingContext &); +} +} + +#endif diff --git a/lib/ReaderWriter/ELF/ARM/ARMSymbolTable.h b/lib/ReaderWriter/ELF/ARM/ARMSymbolTable.h new file mode 100644 index 000000000000..540a480421a8 --- /dev/null +++ b/lib/ReaderWriter/ELF/ARM/ARMSymbolTable.h @@ -0,0 +1,46 @@ +//===--------- lib/ReaderWriter/ELF/ARM/ARMSymbolTable.h ------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_ELF_ARM_ARM_SYMBOL_TABLE_H +#define LLD_READER_WRITER_ELF_ARM_ARM_SYMBOL_TABLE_H + +namespace lld { +namespace elf { + +/// \brief The SymbolTable class represents the symbol table in a ELF file +template<class ELFT> +class ARMSymbolTable : public SymbolTable<ELFT> { +public: + typedef llvm::object::Elf_Sym_Impl<ELFT> Elf_Sym; + + ARMSymbolTable(const ELFLinkingContext &context); + + void addDefinedAtom(Elf_Sym &sym, const DefinedAtom *da, + int64_t addr) override; +}; + +template <class ELFT> +ARMSymbolTable<ELFT>::ARMSymbolTable(const ELFLinkingContext &context) + : SymbolTable<ELFT>(context, ".symtab", + DefaultLayout<ELFT>::ORDER_SYMBOL_TABLE) {} + +template <class ELFT> +void ARMSymbolTable<ELFT>::addDefinedAtom(Elf_Sym &sym, const DefinedAtom *da, + int64_t addr) { + SymbolTable<ELFT>::addDefinedAtom(sym, da, addr); + + // Set zero bit to distinguish symbols addressing Thumb instructions + if (DefinedAtom::codeARMThumb == da->codeModel()) + 
sym.st_value = static_cast<int64_t>(sym.st_value) | 0x1; +} + +} // elf +} // lld + +#endif // LLD_READER_WRITER_ELF_ARM_ARM_SYMBOL_TABLE_H diff --git a/lib/ReaderWriter/ELF/ARM/ARMTargetHandler.cpp b/lib/ReaderWriter/ELF/ARM/ARMTargetHandler.cpp new file mode 100644 index 000000000000..de90f490f621 --- /dev/null +++ b/lib/ReaderWriter/ELF/ARM/ARMTargetHandler.cpp @@ -0,0 +1,44 @@ +//===--------- lib/ReaderWriter/ELF/ARM/ARMTargetHandler.cpp --------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "Atoms.h" +#include "ARMExecutableWriter.h" +#include "ARMTargetHandler.h" +#include "ARMLinkingContext.h" + +using namespace lld; +using namespace elf; + +ARMTargetHandler::ARMTargetHandler(ARMLinkingContext &context) + : _context(context), _armTargetLayout( + new ARMTargetLayout<ARMELFType>(context)), + _armRelocationHandler(new ARMTargetRelocationHandler( + *_armTargetLayout.get())) {} + +void ARMTargetHandler::registerRelocationNames(Registry &registry) { + registry.addKindTable(Reference::KindNamespace::ELF, Reference::KindArch::ARM, + kindStrings); +} + +std::unique_ptr<Writer> ARMTargetHandler::getWriter() { + switch (this->_context.getOutputELFType()) { + case llvm::ELF::ET_EXEC: + return std::unique_ptr<Writer>( + new ARMExecutableWriter<ARMELFType>(_context, *_armTargetLayout.get())); + default: + llvm_unreachable("unsupported output type"); + } +} + +#define ELF_RELOC(name, value) LLD_KIND_STRING_ENTRY(name), + +const Registry::KindStrings ARMTargetHandler::kindStrings[] = { +#include "llvm/Support/ELFRelocs/ARM.def" + LLD_KIND_STRING_END +}; diff --git a/lib/ReaderWriter/ELF/ARM/ARMTargetHandler.h b/lib/ReaderWriter/ELF/ARM/ARMTargetHandler.h new file mode 100644 index 000000000000..10641954da25 --- /dev/null +++ b/lib/ReaderWriter/ELF/ARM/ARMTargetHandler.h @@ 
-0,0 +1,88 @@ +//===--------- lib/ReaderWriter/ELF/ARM/ARMTargetHandler.h ----------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_ELF_ARM_ARM_TARGET_HANDLER_H +#define LLD_READER_WRITER_ELF_ARM_ARM_TARGET_HANDLER_H + +#include "ARMELFFile.h" +#include "ARMELFReader.h" +#include "ARMRelocationHandler.h" +#include "DefaultTargetHandler.h" +#include "TargetLayout.h" + +#include "lld/Core/Simple.h" +#include "llvm/ADT/Optional.h" +#include <map> + +namespace lld { +namespace elf { +typedef llvm::object::ELFType<llvm::support::little, 2, false> ARMELFType; +class ARMLinkingContext; + +template <class ELFT> class ARMTargetLayout : public TargetLayout<ELFT> { +public: + ARMTargetLayout(ARMLinkingContext &context) + : TargetLayout<ELFT>(context) {} + + uint64_t getTPOffset() { + if (_tpOff.hasValue()) + return *_tpOff; + + for (const auto &phdr : *this->_programHeader) { + if (phdr->p_type == llvm::ELF::PT_TLS) { + _tpOff = llvm::RoundUpToAlignment(TCB_SIZE, phdr->p_align); + return *_tpOff; + } + } + llvm_unreachable("TLS segment not found"); + } + +private: + // TCB block size of the TLS. + enum { TCB_SIZE = 0x8 }; + + // Cached value of the TLS offset from the $tp pointer. 
+ llvm::Optional<uint64_t> _tpOff; +}; + +class ARMTargetHandler final : public DefaultTargetHandler<ARMELFType> { +public: + ARMTargetHandler(ARMLinkingContext &context); + + ARMTargetLayout<ARMELFType> &getTargetLayout() override { + return *(_armTargetLayout.get()); + } + + void registerRelocationNames(Registry &registry) override; + + const ARMTargetRelocationHandler &getRelocationHandler() const override { + return *(_armRelocationHandler.get()); + } + + std::unique_ptr<Reader> getObjReader() override { + return std::unique_ptr<Reader>(new ARMELFObjectReader(_context)); + } + + std::unique_ptr<Reader> getDSOReader() override { + return std::unique_ptr<Reader>(new ARMELFDSOReader(_context)); + } + + std::unique_ptr<Writer> getWriter() override; + +private: + static const Registry::KindStrings kindStrings[]; + ARMLinkingContext &_context; + std::unique_ptr<ARMTargetLayout<ARMELFType>> _armTargetLayout; + std::unique_ptr<ARMTargetRelocationHandler> _armRelocationHandler; +}; + +} // end namespace elf +} // end namespace lld + +#endif // LLD_READER_WRITER_ELF_ARM_ARM_TARGET_HANDLER_H diff --git a/lib/ReaderWriter/ELF/ARM/CMakeLists.txt b/lib/ReaderWriter/ELF/ARM/CMakeLists.txt new file mode 100644 index 000000000000..2ccf9eb6266d --- /dev/null +++ b/lib/ReaderWriter/ELF/ARM/CMakeLists.txt @@ -0,0 +1,12 @@ +add_llvm_library(lldARMELFTarget + ARMLinkingContext.cpp + ARMTargetHandler.cpp + ARMRelocationHandler.cpp + ARMRelocationPass.cpp + LINK_LIBS + lldELF + lldReaderWriter + lldCore + LLVMObject + LLVMSupport + ) diff --git a/lib/ReaderWriter/ELF/ARM/Makefile b/lib/ReaderWriter/ELF/ARM/Makefile new file mode 100644 index 000000000000..f67d36a1b612 --- /dev/null +++ b/lib/ReaderWriter/ELF/ARM/Makefile @@ -0,0 +1,15 @@ +##===------ lld/lib/ReaderWriter/ELF/ARM/Makefile ----------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. 
+# +##===----------------------------------------------------------------------===## + +LLD_LEVEL := ../../../.. +LIBRARYNAME := lldARMELFTarget +USEDLIBS = lldCore.a +CPP.Flags += -I$(PROJ_SRC_DIR)/$(LLD_LEVEL)/lib/ReaderWriter/ELF/ARM -I$(PROJ_SRC_DIR)/$(LLD_LEVEL)/lib/ReaderWriter/ELF + +include $(LLD_LEVEL)/Makefile diff --git a/lib/ReaderWriter/ELF/ARM/TODO.rst b/lib/ReaderWriter/ELF/ARM/TODO.rst new file mode 100644 index 000000000000..d05419decb78 --- /dev/null +++ b/lib/ReaderWriter/ELF/ARM/TODO.rst @@ -0,0 +1,20 @@ +ELF ARM +~~~~~~~~~~~ + +Unimplemented Features +###################### + +* Static executable linking - in progress +* Dynamic executable linking +* DSO linking +* PLT entries' generation for images larger than 2^28 bytes (see Sec. A.3 of the ELF reference) +* ARM and Thumb interworking (see http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0203j/Bcghfebi.html) +* .ARM.exidx section handling +* -init/-fini options +* Lots of relocations + +Unimplemented Relocations +######################### + +All of these relocations are defined in: +http://infocenter.arm.com/help/topic/com.arm.doc.ihi0044e/IHI0044E_aaelf.pdf diff --git a/lib/ReaderWriter/ELF/Atoms.h b/lib/ReaderWriter/ELF/Atoms.h new file mode 100644 index 000000000000..6a506d21d938 --- /dev/null +++ b/lib/ReaderWriter/ELF/Atoms.h @@ -0,0 +1,849 @@ +//===- lib/ReaderWriter/ELF/Atoms.h ---------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_ELF_ATOMS_H +#define LLD_READER_WRITER_ELF_ATOMS_H + +#include "TargetHandler.h" +#include "lld/Core/LLVM.h" +#include "lld/Core/Simple.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringSwitch.h" +#include <memory> +#include <vector> + +namespace lld { +namespace elf { +template <class ELFT> class DynamicFile; +template <typename ELFT> class ELFFile; + +/// \brief Relocation References: Defined Atoms may contain references that will +/// need to be patched before the executable is written. +/// +/// Construction of ELFReferences is a two-pass process. ELFReferences are +/// instantiated while we are iterating over symbol tables to atomize +/// symbols. At that time we only know the index of relocation target symbol +/// (not target atom) about a relocation, so we store the index to +/// ELFReference. In the second pass, ELFReferences are revisited to update +/// target atoms by target symbol indexes. 
+template <class ELFT> class ELFReference : public Reference { + typedef llvm::object::Elf_Rel_Impl<ELFT, false> Elf_Rel; + typedef llvm::object::Elf_Rel_Impl<ELFT, true> Elf_Rela; + typedef llvm::object::Elf_Sym_Impl<ELFT> Elf_Sym; + +public: + ELFReference(const Elf_Rela *rela, uint64_t off, Reference::KindArch arch, + Reference::KindValue relocType, uint32_t idx) + : Reference(Reference::KindNamespace::ELF, arch, relocType), + _target(nullptr), _targetSymbolIndex(idx), _offsetInAtom(off), + _addend(rela->r_addend) {} + + ELFReference(uint64_t off, Reference::KindArch arch, + Reference::KindValue relocType, uint32_t idx) + : Reference(Reference::KindNamespace::ELF, arch, relocType), + _target(nullptr), _targetSymbolIndex(idx), _offsetInAtom(off), + _addend(0) {} + + ELFReference(uint32_t edgeKind) + : Reference(Reference::KindNamespace::all, Reference::KindArch::all, + edgeKind), + _target(nullptr), _targetSymbolIndex(0), _offsetInAtom(0), _addend(0) {} + + uint64_t offsetInAtom() const override { return _offsetInAtom; } + + const Atom *target() const override { return _target; } + + /// \brief The symbol table index that contains the target reference. + uint64_t targetSymbolIndex() const { + return _targetSymbolIndex; + } + + Addend addend() const override { return _addend; } + + virtual void setOffset(uint64_t off) { _offsetInAtom = off; } + + void setAddend(Addend A) override { _addend = A; } + + void setTarget(const Atom *newAtom) override { _target = newAtom; } + +private: + const Atom *_target; + uint64_t _targetSymbolIndex; + uint64_t _offsetInAtom; + Addend _addend; +}; + +/// \brief These atoms store symbols that are fixed to a particular address. +/// This atom has no content its address will be used by the writer to fixup +/// references that point to it. 
+template <class ELFT> class ELFAbsoluteAtom : public AbsoluteAtom { + typedef llvm::object::Elf_Sym_Impl<ELFT> Elf_Sym; + +public: + ELFAbsoluteAtom(const ELFFile<ELFT> &file, StringRef name, + const Elf_Sym *symbol, uint64_t value) + : _owningFile(file), _name(name), _symbol(symbol), _value(value) { + } + + const ELFFile<ELFT> &file() const override { return _owningFile; } + + Scope scope() const override { + if (_symbol->getVisibility() == llvm::ELF::STV_HIDDEN) + return scopeLinkageUnit; + if (_symbol->getBinding() == llvm::ELF::STB_LOCAL) + return scopeTranslationUnit; + return scopeGlobal; + } + + StringRef name() const override { return _name; } + + uint64_t value() const override { return _value; } + +private: + const ELFFile<ELFT> &_owningFile; + StringRef _name; + const Elf_Sym *_symbol; + uint64_t _value; +}; + +/// \brief ELFUndefinedAtom: These atoms store undefined symbols and are place +/// holders that will be replaced by defined atoms later in the linking process. +template <class ELFT> class ELFUndefinedAtom : public lld::UndefinedAtom { + typedef llvm::object::Elf_Sym_Impl<ELFT> Elf_Sym; + +public: + ELFUndefinedAtom(const File &file, StringRef name, const Elf_Sym *symbol) + : _owningFile(file), _name(name), _symbol(symbol) {} + + const File &file() const override { return _owningFile; } + + StringRef name() const override { return _name; } + + // A symbol in ELF can be undefined at build time if the symbol is a undefined + // weak symbol. + CanBeNull canBeNull() const override { + if (_symbol->getBinding() == llvm::ELF::STB_WEAK) + return CanBeNull::canBeNullAtBuildtime; + return CanBeNull::canBeNullNever; + } + +private: + const File &_owningFile; + StringRef _name; + const Elf_Sym *_symbol; +}; + +/// \brief This atom stores defined symbols and will contain either data or +/// code. 
+template <class ELFT> class ELFDefinedAtom : public DefinedAtom { + typedef llvm::object::Elf_Sym_Impl<ELFT> Elf_Sym; + typedef llvm::object::Elf_Shdr_Impl<ELFT> Elf_Shdr; + +public: + ELFDefinedAtom(const ELFFile<ELFT> &file, StringRef symbolName, + StringRef sectionName, const Elf_Sym *symbol, + const Elf_Shdr *section, ArrayRef<uint8_t> contentData, + unsigned int referenceStart, unsigned int referenceEnd, + std::vector<ELFReference<ELFT> *> &referenceList) + : _owningFile(file), _symbolName(symbolName), _sectionName(sectionName), + _symbol(symbol), _section(section), _contentData(contentData), + _referenceStartIndex(referenceStart), _referenceEndIndex(referenceEnd), + _referenceList(referenceList), _contentType(typeUnknown), + _permissions(permUnknown) {} + + ~ELFDefinedAtom() {} + + const ELFFile<ELFT> &file() const override { return _owningFile; } + + StringRef name() const override { return _symbolName; } + + uint64_t ordinal() const override { return _ordinal; } + + const Elf_Sym *symbol() const { return _symbol; } + + const Elf_Shdr *section() const { return _section; } + + uint64_t size() const override { + // Common symbols are not allocated in object files, + // so use st_size to tell how many bytes are required. + if (_symbol && (_symbol->getType() == llvm::ELF::STT_COMMON || + _symbol->st_shndx == llvm::ELF::SHN_COMMON)) + return (uint64_t) _symbol->st_size; + + return _contentData.size(); + } + + Scope scope() const override { + if (!_symbol) + return scopeGlobal; + if (_symbol->getVisibility() == llvm::ELF::STV_HIDDEN) + return scopeLinkageUnit; + if (_symbol->getBinding() != llvm::ELF::STB_LOCAL) + return scopeGlobal; + return scopeTranslationUnit; + } + + // FIXME: Need to revisit this in future. 
+ Interposable interposable() const override { return interposeNo; } + + Merge merge() const override { + if (!_symbol) + return mergeNo; + + if (_symbol->getBinding() == llvm::ELF::STB_WEAK) + return mergeAsWeak; + + if ((_symbol->getType() == llvm::ELF::STT_COMMON) || + _symbol->st_shndx == llvm::ELF::SHN_COMMON) + return mergeAsTentative; + + return mergeNo; + } + + ContentType contentType() const override { + if (_contentType != typeUnknown) + return _contentType; + + ContentType ret = typeUnknown; + uint64_t flags = _section->sh_flags; + + if (_section->sh_type == llvm::ELF::SHT_GROUP) + return typeGroupComdat; + + if (!_symbol && _sectionName.startswith(".gnu.linkonce")) + return typeGnuLinkOnce; + + if (!(flags & llvm::ELF::SHF_ALLOC)) + return _contentType = typeNoAlloc; + + if (_section->sh_flags == + (llvm::ELF::SHF_ALLOC | llvm::ELF::SHF_WRITE | llvm::ELF::SHF_TLS)) { + return _contentType = _section->sh_type == llvm::ELF::SHT_NOBITS ? typeThreadZeroFill + : typeThreadData; + } + + if ((_section->sh_flags == llvm::ELF::SHF_ALLOC) && + (_section->sh_type == llvm::ELF::SHT_PROGBITS)) + return _contentType = typeConstant; + + if (_symbol->getType() == llvm::ELF::STT_GNU_IFUNC) + return _contentType = typeResolver; + + if (_symbol->st_shndx == llvm::ELF::SHN_COMMON) + return _contentType = typeZeroFill; + + switch (_section->sh_type) { + case llvm::ELF::SHT_PROGBITS: + flags &= ~llvm::ELF::SHF_ALLOC; + flags &= ~llvm::ELF::SHF_GROUP; + switch (flags) { + case llvm::ELF::SHF_EXECINSTR: + case (llvm::ELF::SHF_WRITE|llvm::ELF::SHF_EXECINSTR): + ret = typeCode; + break; + case llvm::ELF::SHF_WRITE: + ret = typeData; + break; + case (llvm::ELF::SHF_MERGE|llvm::ELF::SHF_STRINGS): + case llvm::ELF::SHF_STRINGS: + case llvm::ELF::SHF_MERGE: + ret = typeConstant; + break; + default: + ret = typeCode; + break; + } + break; + case llvm::ELF::SHT_NOTE: + flags &= ~llvm::ELF::SHF_ALLOC; + switch (flags) { + case llvm::ELF::SHF_WRITE: + ret = typeRWNote; + break; + 
default: + ret = typeRONote; + break; + } + break; + case llvm::ELF::SHT_NOBITS: + ret = typeZeroFill; + break; + case llvm::ELF::SHT_NULL: + if ((_symbol->getType() == llvm::ELF::STT_COMMON) + || _symbol->st_shndx == llvm::ELF::SHN_COMMON) + ret = typeZeroFill; + break; + case llvm::ELF::SHT_INIT_ARRAY: + case llvm::ELF::SHT_FINI_ARRAY: + ret = typeData; + break; + } + + return _contentType = ret; + } + + Alignment alignment() const override { + if (!_symbol) + return Alignment(0); + + // Obtain proper value of st_value field. + const auto symValue = getSymbolValue(_symbol); + + // Unallocated common symbols specify their alignment constraints in + // st_value. + if ((_symbol->getType() == llvm::ELF::STT_COMMON) || + _symbol->st_shndx == llvm::ELF::SHN_COMMON) { + return Alignment(llvm::Log2_64(symValue)); + } + if (_section->sh_addralign == 0) { + // sh_addralign of 0 means no alignment + return Alignment(0, symValue); + } + return Alignment(llvm::Log2_64(_section->sh_addralign), + symValue % _section->sh_addralign); + } + + // Do we have a choice for ELF? All symbols live in explicit sections. + SectionChoice sectionChoice() const override { + switch (contentType()) { + case typeCode: + case typeData: + case typeZeroFill: + case typeThreadZeroFill: + case typeThreadData: + case typeConstant: + if ((_sectionName == ".text") || (_sectionName == ".data") || + (_sectionName == ".bss") || (_sectionName == ".rodata") || + (_sectionName == ".tdata") || (_sectionName == ".tbss")) + return sectionBasedOnContent; + default: + break; + } + return sectionCustomRequired; + } + + StringRef customSectionName() const override { + if ((contentType() == typeZeroFill) || + (_symbol && _symbol->st_shndx == llvm::ELF::SHN_COMMON)) + return ".bss"; + return _sectionName; + } + + // It isn't clear that __attribute__((used)) is transmitted to the ELF object + // file. 
+ DeadStripKind deadStrip() const override { return deadStripNormal; } + + ContentPermissions permissions() const override { + if (_permissions != permUnknown) + return _permissions; + + uint64_t flags = _section->sh_flags; + + if (!(flags & llvm::ELF::SHF_ALLOC)) + return _permissions = perm___; + + switch (_section->sh_type) { + // permRW_L is for sections modified by the runtime + // loader. + case llvm::ELF::SHT_REL: + case llvm::ELF::SHT_RELA: + return _permissions = permRW_L; + + case llvm::ELF::SHT_DYNAMIC: + case llvm::ELF::SHT_PROGBITS: + case llvm::ELF::SHT_NOTE: + flags &= ~llvm::ELF::SHF_ALLOC; + flags &= ~llvm::ELF::SHF_GROUP; + switch (flags) { + // Code + case llvm::ELF::SHF_EXECINSTR: + return _permissions = permR_X; + case (llvm::ELF::SHF_WRITE|llvm::ELF::SHF_EXECINSTR): + return _permissions = permRWX; + // Data + case llvm::ELF::SHF_WRITE: + return _permissions = permRW_; + // Strings + case llvm::ELF::SHF_MERGE: + case llvm::ELF::SHF_STRINGS: + return _permissions = permR__; + + default: + if (flags & llvm::ELF::SHF_WRITE) + return _permissions = permRW_; + return _permissions = permR__; + } + + case llvm::ELF::SHT_NOBITS: + return _permissions = permRW_; + + case llvm::ELF::SHT_INIT_ARRAY: + case llvm::ELF::SHT_FINI_ARRAY: + return _permissions = permRW_; + + default: + return _permissions = perm___; + } + } + + ArrayRef<uint8_t> rawContent() const override { return _contentData; } + + DefinedAtom::reference_iterator begin() const override { + uintptr_t index = _referenceStartIndex; + const void *it = reinterpret_cast<const void*>(index); + return reference_iterator(*this, it); + } + + DefinedAtom::reference_iterator end() const override { + uintptr_t index = _referenceEndIndex; + const void *it = reinterpret_cast<const void*>(index); + return reference_iterator(*this, it); + } + + const Reference *derefIterator(const void *It) const override { + uintptr_t index = reinterpret_cast<uintptr_t>(It); + assert(index >= _referenceStartIndex); + 
assert(index < _referenceEndIndex); + return ((_referenceList)[index]); + } + + void incrementIterator(const void *&It) const override { + uintptr_t index = reinterpret_cast<uintptr_t>(It); + ++index; + It = reinterpret_cast<const void *>(index); + } + + void addReference(ELFReference<ELFT> *reference) { + _referenceList.push_back(reference); + _referenceEndIndex = _referenceList.size(); + } + + virtual void setOrdinal(uint64_t ord) { _ordinal = ord; } + +protected: + /// Returns correct st_value for the symbol depending on the architecture. + /// For most architectures it's just a regular st_value with no changes. + virtual uint64_t getSymbolValue(const Elf_Sym *symbol) const { + return symbol->st_value; + } + +protected: + const ELFFile<ELFT> &_owningFile; + StringRef _symbolName; + StringRef _sectionName; + const Elf_Sym *_symbol; + const Elf_Shdr *_section; + /// \brief Holds the bits that make up the atom. + ArrayRef<uint8_t> _contentData; + + uint64_t _ordinal; + unsigned int _referenceStartIndex; + unsigned int _referenceEndIndex; + std::vector<ELFReference<ELFT> *> &_referenceList; + mutable ContentType _contentType; + mutable ContentPermissions _permissions; +}; + +/// \brief This atom stores mergeable Strings +template <class ELFT> class ELFMergeAtom : public DefinedAtom { + typedef llvm::object::Elf_Shdr_Impl<ELFT> Elf_Shdr; + +public: + ELFMergeAtom(const ELFFile<ELFT> &file, StringRef sectionName, + const Elf_Shdr *section, ArrayRef<uint8_t> contentData, + uint64_t offset) + : _owningFile(file), _sectionName(sectionName), _section(section), + _contentData(contentData), _offset(offset) { + } + + const ELFFile<ELFT> &file() const override { return _owningFile; } + + StringRef name() const override { return ""; } + + virtual uint64_t section() const { return _section->sh_name; } + + virtual uint64_t offset() const { return _offset; } + + virtual void setOrdinal(uint64_t ord) { _ordinal = ord; } + + uint64_t ordinal() const override { return _ordinal; } + 
+ uint64_t size() const override { return _contentData.size(); } + + Scope scope() const override { return scopeTranslationUnit; } + + Interposable interposable() const override { return interposeNo; } + + Merge merge() const override { return mergeByContent; } + + ContentType contentType() const override { return typeConstant; } + + Alignment alignment() const override { + return Alignment(llvm::Log2_64(_section->sh_addralign)); + } + + SectionChoice sectionChoice() const override { return sectionCustomRequired; } + + StringRef customSectionName() const override { return _sectionName; } + + DeadStripKind deadStrip() const override { return deadStripNormal; } + + ContentPermissions permissions() const override { return permR__; } + + ArrayRef<uint8_t> rawContent() const override { return _contentData; } + + DefinedAtom::reference_iterator begin() const override { + uintptr_t index = 0; + const void *it = reinterpret_cast<const void *>(index); + return reference_iterator(*this, it); + } + + DefinedAtom::reference_iterator end() const override { + uintptr_t index = 0; + const void *it = reinterpret_cast<const void *>(index); + return reference_iterator(*this, it); + } + + const Reference *derefIterator(const void *It) const override { + return nullptr; + } + + void incrementIterator(const void *&It) const override {} + +private: + + const ELFFile<ELFT> &_owningFile; + StringRef _sectionName; + const Elf_Shdr *_section; + /// \brief Holds the bits that make up the atom. 
+ ArrayRef<uint8_t> _contentData; + uint64_t _ordinal; + uint64_t _offset; +}; + +template <class ELFT> class ELFCommonAtom : public DefinedAtom { + typedef llvm::object::Elf_Sym_Impl<ELFT> Elf_Sym; +public: + ELFCommonAtom(const ELFFile<ELFT> &file, + StringRef symbolName, + const Elf_Sym *symbol) + : _owningFile(file), + _symbolName(symbolName), + _symbol(symbol) {} + + const ELFFile<ELFT> &file() const override { return _owningFile; } + + StringRef name() const override { return _symbolName; } + + uint64_t ordinal() const override { return _ordinal; } + + virtual void setOrdinal(uint64_t ord) { _ordinal = ord; } + + uint64_t size() const override { return _symbol->st_size; } + + Scope scope() const override { + if (_symbol->getVisibility() == llvm::ELF::STV_HIDDEN) + return scopeLinkageUnit; + if (_symbol->getBinding() != llvm::ELF::STB_LOCAL) + return scopeGlobal; + return scopeTranslationUnit; + } + + Interposable interposable() const override { return interposeNo; } + + Merge merge() const override { return mergeAsTentative; } + + ContentType contentType() const override { return typeZeroFill; } + + Alignment alignment() const override { + return Alignment(llvm::Log2_64(_symbol->st_value)); + } + + SectionChoice sectionChoice() const override { return sectionBasedOnContent; } + + StringRef customSectionName() const override { return ".bss"; } + + DeadStripKind deadStrip() const override { return deadStripNormal; } + + ContentPermissions permissions() const override { return permRW_; } + + ArrayRef<uint8_t> rawContent() const override { return ArrayRef<uint8_t>(); } + + DefinedAtom::reference_iterator begin() const override { + uintptr_t index = 0; + const void *it = reinterpret_cast<const void *>(index); + return reference_iterator(*this, it); + } + + DefinedAtom::reference_iterator end() const override { + uintptr_t index = 0; + const void *it = reinterpret_cast<const void *>(index); + return reference_iterator(*this, it); + } + +protected: + const Reference 
*derefIterator(const void *iter) const override { + return nullptr; + } + + void incrementIterator(const void *&iter) const override {} + + const ELFFile<ELFT> &_owningFile; + StringRef _symbolName; + const Elf_Sym *_symbol; + uint64_t _ordinal; +}; + +/// \brief An atom from a shared library. +template <class ELFT> class ELFDynamicAtom : public SharedLibraryAtom { + typedef llvm::object::Elf_Sym_Impl<ELFT> Elf_Sym; + +public: + ELFDynamicAtom(const DynamicFile<ELFT> &file, StringRef symbolName, + StringRef loadName, const Elf_Sym *symbol) + : _owningFile(file), _symbolName(symbolName), _loadName(loadName), + _symbol(symbol) { + } + + const DynamicFile<ELFT> &file() const override { return _owningFile; } + + StringRef name() const override { return _symbolName; } + + virtual Scope scope() const { + if (_symbol->getVisibility() == llvm::ELF::STV_HIDDEN) + return scopeLinkageUnit; + if (_symbol->getBinding() != llvm::ELF::STB_LOCAL) + return scopeGlobal; + return scopeTranslationUnit; + } + + StringRef loadName() const override { return _loadName; } + + bool canBeNullAtRuntime() const override { + return _symbol->getBinding() == llvm::ELF::STB_WEAK; + } + + Type type() const override { + switch (_symbol->getType()) { + case llvm::ELF::STT_FUNC: + case llvm::ELF::STT_GNU_IFUNC: + return Type::Code; + case llvm::ELF::STT_OBJECT: + return Type::Data; + default: + return Type::Unknown; + } + } + + uint64_t size() const override { + return _symbol->st_size; + } + +private: + + const DynamicFile<ELFT> &_owningFile; + StringRef _symbolName; + StringRef _loadName; + const Elf_Sym *_symbol; +}; + +class SimpleELFDefinedAtom : public SimpleDefinedAtom { +public: + SimpleELFDefinedAtom(const File &f) : SimpleDefinedAtom(f) {} + + void addReferenceELF(Reference::KindArch arch, Reference::KindValue kindValue, + uint64_t off, const Atom *target, + Reference::Addend addend) { + this->addReference(Reference::KindNamespace::ELF, arch, kindValue, off, + target, addend); + } + + void 
addReferenceELF_Hexagon(Reference::KindValue relocType, uint64_t off, + const Atom *t, Reference::Addend a) { + this->addReferenceELF(Reference::KindArch::Hexagon, relocType, off, t, a); + } + + void addReferenceELF_x86_64(Reference::KindValue relocType, uint64_t off, + const Atom *t, Reference::Addend a) { + this->addReferenceELF(Reference::KindArch::x86_64, relocType, off, t, a); + } + + void addReferenceELF_Mips(Reference::KindValue relocType, uint64_t off, + const Atom *t, Reference::Addend a) { + this->addReferenceELF(Reference::KindArch::Mips, relocType, off, t, a); + } + + void addReferenceELF_AArch64(Reference::KindValue relocType, uint64_t off, + const Atom *t, Reference::Addend a) { + this->addReferenceELF(Reference::KindArch::AArch64, relocType, off, t, a); + } + + void addReferenceELF_ARM(Reference::KindValue relocType, uint64_t off, + const Atom *t, Reference::Addend a) { + this->addReferenceELF(Reference::KindArch::ARM, relocType, off, t, a); + } +}; + +/// \brief Atom which represents an object for which a COPY relocation will be +/// generated. 
+class ObjectAtom : public SimpleELFDefinedAtom { +public: + ObjectAtom(const File &f) : SimpleELFDefinedAtom(f) {} + + Scope scope() const override { return scopeGlobal; } + + SectionChoice sectionChoice() const override { return sectionBasedOnContent; } + + ContentType contentType() const override { return typeZeroFill; } + + uint64_t size() const override { return _size; } + + DynamicExport dynamicExport() const override { return dynamicExportAlways; } + + ContentPermissions permissions() const override { return permRW_; } + + ArrayRef<uint8_t> rawContent() const override { return ArrayRef<uint8_t>(); } + + Alignment alignment() const override { + // The alignment should be 8 byte aligned + return Alignment(3); + } + + StringRef name() const override { return _name; } + + std::string _name; + uint64_t _size; +}; + +class GOTAtom : public SimpleELFDefinedAtom { + StringRef _section; + +public: + GOTAtom(const File &f, StringRef secName) + : SimpleELFDefinedAtom(f), _section(secName) {} + + Scope scope() const override { return scopeTranslationUnit; } + + SectionChoice sectionChoice() const override { return sectionCustomRequired; } + + StringRef customSectionName() const override { return _section; } + + ContentType contentType() const override { return typeGOT; } + + uint64_t size() const override { return rawContent().size(); } + + ContentPermissions permissions() const override { return permRW_; } + + Alignment alignment() const override { + // The alignment should be 8 byte aligned + return Alignment(3); + } + +#ifndef NDEBUG + StringRef name() const override { return _name; } + std::string _name; +#else + StringRef name() const override { return ""; } +#endif +}; + +class PLTAtom : public SimpleELFDefinedAtom { + StringRef _section; + +public: + PLTAtom(const File &f, StringRef secName) + : SimpleELFDefinedAtom(f), _section(secName) {} + + Scope scope() const override { return scopeTranslationUnit; } + + SectionChoice sectionChoice() const override { return 
sectionCustomRequired; } + + StringRef customSectionName() const override { return _section; } + + ContentType contentType() const override { return typeStub; } + + uint64_t size() const override { return rawContent().size(); } + + ContentPermissions permissions() const override { return permR_X; } + + Alignment alignment() const override { + return Alignment(4); // 16 + } + +#ifndef NDEBUG + StringRef name() const override { return _name; } + std::string _name; +#else + StringRef name() const override { return ""; } +#endif +}; + +class PLT0Atom : public PLTAtom { +public: + PLT0Atom(const File &f) : PLTAtom(f, ".plt") { +#ifndef NDEBUG + _name = ".PLT0"; +#endif + } +}; + +class GLOBAL_OFFSET_TABLEAtom : public SimpleELFDefinedAtom { +public: + GLOBAL_OFFSET_TABLEAtom(const File &f) : SimpleELFDefinedAtom(f) {} + + StringRef name() const override { return "_GLOBAL_OFFSET_TABLE_"; } + + Scope scope() const override { return scopeLinkageUnit; } + + SectionChoice sectionChoice() const override { return sectionCustomRequired; } + + StringRef customSectionName() const override { return ".got.plt"; } + + ContentType contentType() const override { return typeGOT; } + + uint64_t size() const override { return 0; } + + ContentPermissions permissions() const override { return permRW_; } + + Alignment alignment() const override { + // Needs 8 byte alignment + return Alignment(3); + } + + ArrayRef<uint8_t> rawContent() const override { return ArrayRef<uint8_t>(); } +}; + +class DYNAMICAtom : public SimpleELFDefinedAtom { +public: + DYNAMICAtom(const File &f) : SimpleELFDefinedAtom(f) {} + + StringRef name() const override { return "_DYNAMIC"; } + + Scope scope() const override { return scopeLinkageUnit; } + + Merge merge() const override { return mergeNo; } + + SectionChoice sectionChoice() const override { return sectionCustomRequired; } + + StringRef customSectionName() const override { return ".dynamic"; } + + ContentType contentType() const override { return typeData; } 
+ + uint64_t size() const override { return 0; } + + ContentPermissions permissions() const override { return permRW_; } + + Alignment alignment() const override { return Alignment(0); } + + ArrayRef<uint8_t> rawContent() const override { return ArrayRef<uint8_t>(); } +}; +} // end namespace elf +} // end namespace lld + +#endif diff --git a/lib/ReaderWriter/ELF/CMakeLists.txt b/lib/ReaderWriter/ELF/CMakeLists.txt new file mode 100644 index 000000000000..fd4cb669904d --- /dev/null +++ b/lib/ReaderWriter/ELF/CMakeLists.txt @@ -0,0 +1,19 @@ +add_llvm_library(lldELF + ELFLinkingContext.cpp + Reader.cpp + Writer.cpp + LINK_LIBS + lldReaderWriter + lldCore + lldYAML + LLVMSupport + ) + +include_directories(.) + +add_subdirectory(X86) +add_subdirectory(X86_64) +add_subdirectory(Mips) +add_subdirectory(Hexagon) +add_subdirectory(AArch64) +add_subdirectory(ARM) diff --git a/lib/ReaderWriter/ELF/Chunk.h b/lib/ReaderWriter/ELF/Chunk.h new file mode 100644 index 000000000000..2658d023b3a9 --- /dev/null +++ b/lib/ReaderWriter/ELF/Chunk.h @@ -0,0 +1,102 @@ +//===- lib/ReaderWriter/ELF/Chunks.h --------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
//
//===----------------------------------------------------------------------===//

#ifndef LLD_READER_WRITER_ELF_CHUNKS_H
#define LLD_READER_WRITER_ELF_CHUNKS_H

#include "lld/Core/LLVM.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Object/ELF.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileOutputBuffer.h"
#include <memory>

namespace lld {
class ELFLinkingContext;

namespace elf {
class ELFWriter;

template <class ELFT> class TargetLayout;

/// \brief A chunk is a contiguous region of space
///
/// Abstract base: subclasses must implement getContentType(), write(),
/// doPreFlight() and finalize(). The base only stores the bookkeeping fields
/// (sizes, alignment, order, ordinal, address, file offset) and exposes
/// accessors for them.
template<class ELFT>
class Chunk {
public:

  /// \brief Describes the type of Chunk
  enum Kind : uint8_t{ ELFHeader,     ///< ELF Header
                       ProgramHeader, ///< Program Header
                       SectionHeader, ///< Section header
                       ELFSegment,    ///< Segment
                       ELFSection,    ///< Section
                       AtomSection,   ///< A section containing atoms.
                       Expression     ///< A linker script expression
  };
  /// \brief the ContentType of the chunk
  enum ContentType : uint8_t{ Unknown, Header, Code, Data, Note, TLS };

  /// Every numeric field starts at 0 except the ordinal, which starts at 1.
  Chunk(StringRef name, Kind kind, const ELFLinkingContext &context)
      : _name(name), _kind(kind), _fsize(0), _msize(0), _alignment(0), _order(0),
        _ordinal(1), _start(0), _fileoffset(0), _context(context) {}
  virtual ~Chunk() {}
  // The name of the chunk
  StringRef name() const { return _name; }
  // Kind of chunk
  Kind kind() const { return _kind; }
  // Size of the chunk in the output file
  virtual uint64_t fileSize() const { return _fsize; }
  virtual void setFileSize(uint64_t sz) { _fsize = sz; }
  // Alignment of the chunk
  virtual void setAlign(uint64_t align) { _alignment = align; }
  virtual uint64_t alignment() const { return _alignment; }

  // The ordinal value of the chunk
  uint64_t ordinal() const { return _ordinal;}
  void setOrdinal(uint64_t newVal) { _ordinal = newVal;}
  // The order in which the chunk would appear in the output file
  // (stored as 32 bits; order() widens it to 64 bits on return)
  uint64_t order() const { return _order; }
  void setOrder(uint32_t order) { _order = order; }
  // Output file offset of the chunk
  uint64_t fileOffset() const { return _fileoffset; }
  void setFileOffset(uint64_t offset) { _fileoffset = offset; }
  // Output start address of the chunk
  virtual void setVirtualAddr(uint64_t start) { _start = start; }
  virtual uint64_t virtualAddr() const { return _start; }
  // Memory size of the chunk
  uint64_t memSize() const { return _msize; }
  void setMemSize(uint64_t msize) { _msize = msize; }
  // What is the content type of the chunk? (declared as int, not ContentType)
  virtual int getContentType() const = 0;
  // Write the chunk
  virtual void write(ELFWriter *writer, TargetLayout<ELFT> &layout,
                     llvm::FileOutputBuffer &buffer) = 0;
  // Finalize the chunk before assigning offsets/virtual addresses
  virtual void doPreFlight() = 0;
  // Finalize the chunk before writing
  virtual void finalize() = 0;

protected:
  StringRef _name;
  Kind _kind;
  uint64_t _fsize;
  uint64_t _msize;
  uint64_t _alignment;
  uint32_t _order;
  uint64_t _ordinal;
  uint64_t _start;
  uint64_t _fileoffset;
  const ELFLinkingContext &_context;
};

} // end namespace elf
} // end namespace lld

#endif
diff --git a/lib/ReaderWriter/ELF/CreateELF.h b/lib/ReaderWriter/ELF/CreateELF.h new file mode 100644 index 000000000000..ad34dddb24d3 --- /dev/null +++ b/lib/ReaderWriter/ELF/CreateELF.h @@ -0,0 +1,118 @@ +//===- lib/ReaderWriter/ELF/CreateELF.h -----------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file provides a simple way to create an object templated on +/// ELFType depending on the runtime type needed.
///
//===----------------------------------------------------------------------===//
#ifndef LLD_READER_WRITER_ELF_CREATE_ELF_H
#define LLD_READER_WRITER_ELF_CREATE_ELF_H

#include "llvm/Object/ELF.h"
#include "llvm/Support/Compiler.h"

// NOTE(review): this is an anonymous namespace inside a header, so each
// translation unit that includes it gets its own copy of createELF -- confirm
// that this is intended.
namespace {
using llvm::object::ELFType;

/// \func createELF
/// \brief Create an object depending on the runtime attributes and alignment
/// of an ELF file.
///
/// \param Traits
/// Traits::result_type must be a type convertible from what create returns.
/// Traits::create must be a template function which takes an ELFType and
/// returns something convertible to Traits::result_type.
///
/// \param ident pair of EI_CLASS and EI_DATA.
/// \param maxAlignment the maximum alignment of the file.
/// \param args arguments forwarded to CreateELFTraits<T>::create.

// Expands to a complete statement: the call to Traits::create<ELFType<...>>
// followed by ';'. The macros below rely on that trailing semicolon so that
// `return <this macro> else ...` parses.
#define LLVM_CREATE_ELF_CreateELFTraits(endian, align, is64, ...)             \
  Traits::template create<ELFType<llvm::support::endian, align, is64>>(       \
      __VA_ARGS__);

// Picks the alignment template argument from the in-scope `maxAlignment`.
// When unaligned access is not fast, `normal` is tried first and `low` second;
// otherwise only `low` is considered. Any smaller value is unreachable.
#if !LLVM_IS_UNALIGNED_ACCESS_FAST
# define LLVM_CREATE_ELF_MaxAlignCheck(normal, low, endian, is64, ...)        \
  if (maxAlignment >= normal)                                                 \
    return LLVM_CREATE_ELF_CreateELFTraits(endian, normal, is64, __VA_ARGS__) \
  else if (maxAlignment >= low)                                               \
    return LLVM_CREATE_ELF_CreateELFTraits(endian, low, is64, __VA_ARGS__)    \
  else                                                                        \
    llvm_unreachable("Invalid alignment for ELF file!");
#else
# define LLVM_CREATE_ELF_MaxAlignCheck(normal, low, endian, is64, ...)        \
  if (maxAlignment >= low)                                                    \
    return LLVM_CREATE_ELF_CreateELFTraits(endian, low, is64, __VA_ARGS__)    \
  else                                                                        \
    llvm_unreachable("Invalid alignment for ELF file!");
#endif

// Body shared by every createELF overload: dispatches on the in-scope `ident`
// (first = ELF class, second = data encoding) to one of the four
// class/endianness combinations; any other pair is unreachable.
#define LLVM_CREATE_ELF_IMPL(...)                                             \
  if (ident.first == llvm::ELF::ELFCLASS32 &&                                 \
      ident.second == llvm::ELF::ELFDATA2LSB) {                               \
    LLVM_CREATE_ELF_MaxAlignCheck(4, 2, little, false, __VA_ARGS__)           \
  } else if (ident.first == llvm::ELF::ELFCLASS32 &&                          \
             ident.second == llvm::ELF::ELFDATA2MSB) {                        \
    LLVM_CREATE_ELF_MaxAlignCheck(4, 2, big, false, __VA_ARGS__)              \
  } else if (ident.first == llvm::ELF::ELFCLASS64 &&                          \
             ident.second == llvm::ELF::ELFDATA2MSB) {                        \
    LLVM_CREATE_ELF_MaxAlignCheck(8, 2, big, true, __VA_ARGS__)               \
  } else if (ident.first == llvm::ELF::ELFCLASS64 &&                          \
             ident.second == llvm::ELF::ELFDATA2LSB) {                        \
    LLVM_CREATE_ELF_MaxAlignCheck(8, 2, little, true, __VA_ARGS__)            \
  }                                                                           \
  llvm_unreachable("Invalid ELF type!");

// With variadic templates a single overload forwards any number of arguments;
// otherwise fixed-arity overloads for one to four arguments are provided.
#if LLVM_HAS_VARIADIC_TEMPLATES
template <class Traits, class ...Args>
typename Traits::result_type createELF(
    std::pair<unsigned char, unsigned char> ident, std::size_t maxAlignment,
    Args &&...args) {
  LLVM_CREATE_ELF_IMPL(std::forward<Args>(args)...)
}
#else
template <class Traits, class T1>
typename Traits::result_type createELF(
    std::pair<unsigned char, unsigned char> ident, std::size_t maxAlignment,
    T1 &&t1) {
  LLVM_CREATE_ELF_IMPL(std::forward<T1>(t1))
}

template <class Traits, class T1, class T2>
typename Traits::result_type createELF(
    std::pair<unsigned char, unsigned char> ident, std::size_t maxAlignment,
    T1 &&t1, T2 &&t2) {
  LLVM_CREATE_ELF_IMPL(std::forward<T1>(t1), std::forward<T2>(t2))
}

template <class Traits, class T1, class T2, class T3>
typename Traits::result_type createELF(
    std::pair<unsigned char, unsigned char> ident, std::size_t maxAlignment,
    T1 &&t1, T2 &&t2, T3 &&t3) {
  LLVM_CREATE_ELF_IMPL(std::forward<T1>(t1), std::forward<T2>(t2),
                       std::forward<T3>(t3))
}

template <class Traits, class T1, class T2, class T3, class T4>
typename Traits::result_type createELF(
    std::pair<unsigned char, unsigned char> ident, std::size_t maxAlignment,
    T1 &&t1, T2 &&t2, T3 &&t3, T4 &&t4) {
  LLVM_CREATE_ELF_IMPL(std::forward<T1>(t1), std::forward<T2>(t2),
                       std::forward<T3>(t3), std::forward<T4>(t4))
}

#endif // LLVM_HAS_VARIADIC_TEMPLATES
} // end anon namespace

// The helper macros are implementation details; remove them so they do not
// leak into files that include this header.
#undef LLVM_CREATE_ELF_CreateELFTraits
#undef LLVM_CREATE_ELF_MaxAlignCheck
#undef LLVM_CREATE_ELF_IMPL

#endif
diff --git a/lib/ReaderWriter/ELF/DefaultLayout.h b/lib/ReaderWriter/ELF/DefaultLayout.h new file mode 100644 index 000000000000..9af3b8eb8dc6 --- /dev/null +++ b/lib/ReaderWriter/ELF/DefaultLayout.h @@ -0,0 +1,1050 @@ +//===- lib/ReaderWriter/ELF/DefaultLayout.h -------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_ELF_DEFAULT_LAYOUT_H +#define LLD_READER_WRITER_ELF_DEFAULT_LAYOUT_H + +#include "Atoms.h" +#include "Chunk.h" +#include "HeaderChunks.h" +#include "Layout.h" +#include "SectionChunks.h" +#include "SegmentChunks.h" +#include "lld/Core/Instrumentation.h" +#include "lld/Core/STDExtras.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/Format.h" +#include <map> +#include <unordered_map> + +namespace lld { +namespace elf { +/// \brief The DefaultLayout class is used by the Writer to arrange +/// sections and segments in the order determined by the target ELF +/// format.
The writer creates a single instance of the DefaultLayout +/// class +template<class ELFT> +class DefaultLayout : public Layout { +public: + + // The order in which the sections appear in the output file + // If its determined, that the layout needs to change + // just changing the order of enumerations would essentially + // change the layout in the output file + // Change the enumerations so that Target can override and stick + // a section anywhere it wants to + enum DefaultSectionOrder { + ORDER_NOT_DEFINED = 0, + ORDER_INTERP = 10, + ORDER_RO_NOTE = 15, + ORDER_HASH = 30, + ORDER_DYNAMIC_SYMBOLS = 40, + ORDER_DYNAMIC_STRINGS = 50, + ORDER_DYNAMIC_RELOCS = 52, + ORDER_DYNAMIC_PLT_RELOCS = 54, + ORDER_INIT = 60, + ORDER_PLT = 70, + ORDER_TEXT = 80, + ORDER_FINI = 90, + ORDER_REL = 95, + ORDER_RODATA = 100, + ORDER_EH_FRAME = 110, + ORDER_EH_FRAMEHDR = 120, + ORDER_TDATA = 124, + ORDER_TBSS = 128, + ORDER_CTORS = 130, + ORDER_DTORS = 140, + ORDER_INIT_ARRAY = 150, + ORDER_FINI_ARRAY = 160, + ORDER_DYNAMIC = 170, + ORDER_GOT = 180, + ORDER_GOT_PLT = 190, + ORDER_DATA = 200, + ORDER_RW_NOTE = 205, + ORDER_BSS = 210, + ORDER_NOALLOC = 215, + ORDER_OTHER = 220, + ORDER_SECTION_STRINGS = 230, + ORDER_SYMBOL_TABLE = 240, + ORDER_STRING_TABLE = 250, + ORDER_SECTION_HEADERS = 260 + }; + +public: + + // The Key used for creating Sections + // The sections are created using + // SectionName, contentPermissions + struct SectionKey { + SectionKey(StringRef name, DefinedAtom::ContentPermissions perm, + StringRef path) + : _name(name), _perm(perm), _path(path) {} + + // Data members + StringRef _name; + DefinedAtom::ContentPermissions _perm; + StringRef _path; + }; + + struct SectionKeyHash { + int64_t operator()(const SectionKey &k) const { + return llvm::hash_combine(k._name, k._perm, k._path); + } + }; + + struct SectionKeyEq { + bool operator()(const SectionKey &lhs, const SectionKey &rhs) const { + return ((lhs._name == rhs._name) && (lhs._perm == rhs._perm) && + 
(lhs._path == rhs._path)); + } + }; + + typedef typename std::vector<Chunk<ELFT> *>::iterator ChunkIter; + typedef typename std::vector<Segment<ELFT> *>::iterator SegmentIter; + + // The additional segments are used to figure out + // if there is a segment by that type already created + // For example : PT_TLS, we have two sections .tdata/.tbss + // that are part of PT_TLS, we need to create this additional + // segment only once + typedef std::pair<int64_t, int64_t> AdditionalSegmentKey; + // The segments are created using + // SegmentName, Segment flags + typedef std::pair<StringRef, int64_t> SegmentKey; + + // HashKey for the Segment + class SegmentHashKey { + public: + int64_t operator() (const SegmentKey &k) const { + // k.first = SegmentName + // k.second = SegmentFlags + return llvm::hash_combine(k.first, k.second); + } + }; + + class AdditionalSegmentHashKey { + public: + int64_t operator()(const AdditionalSegmentKey &k) const { + // k.first = SegmentName + // k.second = SegmentFlags + return llvm::hash_combine(k.first, k.second); + } + }; + + // Output Sections contain the map of Sectionnames to a vector of sections, + // that have been merged to form a single section + typedef llvm::StringMap<OutputSection<ELFT> *> OutputSectionMapT; + typedef + typename std::vector<OutputSection<ELFT> *>::iterator OutputSectionIter; + + typedef std::unordered_map<SectionKey, AtomSection<ELFT> *, SectionKeyHash, + SectionKeyEq> SectionMapT; + typedef std::unordered_map<AdditionalSegmentKey, Segment<ELFT> *, + AdditionalSegmentHashKey> AdditionalSegmentMapT; + typedef std::unordered_map<SegmentKey, Segment<ELFT> *, SegmentHashKey> + SegmentMapT; + + /// \brief find a absolute atom pair given a absolute atom name + struct FindByName { + const std::string _name; + FindByName(StringRef name) : _name(name) {} + bool operator()(const lld::AtomLayout *j) { return j->_atom->name() == _name; } + }; + + typedef typename std::vector<lld::AtomLayout *>::iterator AbsoluteAtomIterT; + 
+ typedef llvm::DenseSet<const Atom *> AtomSetT; + + DefaultLayout(ELFLinkingContext &context) + : _context(context), _linkerScriptSema(context.linkerScriptSema()) {} + + /// \brief Return the section order for a input section + SectionOrder getSectionOrder(StringRef name, int32_t contentType, + int32_t contentPermissions) override; + + /// \brief Return the name of the input section by decoding the input + /// sectionChoice. + virtual StringRef getInputSectionName(const DefinedAtom *da) const; + + /// \brief Return the name of the output section from the input section. + virtual StringRef getOutputSectionName(StringRef archivePath, + StringRef memberPath, + StringRef inputSectionName) const; + + /// \brief Gets or creates a section. + AtomSection<ELFT> * + getSection(StringRef name, int32_t contentType, + DefinedAtom::ContentPermissions contentPermissions, + const DefinedAtom *da); + + /// \brief Gets the segment for a output section + virtual Layout::SegmentType getSegmentType(Section<ELFT> *section) const; + + /// \brief Returns true/false depending on whether the section has a Output + // segment or not + static bool hasOutputSegment(Section<ELFT> *section); + + // Adds an atom to the section + ErrorOr<const lld::AtomLayout *> addAtom(const Atom *atom) override; + + /// \brief Find an output Section given a section name. + OutputSection<ELFT> *findOutputSection(StringRef name) { + auto iter = _outputSectionMap.find(name); + if (iter == _outputSectionMap.end()) + return nullptr; + return iter->second; + } + + /// \brief find a absolute atom given a name + AbsoluteAtomIterT findAbsoluteAtom(StringRef name) { + return std::find_if(_absoluteAtoms.begin(), _absoluteAtoms.end(), + FindByName(name)); + } + + // Output sections with the same name into a OutputSection + void createOutputSections(); + + /// \brief Sort the sections by their order as defined by the layout, + /// preparing all sections to be assigned to a segment. 
+ virtual void sortInputSections(); + + /// \brief Add extra chunks to a segment just before including the input + /// section given by <archivePath, memberPath, sectionName>. This + /// is used to add linker script expressions before each section. + virtual void addExtraChunksToSegment(Segment<ELFT> *segment, + StringRef archivePath, + StringRef memberPath, + StringRef sectionName); + + void assignSectionsToSegments() override; + + void assignVirtualAddress() override; + + void assignFileOffsetsForMiscSections(); + + range<AbsoluteAtomIterT> absoluteAtoms() { return _absoluteAtoms; } + + void addSection(Chunk<ELFT> *c) { _sections.push_back(c); } + + void finalize() { + ScopedTask task(getDefaultDomain(), "Finalize layout"); + for (auto &si : _sections) + si->finalize(); + } + + void doPreFlight() { + for (auto &si : _sections) + si->doPreFlight(); + } + + const AtomLayout *findAtomLayoutByName(StringRef name) const override { + for (auto sec : _sections) + if (auto section = dyn_cast<Section<ELFT>>(sec)) + if (auto *al = section->findAtomLayoutByName(name)) + return al; + return nullptr; + } + + void setHeader(ELFHeader<ELFT> *elfHeader) { _elfHeader = elfHeader; } + + void setProgramHeader(ProgramHeader<ELFT> *p) { + _programHeader = p; + } + + range<OutputSectionIter> outputSections() { return _outputSections; } + + range<ChunkIter> sections() { return _sections; } + + range<SegmentIter> segments() { return _segments; } + + ELFHeader<ELFT> *getHeader() { return _elfHeader; } + + bool hasDynamicRelocationTable() const { return !!_dynamicRelocationTable; } + + bool hasPLTRelocationTable() const { return !!_pltRelocationTable; } + + /// \brief Get or create the dynamic relocation table. All relocations in this + /// table are processed at startup. + RelocationTable<ELFT> *getDynamicRelocationTable() { + if (!_dynamicRelocationTable) { + _dynamicRelocationTable = std::move(createRelocationTable( + _context.isRelaOutputFormat() ? 
".rela.dyn" : ".rel.dyn", + ORDER_DYNAMIC_RELOCS)); + addSection(_dynamicRelocationTable.get()); + } + return _dynamicRelocationTable.get(); + } + + /// \brief Get or create the PLT relocation table. Referenced by DT_JMPREL. + RelocationTable<ELFT> *getPLTRelocationTable() { + if (!_pltRelocationTable) { + _pltRelocationTable = std::move(createRelocationTable( + _context.isRelaOutputFormat() ? ".rela.plt" : ".rel.plt", + ORDER_DYNAMIC_PLT_RELOCS)); + addSection(_pltRelocationTable.get()); + } + return _pltRelocationTable.get(); + } + + uint64_t getTLSSize() const { + for (const auto &phdr : *_programHeader) + if (phdr->p_type == llvm::ELF::PT_TLS) + return phdr->p_memsz; + return 0; + } + + bool isReferencedByDefinedAtom(const Atom *a) const { + return _referencedDynAtoms.count(a); + } + + bool isCopied(const SharedLibraryAtom *sla) const { + return _copiedDynSymNames.count(sla->name()); + } + + /// \brief Handle SORT_BY_PRIORITY. + void sortOutputSectionByPriority(StringRef outputSectionName, + StringRef prefix); + +protected: + /// \brief TargetLayouts may use these functions to reorder the input sections + /// in a order defined by their ABI. + virtual void finalizeOutputSectionLayout() {} + + /// \brief Allocate a new section. + virtual AtomSection<ELFT> *createSection( + StringRef name, int32_t contentType, + DefinedAtom::ContentPermissions contentPermissions, + SectionOrder sectionOrder); + + /// \brief Create a new relocation table. + virtual unique_bump_ptr<RelocationTable<ELFT>> + createRelocationTable(StringRef name, int32_t order) { + return unique_bump_ptr<RelocationTable<ELFT>>( + new (_allocator) RelocationTable<ELFT>(_context, name, order)); + } + +private: + /// Helper function that returns the priority value from an input section. 
+ uint32_t getPriorityFromSectionName(StringRef sectionName) const; + +protected: + llvm::BumpPtrAllocator _allocator; + SectionMapT _sectionMap; + OutputSectionMapT _outputSectionMap; + AdditionalSegmentMapT _additionalSegmentMap; + SegmentMapT _segmentMap; + std::vector<Chunk<ELFT> *> _sections; + std::vector<Segment<ELFT> *> _segments; + std::vector<OutputSection<ELFT> *> _outputSections; + ELFHeader<ELFT> *_elfHeader; + ProgramHeader<ELFT> *_programHeader; + unique_bump_ptr<RelocationTable<ELFT>> _dynamicRelocationTable; + unique_bump_ptr<RelocationTable<ELFT>> _pltRelocationTable; + std::vector<lld::AtomLayout *> _absoluteAtoms; + AtomSetT _referencedDynAtoms; + llvm::StringSet<> _copiedDynSymNames; + ELFLinkingContext &_context; + script::Sema &_linkerScriptSema; +}; + +template <class ELFT> +Layout::SectionOrder DefaultLayout<ELFT>::getSectionOrder( + StringRef name, int32_t contentType, int32_t contentPermissions) { + switch (contentType) { + case DefinedAtom::typeResolver: + case DefinedAtom::typeCode: + return llvm::StringSwitch<Layout::SectionOrder>(name) + .StartsWith(".eh_frame_hdr", ORDER_EH_FRAMEHDR) + .StartsWith(".eh_frame", ORDER_EH_FRAME) + .StartsWith(".init", ORDER_INIT) + .StartsWith(".fini", ORDER_FINI) + .StartsWith(".hash", ORDER_HASH) + .Default(ORDER_TEXT); + + case DefinedAtom::typeConstant: + return ORDER_RODATA; + + case DefinedAtom::typeData: + case DefinedAtom::typeDataFast: + return llvm::StringSwitch<Layout::SectionOrder>(name) + .StartsWith(".init_array", ORDER_INIT_ARRAY) + .StartsWith(".fini_array", ORDER_FINI_ARRAY) + .StartsWith(".dynamic", ORDER_DYNAMIC) + .StartsWith(".ctors", ORDER_CTORS) + .StartsWith(".dtors", ORDER_DTORS) + .Default(ORDER_DATA); + + case DefinedAtom::typeZeroFill: + case DefinedAtom::typeZeroFillFast: + return ORDER_BSS; + + case DefinedAtom::typeGOT: + return llvm::StringSwitch<Layout::SectionOrder>(name) + .StartsWith(".got.plt", ORDER_GOT_PLT) + .Default(ORDER_GOT); + + case DefinedAtom::typeStub: + 
return ORDER_PLT; + + case DefinedAtom::typeRONote: + return ORDER_RO_NOTE; + + case DefinedAtom::typeRWNote: + return ORDER_RW_NOTE; + + case DefinedAtom::typeNoAlloc: + return ORDER_NOALLOC; + + case DefinedAtom::typeThreadData: + return ORDER_TDATA; + case DefinedAtom::typeThreadZeroFill: + return ORDER_TBSS; + default: + // If we get passed in a section push it to OTHER + if (contentPermissions == DefinedAtom::perm___) + return ORDER_OTHER; + + return ORDER_NOT_DEFINED; + } +} + +/// \brief This maps the input sections to the output section names +template <class ELFT> +StringRef +DefaultLayout<ELFT>::getInputSectionName(const DefinedAtom *da) const { + if (da->sectionChoice() == DefinedAtom::sectionBasedOnContent) { + switch (da->contentType()) { + case DefinedAtom::typeCode: + return ".text"; + case DefinedAtom::typeData: + return ".data"; + case DefinedAtom::typeConstant: + return ".rodata"; + case DefinedAtom::typeZeroFill: + return ".bss"; + case DefinedAtom::typeThreadData: + return ".tdata"; + case DefinedAtom::typeThreadZeroFill: + return ".tbss"; + default: + break; + } + } + return da->customSectionName(); +} + +/// \brief This maps the input sections to the output section names. 
+template <class ELFT> +StringRef +DefaultLayout<ELFT>::getOutputSectionName(StringRef archivePath, + StringRef memberPath, + StringRef inputSectionName) const { + StringRef outputSectionName; + if (_linkerScriptSema.hasLayoutCommands()) { + script::Sema::SectionKey key = {archivePath, memberPath, inputSectionName}; + outputSectionName = _linkerScriptSema.getOutputSection(key); + if (!outputSectionName.empty()) + return outputSectionName; + } + return llvm::StringSwitch<StringRef>(inputSectionName) + .StartsWith(".text", ".text") + .StartsWith(".ctors", ".ctors") + .StartsWith(".dtors", ".dtors") + .StartsWith(".rodata", ".rodata") + .StartsWith(".gcc_except_table", ".gcc_except_table") + .StartsWith(".data.rel.ro", ".data.rel.ro") + .StartsWith(".data.rel.local", ".data.rel.local") + .StartsWith(".data", ".data") + .StartsWith(".tdata", ".tdata") + .StartsWith(".tbss", ".tbss") + .StartsWith(".init_array", ".init_array") + .StartsWith(".fini_array", ".fini_array") + .Default(inputSectionName); +} + +/// \brief Gets the segment for a output section +template <class ELFT> +Layout::SegmentType DefaultLayout<ELFT>::getSegmentType( + Section<ELFT> *section) const { + + switch (section->order()) { + case ORDER_INTERP: + return llvm::ELF::PT_INTERP; + + case ORDER_TEXT: + case ORDER_HASH: + case ORDER_DYNAMIC_SYMBOLS: + case ORDER_DYNAMIC_STRINGS: + case ORDER_DYNAMIC_RELOCS: + case ORDER_DYNAMIC_PLT_RELOCS: + case ORDER_REL: + case ORDER_INIT: + case ORDER_PLT: + case ORDER_FINI: + case ORDER_RODATA: + case ORDER_EH_FRAME: + case ORDER_CTORS: + case ORDER_DTORS: + return llvm::ELF::PT_LOAD; + + case ORDER_RO_NOTE: + case ORDER_RW_NOTE: + return llvm::ELF::PT_NOTE; + + case ORDER_DYNAMIC: + return llvm::ELF::PT_DYNAMIC; + + case ORDER_EH_FRAMEHDR: + return llvm::ELF::PT_GNU_EH_FRAME; + + case ORDER_GOT: + case ORDER_GOT_PLT: + case ORDER_DATA: + case ORDER_BSS: + case ORDER_INIT_ARRAY: + case ORDER_FINI_ARRAY: + return llvm::ELF::PT_LOAD; + + case ORDER_TDATA: + case 
ORDER_TBSS: + return llvm::ELF::PT_TLS; + + default: + return llvm::ELF::PT_NULL; + } +} + +template <class ELFT> +bool DefaultLayout<ELFT>::hasOutputSegment(Section<ELFT> *section) { + switch (section->order()) { + case ORDER_INTERP: + case ORDER_HASH: + case ORDER_DYNAMIC_SYMBOLS: + case ORDER_DYNAMIC_STRINGS: + case ORDER_DYNAMIC_RELOCS: + case ORDER_DYNAMIC_PLT_RELOCS: + case ORDER_REL: + case ORDER_INIT: + case ORDER_PLT: + case ORDER_TEXT: + case ORDER_FINI: + case ORDER_RODATA: + case ORDER_EH_FRAME: + case ORDER_EH_FRAMEHDR: + case ORDER_TDATA: + case ORDER_TBSS: + case ORDER_RO_NOTE: + case ORDER_RW_NOTE: + case ORDER_DYNAMIC: + case ORDER_CTORS: + case ORDER_DTORS: + case ORDER_GOT: + case ORDER_GOT_PLT: + case ORDER_DATA: + case ORDER_INIT_ARRAY: + case ORDER_FINI_ARRAY: + case ORDER_BSS: + case ORDER_NOALLOC: + return true; + default: + return section->hasOutputSegment(); + } +} + +template <class ELFT> +AtomSection<ELFT> *DefaultLayout<ELFT>::createSection( + StringRef sectionName, int32_t contentType, + DefinedAtom::ContentPermissions permissions, SectionOrder sectionOrder) { + return new (_allocator) AtomSection<ELFT>(_context, sectionName, contentType, + permissions, sectionOrder); +} + +template <class ELFT> +AtomSection<ELFT> * +DefaultLayout<ELFT>::getSection(StringRef sectionName, int32_t contentType, + DefinedAtom::ContentPermissions permissions, + const DefinedAtom *da) { + const SectionKey sectionKey(sectionName, permissions, da->file().path()); + SectionOrder sectionOrder = getSectionOrder(sectionName, contentType, permissions); + auto sec = _sectionMap.find(sectionKey); + if (sec != _sectionMap.end()) + return sec->second; + AtomSection<ELFT> *newSec = + createSection(sectionName, contentType, permissions, sectionOrder); + + newSec->setOutputSectionName(getOutputSectionName( + da->file().archivePath(), da->file().memberPath(), sectionName)); + newSec->setOrder(sectionOrder); + newSec->setArchiveNameOrPath(da->file().archivePath()); + 
newSec->setMemberNameOrPath(da->file().memberPath()); + _sections.push_back(newSec); + _sectionMap.insert(std::make_pair(sectionKey, newSec)); + return newSec; +} + +template <class ELFT> +ErrorOr<const lld::AtomLayout *> +DefaultLayout<ELFT>::addAtom(const Atom *atom) { + if (const DefinedAtom *definedAtom = dyn_cast<DefinedAtom>(atom)) { + // HACK: Ignore undefined atoms. We need to adjust the interface so that + // undefined atoms can still be included in the output symbol table for + // -noinhibit-exec. + if (definedAtom->contentType() == DefinedAtom::typeUnknown) + return make_error_code(llvm::errc::invalid_argument); + const DefinedAtom::ContentPermissions permissions = + definedAtom->permissions(); + const DefinedAtom::ContentType contentType = definedAtom->contentType(); + + StringRef sectionName = getInputSectionName(definedAtom); + AtomSection<ELFT> *section = + getSection(sectionName, contentType, permissions, definedAtom); + + // Add runtime relocations to the .rela section. 
+ for (const auto &reloc : *definedAtom) { + bool isLocalReloc = true; + if (_context.isDynamicRelocation(*reloc)) { + getDynamicRelocationTable()->addRelocation(*definedAtom, *reloc); + isLocalReloc = false; + } else if (_context.isPLTRelocation(*reloc)) { + getPLTRelocationTable()->addRelocation(*definedAtom, *reloc); + isLocalReloc = false; + } + + if (!reloc->target()) + continue; + + //Ignore undefined atoms that are not target of dynamic relocations + if (isa<UndefinedAtom>(reloc->target()) && isLocalReloc) + continue; + + if (_context.isCopyRelocation(*reloc)) { + _copiedDynSymNames.insert(definedAtom->name()); + continue; + } + + _referencedDynAtoms.insert(reloc->target()); + } + + return section->appendAtom(atom); + } else if (const AbsoluteAtom *absoluteAtom = dyn_cast<AbsoluteAtom>(atom)) { + // Absolute atoms are not part of any section, they are global for the whole + // link + _absoluteAtoms.push_back(new (_allocator) + lld::AtomLayout(absoluteAtom, 0, absoluteAtom->value())); + return _absoluteAtoms.back(); + } else { + llvm_unreachable("Only absolute / defined atoms can be added here"); + } +} + +/// Output sections with the same name into a OutputSection +template <class ELFT> void DefaultLayout<ELFT>::createOutputSections() { + OutputSection<ELFT> *outputSection; + + for (auto &si : _sections) { + Section<ELFT> *section = dyn_cast<Section<ELFT>>(si); + if (!section) + continue; + const std::pair<StringRef, OutputSection<ELFT> *> currentOutputSection( + section->outputSectionName(), nullptr); + std::pair<typename OutputSectionMapT::iterator, bool> outputSectionInsert( + _outputSectionMap.insert(currentOutputSection)); + if (!outputSectionInsert.second) { + outputSection = outputSectionInsert.first->second; + } else { + outputSection = new (_allocator.Allocate<OutputSection<ELFT>>()) + OutputSection<ELFT>(section->outputSectionName()); + _outputSections.push_back(outputSection); + outputSectionInsert.first->second = outputSection; + } + 
/// \brief Sort the input sections, merge them into output sections and
/// distribute them over segments.
///
/// Steps, in order:
///  1. sortInputSections(), createOutputSections() and
///     finalizeOutputSectionLayout() establish the final section order.
///  2. Every output section, and each chunk inside it, receives the same
///     1-based ordinal.
///  3. Each section that has an output segment is appended to a segment:
///     sections whose segment type is not PT_LOAD go into an "additional"
///     segment looked up by (segment type, flags); all sections except
///     PT_NULL ones are then also appended to a PT_LOAD segment looked up by
///     ("PT_LOAD", flags).
///  4. For dynamic executables (dynamic but not a shared library) a
///     ProgramHeaderSegment holding the ELF and program headers is added.
template <class ELFT> void DefaultLayout<ELFT>::assignSectionsToSegments() {
  ScopedTask task(getDefaultDomain(), "assignSectionsToSegments");
  ELFLinkingContext::OutputMagic outputMagic = _context.getOutputMagic();
  // sort the sections by their order as defined by the layout
  sortInputSections();

  // Create output sections.
  createOutputSections();

  // Finalize output section layout.
  finalizeOutputSectionLayout();

  // Set the ordinal after sorting the sections
  // (all chunks of one output section share that output section's ordinal)
  int ordinal = 1;
  for (auto osi : _outputSections) {
    osi->setOrdinal(ordinal);
    for (auto ai : osi->sections()) {
      ai->setOrdinal(ordinal);
    }
    ++ordinal;
  }
  for (auto osi : _outputSections) {
    for (auto ai : osi->sections()) {
      // Chunks that are not Section<ELFT> are not assigned here.
      if (auto section = dyn_cast<Section<ELFT> >(ai)) {
        if (!hasOutputSegment(section))
          continue;

        osi->setLoadableSection(section->isLoadableSection());

        // Get the segment type for the section
        int64_t segmentType = getSegmentType(section);

        osi->setHasSegment();
        section->setSegmentType(segmentType);
        StringRef segmentName = section->segmentKindToStr();

        // The flags below form the segment lookup key. Bits are cleared so
        // that sections differing only in those bits share one segment.
        int64_t lookupSectionFlag = osi->flags();
        // With mergeRODataToTextSegment(), non-writable sections drop
        // SHF_EXECINSTR from the key, so read-only data and code map to the
        // same key.
        if ((!(lookupSectionFlag & llvm::ELF::SHF_WRITE)) &&
            (_context.mergeRODataToTextSegment()))
          lookupSectionFlag &= ~llvm::ELF::SHF_EXECINSTR;

        // Merge string sections into Data segment itself
        lookupSectionFlag &= ~(llvm::ELF::SHF_STRINGS | llvm::ELF::SHF_MERGE);

        // Merge the TLS section into the DATA segment itself
        lookupSectionFlag &= ~(llvm::ELF::SHF_TLS);

        Segment<ELFT> *segment;
        // We need a separate segment for sections that don't have
        // the segment type to be PT_LOAD
        if (segmentType != llvm::ELF::PT_LOAD) {
          const AdditionalSegmentKey key(segmentType, lookupSectionFlag);
          const std::pair<AdditionalSegmentKey, Segment<ELFT> *>
          additionalSegment(key, nullptr);
          // insert() doubles as the lookup: .second is false when a segment
          // for this key already exists.
          std::pair<typename AdditionalSegmentMapT::iterator, bool>
          additionalSegmentInsert(
              _additionalSegmentMap.insert(additionalSegment));
          if (!additionalSegmentInsert.second) {
            segment = additionalSegmentInsert.first->second;
          } else {
            segment = new (_allocator)
                Segment<ELFT>(_context, segmentName, segmentType);
            additionalSegmentInsert.first->second = segment;
            _segments.push_back(segment);
          }
          segment->append(section);
        }
        // PT_NULL sections live only in their additional segment; everything
        // else is also placed in a PT_LOAD segment below.
        if (segmentType == llvm::ELF::PT_NULL)
          continue;

        // If the output magic is set to OutputMagic::NMAGIC or
        // OutputMagic::OMAGIC, Place the data alongside text in one single
        // segment
        if (outputMagic == ELFLinkingContext::OutputMagic::NMAGIC ||
            outputMagic == ELFLinkingContext::OutputMagic::OMAGIC)
          lookupSectionFlag = llvm::ELF::SHF_EXECINSTR | llvm::ELF::SHF_ALLOC |
                              llvm::ELF::SHF_WRITE;

        // Use the flags of the merged Section for the segment
        const SegmentKey key("PT_LOAD", lookupSectionFlag);
        const std::pair<SegmentKey, Segment<ELFT> *> currentSegment(key,
                                                                    nullptr);
        std::pair<typename SegmentMapT::iterator, bool> segmentInsert(
            _segmentMap.insert(currentSegment));
        if (!segmentInsert.second) {
          segment = segmentInsert.first->second;
        } else {
          segment = new (_allocator)
              Segment<ELFT>(_context, "PT_LOAD", llvm::ELF::PT_LOAD);
          segmentInsert.first->second = segment;
          _segments.push_back(segment);
        }
        // Insert chunks with linker script expressions that occur at this
        // point, just before appending a new input section
        addExtraChunksToSegment(segment, section->archivePath(),
                                section->memberPath(),
                                section->inputSectionName());
        segment->append(section);
      }
    }
  }
  // Dynamic executables get a dedicated segment for the ELF header and the
  // program header; shared libraries do not.
  if (_context.isDynamic() && !_context.isDynamicLibrary()) {
    Segment<ELFT> *segment =
        new (_allocator) ProgramHeaderSegment<ELFT>(_context);
    _segments.push_back(segment);
    segment->append(_elfHeader);
    segment->append(_programHeader);
  }
}
+ Segment<ELFT> *firstLoadSegment = nullptr; + for (auto si : _segments) { + if (si->segmentType() == llvm::ELF::PT_LOAD) { + firstLoadSegment = si; + si->firstSection()->setAlign(si->alignment()); + break; + } + } + assert(firstLoadSegment != nullptr && "No loadable segment!"); + firstLoadSegment->prepend(_programHeader); + firstLoadSegment->prepend(_elfHeader); + bool newSegmentHeaderAdded = true; + bool virtualAddressAssigned = false; + bool fileOffsetAssigned = false; + while (true) { + for (auto si : _segments) { + si->finalize(); + // Don't add PT_NULL segments into the program header + if (si->segmentType() != llvm::ELF::PT_NULL) + newSegmentHeaderAdded = _programHeader->addSegment(si); + } + if (!newSegmentHeaderAdded && virtualAddressAssigned) + break; + uint64_t address = baseAddress; + // start assigning virtual addresses + for (auto &si : _segments) { + if ((si->segmentType() != llvm::ELF::PT_LOAD) && + (si->segmentType() != llvm::ELF::PT_NULL)) + continue; + + if (si->segmentType() == llvm::ELF::PT_NULL) { + si->assignVirtualAddress(0 /*non loadable*/); + } else { + if (virtualAddressAssigned && (address != baseAddress) && + (address == si->virtualAddr())) + break; + si->assignVirtualAddress(address); + } + address = si->virtualAddr() + si->memSize(); + } + uint64_t baseFileOffset = 0; + uint64_t fileoffset = baseFileOffset; + for (auto &si : _segments) { + if ((si->segmentType() != llvm::ELF::PT_LOAD) && + (si->segmentType() != llvm::ELF::PT_NULL)) + continue; + if (fileOffsetAssigned && (fileoffset != baseFileOffset) && + (fileoffset == si->fileOffset())) + break; + si->assignFileOffsets(fileoffset); + fileoffset = si->fileOffset() + si->fileSize(); + } + virtualAddressAssigned = true; + fileOffsetAssigned = true; + _programHeader->resetProgramHeaders(); + } + Section<ELFT> *section; + // Fix the offsets of all the atoms within a section + for (auto &si : _sections) { + section = dyn_cast<Section<ELFT>>(si); + if (section && 
DefaultLayout<ELFT>::hasOutputSegment(section)) + section->assignFileOffsets(section->fileOffset()); + } + // Set the size of the merged Sections + for (auto osi : _outputSections) { + uint64_t sectionfileoffset = 0; + uint64_t startFileOffset = 0; + uint64_t sectionsize = 0; + bool isFirstSection = true; + for (auto si : osi->sections()) { + if (isFirstSection) { + startFileOffset = si->fileOffset(); + isFirstSection = false; + } + sectionfileoffset = si->fileOffset(); + sectionsize = si->fileSize(); + } + sectionsize = (sectionfileoffset - startFileOffset) + sectionsize; + osi->setFileOffset(startFileOffset); + osi->setSize(sectionsize); + } + // Set the virtual addr of the merged Sections + for (auto osi : _outputSections) { + uint64_t sectionstartaddr = 0; + uint64_t startaddr = 0; + uint64_t sectionsize = 0; + bool isFirstSection = true; + for (auto si : osi->sections()) { + if (isFirstSection) { + startaddr = si->virtualAddr(); + isFirstSection = false; + } + sectionstartaddr = si->virtualAddr(); + sectionsize = si->memSize(); + } + sectionsize = (sectionstartaddr - startaddr) + sectionsize; + osi->setMemSize(sectionsize); + osi->setAddr(startaddr); + } +} + +template <class ELFT> +void DefaultLayout<ELFT>::assignFileOffsetsForMiscSections() { + uint64_t fileoffset = 0; + uint64_t size = 0; + for (auto si : _segments) { + // Don't calculate offsets from non loadable segments + if ((si->segmentType() != llvm::ELF::PT_LOAD) && + (si->segmentType() != llvm::ELF::PT_NULL)) + continue; + fileoffset = si->fileOffset(); + size = si->fileSize(); + } + fileoffset = fileoffset + size; + Section<ELFT> *section; + for (auto si : _sections) { + section = dyn_cast<Section<ELFT>>(si); + if (section && DefaultLayout<ELFT>::hasOutputSegment(section)) + continue; + fileoffset = llvm::RoundUpToAlignment(fileoffset, si->alignment()); + si->setFileOffset(fileoffset); + si->setVirtualAddr(0); + fileoffset += si->fileSize(); + } +} + +template <class ELFT> void 
DefaultLayout<ELFT>::sortInputSections() { + // First, sort according to default layout's order + std::stable_sort( + _sections.begin(), _sections.end(), + [](Chunk<ELFT> *A, Chunk<ELFT> *B) { return A->order() < B->order(); }); + + if (!_linkerScriptSema.hasLayoutCommands()) + return; + + // Sort the sections by their order as defined by the linker script + std::stable_sort(this->_sections.begin(), this->_sections.end(), + [this](Chunk<ELFT> *A, Chunk<ELFT> *B) { + auto *a = dyn_cast<Section<ELFT>>(A); + auto *b = dyn_cast<Section<ELFT>>(B); + + if (a == nullptr) + return false; + if (b == nullptr) + return true; + + return _linkerScriptSema.less( + {a->archivePath(), a->memberPath(), + a->inputSectionName()}, + {b->archivePath(), b->memberPath(), + b->inputSectionName()}); + }); + // Now try to arrange sections with no mapping rules to sections with + // similar content + auto p = this->_sections.begin(); + // Find first section that has no assigned rule id + while (p != this->_sections.end()) { + auto *sect = dyn_cast<AtomSection<ELFT>>(*p); + if (!sect) + break; + + if (!_linkerScriptSema.hasMapping({sect->archivePath(), + sect->memberPath(), + sect->inputSectionName()})) + break; + + ++p; + } + // For all sections that have no assigned rule id, try to move them near a + // section with similar contents + if (p != this->_sections.begin()) { + for (; p != this->_sections.end(); ++p) { + auto q = p; + --q; + while (q != this->_sections.begin() && + (*q)->getContentType() != (*p)->getContentType()) + --q; + if ((*q)->getContentType() != (*p)->getContentType()) + continue; + ++q; + for (auto i = p; i != q;) { + auto next = i--; + std::iter_swap(i, next); + } + } + } +} + +template <class ELFT> +void DefaultLayout<ELFT>::addExtraChunksToSegment(Segment<ELFT> *segment, + StringRef archivePath, + StringRef memberPath, + StringRef sectionName) { + if (!_linkerScriptSema.hasLayoutCommands()) + return; + + std::vector<const script::SymbolAssignment *> exprs = + 
/// \brief Abstract target handler: every member is pure virtual (`= 0`), so a
/// concrete target must provide the relocation handler, the object and DSO
/// readers, and the writer.
template <class ELFT>
class DefaultTargetHandler : public TargetHandler<ELFT> {
public:
  /// \brief Return the target's relocation handler.
  // NOTE(review): declared `= 0` without the `virtual` keyword, so this
  // presumably overrides a virtual declared in TargetHandler<ELFT> -- confirm
  // against TargetHandler.h.
  const TargetRelocationHandler &getRelocationHandler() const = 0;

  /// \brief Create the reader used for object files.
  virtual std::unique_ptr<Reader> getObjReader() = 0;

  /// \brief Create the reader used for shared objects (DSOs).
  virtual std::unique_ptr<Reader> getDSOReader() = 0;

  /// \brief Create the writer for the output file.
  virtual std::unique_ptr<Writer> getWriter() = 0;
};
Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_ELF_DYNAMIC_FILE_H +#define LLD_READER_WRITER_ELF_DYNAMIC_FILE_H + +#include "Atoms.h" +#include "lld/Core/SharedLibraryFile.h" +#include "lld/ReaderWriter/ELFLinkingContext.h" +#include "llvm/Object/ELF.h" +#include "llvm/Support/Path.h" +#include <unordered_map> + +namespace lld { +namespace elf { +template <class ELFT> class DynamicFile : public SharedLibraryFile { +public: + static ErrorOr<std::unique_ptr<DynamicFile>> + create(std::unique_ptr<llvm::MemoryBuffer> mb, ELFLinkingContext &ctx); + + const SharedLibraryAtom *exports(StringRef name, + bool dataSymbolOnly) const override { + assert(!dataSymbolOnly && "Invalid option for ELF exports!"); + // See if we have the symbol. + auto sym = _nameToSym.find(name); + if (sym == _nameToSym.end()) + return nullptr; + // Have we already created a SharedLibraryAtom for it? + if (sym->second._atom) + return sym->second._atom; + // Create a SharedLibraryAtom for this symbol. + return sym->second._atom = new (_alloc) ELFDynamicAtom<ELFT>( + *this, name, _soname, sym->second._symbol); + } + + StringRef getDSOName() const override { return _soname; } + +protected: + std::error_code doParse() override { + std::error_code ec; + _objFile.reset( + new llvm::object::ELFFile<ELFT>(_mb->getBuffer(), ec)); + if (ec) + return ec; + + llvm::object::ELFFile<ELFT> &obj = *_objFile; + + _soname = obj.getLoadName(); + if (_soname.empty()) + _soname = llvm::sys::path::filename(path()); + + // Create a map from names to dynamic symbol table entries. + // TODO: This should use the object file's build in hash table instead if + // it exists. + for (auto i = obj.begin_dynamic_symbols(), e = obj.end_dynamic_symbols(); + i != e; ++i) { + auto name = obj.getSymbolName(i); + if ((ec = name.getError())) + return ec; + + // Dont add local symbols to dynamic entries. 
The first symbol in the + // dynamic symbol table is a local symbol. + if (i->getBinding() == llvm::ELF::STB_LOCAL) + continue; + + // TODO: Add absolute symbols + if (i->st_shndx == llvm::ELF::SHN_ABS) + continue; + + if (i->st_shndx == llvm::ELF::SHN_UNDEF) { + if (!_useShlibUndefines) + continue; + // Create an undefined atom. + if (!name->empty()) { + auto *newAtom = new (_alloc) ELFUndefinedAtom<ELFT>(*this, *name, &*i); + _undefinedAtoms._atoms.push_back(newAtom); + } + continue; + } + _nameToSym[*name]._symbol = &*i; + } + return std::error_code(); + } + +private: + DynamicFile(std::unique_ptr<MemoryBuffer> mb, ELFLinkingContext &ctx) + : SharedLibraryFile(mb->getBufferIdentifier()), _mb(std::move(mb)), + _ctx(ctx), _useShlibUndefines(ctx.useShlibUndefines()) {} + + mutable llvm::BumpPtrAllocator _alloc; + std::unique_ptr<llvm::object::ELFFile<ELFT>> _objFile; + /// \brief DT_SONAME + StringRef _soname; + + struct SymAtomPair { + SymAtomPair() : _symbol(nullptr), _atom(nullptr) {} + const typename llvm::object::ELFFile<ELFT>::Elf_Sym *_symbol; + const SharedLibraryAtom *_atom; + }; + + std::unique_ptr<MemoryBuffer> _mb; + ELFLinkingContext &_ctx; + bool _useShlibUndefines; + mutable std::unordered_map<StringRef, SymAtomPair> _nameToSym; +}; + +template <class ELFT> +ErrorOr<std::unique_ptr<DynamicFile<ELFT>>> +DynamicFile<ELFT>::create(std::unique_ptr<llvm::MemoryBuffer> mb, + ELFLinkingContext &ctx) { + return std::unique_ptr<DynamicFile>(new DynamicFile(std::move(mb), ctx)); +} + +} // end namespace elf +} // end namespace lld + +#endif diff --git a/lib/ReaderWriter/ELF/DynamicLibraryWriter.h b/lib/ReaderWriter/ELF/DynamicLibraryWriter.h new file mode 100644 index 000000000000..f97514b525c0 --- /dev/null +++ b/lib/ReaderWriter/ELF/DynamicLibraryWriter.h @@ -0,0 +1,96 @@ +//===- lib/ReaderWriter/ELF/DynamicLibraryWriter.h ------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// 
//===----------------------------------------------------------------------===//
//  DynamicLibraryWriter Class
//===----------------------------------------------------------------------===//
/// \brief Output writer for shared libraries.
///
/// Owns a RuntimeFile named "C runtime" that carries the linker-defined
/// "_end" absolute atom until createImplicitFiles() hands it to the caller.
template<class ELFT>
class DynamicLibraryWriter : public OutputELFWriter<ELFT> {
public:
  DynamicLibraryWriter(ELFLinkingContext &context, TargetLayout<ELFT> &layout)
      : OutputELFWriter<ELFT>(context, layout),
        _runtimeFile(new RuntimeFile<ELFT>(context, "C runtime")) {}

protected:
  /// Adds the global-scope defined atoms of the layout and the undefined
  /// atoms of \p file to the dynamic symbol table, then calls the base class.
  virtual void buildDynamicSymbolTable(const File &file);
  /// Adds the "_end" absolute atom to the runtime file.
  virtual void addDefaultAtoms();
  /// Adds the default atoms, runs the base class hook and moves the runtime
  /// file into the result vector; always returns true.
  virtual bool createImplicitFiles(std::vector<std::unique_ptr<File> > &);
  /// Sets the address of "_end" to the end of .bss, or of .data when there is
  /// no .bss output section.
  virtual void finalizeDefaultAtomValues();

protected:
  // Moved out of this writer by createImplicitFiles().
  std::unique_ptr<RuntimeFile<ELFT> > _runtimeFile;
};
(const UndefinedAtom *a : file.undefined()) + this->_dynamicSymbolTable->addSymbol(a, ELF::SHN_UNDEF); + + OutputELFWriter<ELFT>::buildDynamicSymbolTable(file); +} + +template <class ELFT> void DynamicLibraryWriter<ELFT>::addDefaultAtoms() { + _runtimeFile->addAbsoluteAtom("_end"); +} + +/// \brief Hook in lld to add CRuntime file +template <class ELFT> +bool DynamicLibraryWriter<ELFT>::createImplicitFiles( + std::vector<std::unique_ptr<File> > &result) { + // Add the default atoms as defined by executables + DynamicLibraryWriter<ELFT>::addDefaultAtoms(); + OutputELFWriter<ELFT>::createImplicitFiles(result); + result.push_back(std::move(_runtimeFile)); + return true; +} + +template <class ELFT> +void DynamicLibraryWriter<ELFT>::finalizeDefaultAtomValues() { + auto underScoreEndAtomIter = this->_layout.findAbsoluteAtom("_end"); + + if (auto bssSection = this->_layout.findOutputSection(".bss")) { + (*underScoreEndAtomIter)->_virtualAddr = + bssSection->virtualAddr() + bssSection->memSize(); + } else if (auto dataSection = this->_layout.findOutputSection(".data")) { + (*underScoreEndAtomIter)->_virtualAddr = + dataSection->virtualAddr() + dataSection->memSize(); + } +} + +} // namespace elf +} // namespace lld + +#endif // LLD_READER_WRITER_ELF_DYNAMIC_LIBRARY_WRITER_H diff --git a/lib/ReaderWriter/ELF/ELFFile.h b/lib/ReaderWriter/ELF/ELFFile.h new file mode 100644 index 000000000000..11f4ee4fc633 --- /dev/null +++ b/lib/ReaderWriter/ELF/ELFFile.h @@ -0,0 +1,1179 @@ +//===- lib/ReaderWriter/ELF/ELFFile.h -------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_ELF_FILE_H +#define LLD_READER_WRITER_ELF_FILE_H + +#include "Atoms.h" +#include <llvm/ADT/MapVector.h> +#include <map> +#include <unordered_map> + +namespace lld { + +namespace elf { +/// \brief Read a binary, find out based on the symbol table contents what kind +/// of symbol it is and create corresponding atoms for it +template <class ELFT> class ELFFile : public File { + + typedef llvm::object::Elf_Sym_Impl<ELFT> Elf_Sym; + typedef llvm::object::Elf_Shdr_Impl<ELFT> Elf_Shdr; + typedef llvm::object::Elf_Rel_Impl<ELFT, false> Elf_Rel; + typedef llvm::object::Elf_Rel_Impl<ELFT, true> Elf_Rela; + typedef typename llvm::object::ELFFile<ELFT>::Elf_Sym_Iter Elf_Sym_Iter; + typedef typename llvm::object::ELFFile<ELFT>::Elf_Rela_Iter Elf_Rela_Iter; + typedef typename llvm::object::ELFFile<ELFT>::Elf_Rel_Iter Elf_Rel_Iter; + typedef typename llvm::object::ELFFile<ELFT>::Elf_Word Elf_Word; + + // A Map is used to hold the atoms that have been divided up + // after reading the section that contains Merge String attributes + struct MergeSectionKey { + MergeSectionKey(const Elf_Shdr *shdr, int64_t offset) + : _shdr(shdr), _offset(offset) {} + // Data members + const Elf_Shdr *_shdr; + int64_t _offset; + }; + struct MergeSectionEq { + int64_t operator()(const MergeSectionKey &k) const { + return llvm::hash_combine((int64_t)(k._shdr->sh_name), + (int64_t)k._offset); + } + bool operator()(const MergeSectionKey &lhs, + const MergeSectionKey &rhs) const { + return ((lhs._shdr->sh_name == rhs._shdr->sh_name) && + (lhs._offset == rhs._offset)); + } + }; + + struct MergeString { + MergeString(int64_t offset, StringRef str, const Elf_Shdr *shdr, + StringRef sectionName) + : _offset(offset), _string(str), _shdr(shdr), + _sectionName(sectionName) {} + // the offset of this atom + int64_t _offset; + // The content + StringRef _string; + // Section header + const Elf_Shdr 
*_shdr; + // Section name + StringRef _sectionName; + }; + + // This is used to find the MergeAtom given a relocation + // offset + typedef std::vector<ELFMergeAtom<ELFT> *> MergeAtomsT; + + /// \brief find a mergeAtom given a start offset + struct FindByOffset { + const Elf_Shdr *_shdr; + int64_t _offset; + FindByOffset(const Elf_Shdr *shdr, int64_t offset) + : _shdr(shdr), _offset(offset) {} + bool operator()(const ELFMergeAtom<ELFT> *a) { + int64_t off = a->offset(); + return (_shdr->sh_name == a->section()) && + ((_offset >= off) && (_offset <= off + (int64_t)a->size())); + } + }; + + /// \brief find a merge atom given a offset + ELFMergeAtom<ELFT> *findMergeAtom(const Elf_Shdr *shdr, uint64_t offset) { + auto it = std::find_if(_mergeAtoms.begin(), _mergeAtoms.end(), + FindByOffset(shdr, offset)); + assert(it != _mergeAtoms.end()); + return *it; + } + + typedef std::unordered_map<MergeSectionKey, DefinedAtom *, MergeSectionEq, + MergeSectionEq> MergedSectionMapT; + typedef typename MergedSectionMapT::iterator MergedSectionMapIterT; + +public: + ELFFile(StringRef name, ELFLinkingContext &ctx) + : File(name, kindObject), _ordinal(0), + _doStringsMerge(ctx.mergeCommonStrings()), _useWrap(false), _ctx(ctx) { + setLastError(std::error_code()); + } + + ELFFile(std::unique_ptr<MemoryBuffer> mb, ELFLinkingContext &ctx) + : File(mb->getBufferIdentifier(), kindObject), _mb(std::move(mb)), + _ordinal(0), _doStringsMerge(ctx.mergeCommonStrings()), + _useWrap(ctx.wrapCalls().size()), _ctx(ctx) {} + + static ErrorOr<std::unique_ptr<ELFFile>> + create(std::unique_ptr<MemoryBuffer> mb, ELFLinkingContext &ctx); + + virtual Reference::KindArch kindArch(); + + /// \brief Create symbols from LinkingContext. 
+ std::error_code createAtomsFromContext(); + + /// \brief Read input sections and populate necessary data structures + /// to read them later and create atoms + std::error_code createAtomizableSections(); + + /// \brief Create mergeable atoms from sections that have the merge attribute + /// set + std::error_code createMergeableAtoms(); + + /// \brief Add the symbols that the sections contain. The symbols will be + /// converted to atoms for + /// Undefined symbols, absolute symbols + std::error_code createSymbolsFromAtomizableSections(); + + /// \brief Create individual atoms + std::error_code createAtoms(); + + const atom_collection<DefinedAtom> &defined() const override { + return _definedAtoms; + } + + const atom_collection<UndefinedAtom> &undefined() const override { + return _undefinedAtoms; + } + + const atom_collection<SharedLibraryAtom> &sharedLibrary() const override { + return _sharedLibraryAtoms; + } + + const atom_collection<AbsoluteAtom> &absolute() const override { + return _absoluteAtoms; + } + + Atom *findAtom(const Elf_Sym *sourceSymbol, const Elf_Sym *targetSymbol) { + // All references to atoms inside a group are through undefined atoms. 
+ Atom *targetAtom = _symbolToAtomMapping.lookup(targetSymbol); + StringRef targetSymbolName = targetAtom->name(); + if (targetAtom->definition() != Atom::definitionRegular) + return targetAtom; + if ((llvm::dyn_cast<DefinedAtom>(targetAtom))->scope() == + DefinedAtom::scopeTranslationUnit) + return targetAtom; + if (!redirectReferenceUsingUndefAtom(sourceSymbol, targetSymbol)) + return targetAtom; + auto undefForGroupchild = _undefAtomsForGroupChild.find(targetSymbolName); + if (undefForGroupchild != _undefAtomsForGroupChild.end()) + return undefForGroupchild->getValue(); + auto undefGroupChildAtom = + new (_readerStorage) SimpleUndefinedAtom(*this, targetSymbolName); + _undefinedAtoms._atoms.push_back(undefGroupChildAtom); + return (_undefAtomsForGroupChild[targetSymbolName] = undefGroupChildAtom); + } + +protected: + ELFDefinedAtom<ELFT> *createDefinedAtomAndAssignRelocations( + StringRef symbolName, StringRef sectionName, const Elf_Sym *symbol, + const Elf_Shdr *section, ArrayRef<uint8_t> symContent, + ArrayRef<uint8_t> secContent); + + std::error_code doParse() override; + + /// \brief Iterate over Elf_Rela relocations list and create references. + virtual void createRelocationReferences(const Elf_Sym *symbol, + ArrayRef<uint8_t> content, + range<Elf_Rela_Iter> rels); + + /// \brief Iterate over Elf_Rel relocations list and create references. + virtual void createRelocationReferences(const Elf_Sym *symbol, + ArrayRef<uint8_t> symContent, + ArrayRef<uint8_t> secContent, + range<Elf_Rel_Iter> rels); + + /// \brief After all the Atoms and References are created, update each + /// Reference's target with the Atom pointer it refers to. + void updateReferences(); + + /// \brief Update the reference if the access corresponds to a merge string + /// section. + void updateReferenceForMergeStringAccess(ELFReference<ELFT> *ref, + const Elf_Sym *symbol, + const Elf_Shdr *shdr); + + /// \brief Do we want to ignore the section. 
Ignored sections are + /// not processed to create atoms + bool isIgnoredSection(const Elf_Shdr *section); + + /// \brief Is the current section be treated as a mergeable string section. + /// The contents of a mergeable string section are null-terminated strings. + /// If the section have mergeable strings, the linker would need to split + /// the section into multiple atoms and mark them mergeByContent. + bool isMergeableStringSection(const Elf_Shdr *section); + + /// \brief Returns a new anonymous atom whose size is equal to the + /// section size. That atom will be used to represent the entire + /// section that have no symbols. + ELFDefinedAtom<ELFT> *createSectionAtom(const Elf_Shdr *section, + StringRef sectionName, + ArrayRef<uint8_t> contents); + + /// Returns the symbol's content size. The nextSymbol should be null if the + /// symbol is the last one in the section. + uint64_t symbolContentSize(const Elf_Shdr *section, + const Elf_Sym *symbol, + const Elf_Sym *nextSymbol); + + void createEdge(ELFDefinedAtom<ELFT> *from, ELFDefinedAtom<ELFT> *to, + uint32_t edgeKind); + + /// Get the section name for a section. + ErrorOr<StringRef> getSectionName(const Elf_Shdr *shdr) const { + if (!shdr) + return StringRef(); + return _objFile->getSectionName(shdr); + } + + /// Determines if the section occupy memory space. + bool sectionOccupiesMemorySpace(const Elf_Shdr *shdr) const { + return (shdr->sh_type != llvm::ELF::SHT_NOBITS); + } + + /// Return the section contents. + ErrorOr<ArrayRef<uint8_t>> getSectionContents(const Elf_Shdr *shdr) const { + if (!shdr || !sectionOccupiesMemorySpace(shdr)) + return ArrayRef<uint8_t>(); + return _objFile->getSectionContents(shdr); + } + + /// Returns true if the symbol is a undefined symbol. + bool isUndefinedSymbol(const Elf_Sym *sym) const { + return (sym->st_shndx == llvm::ELF::SHN_UNDEF); + } + + /// Determines if the target wants to create an atom for a section that has no + /// symbol references. 
+ bool handleSectionWithNoSymbols(const Elf_Shdr *shdr, + std::vector<Elf_Sym_Iter> &syms) const { + return shdr && (shdr->sh_type == llvm::ELF::SHT_PROGBITS) && syms.empty(); + } + + /// Handle creation of atoms for .gnu.linkonce sections. + std::error_code handleGnuLinkOnceSection( + StringRef sectionName, + llvm::StringMap<std::vector<ELFDefinedAtom<ELFT> *>> &atomsForSection, + const Elf_Shdr *shdr); + + // Handle Section groups/COMDAT scetions. + std::error_code handleSectionGroup( + StringRef signature, StringRef groupSectionName, + llvm::StringMap<std::vector<ELFDefinedAtom<ELFT> *>> &atomsForSection, + llvm::DenseMap<const Elf_Shdr *, std::vector<StringRef>> &comdatSections, + const Elf_Shdr *shdr); + + /// Process the Undefined symbol and create an atom for it. + ErrorOr<ELFUndefinedAtom<ELFT> *> + handleUndefinedSymbol(StringRef symName, const Elf_Sym *sym) { + return new (_readerStorage) ELFUndefinedAtom<ELFT>(*this, symName, sym); + } + + /// Returns true if the symbol is a absolute symbol. + bool isAbsoluteSymbol(const Elf_Sym *sym) const { + return (sym->st_shndx == llvm::ELF::SHN_ABS); + } + + /// Process the Absolute symbol and create an atom for it. + ErrorOr<ELFAbsoluteAtom<ELFT> *> + handleAbsoluteSymbol(StringRef symName, const Elf_Sym *sym, int64_t value) { + return new (_readerStorage) + ELFAbsoluteAtom<ELFT>(*this, symName, sym, value); + } + + /// Returns true if the symbol is common symbol. A common symbol represents a + /// tentive definition in C. It has name, size and alignment constraint, but + /// actual storage has not yet been allocated. (The linker will allocate + /// storage for them in the later pass after coalescing tentative symbols by + /// name.) + virtual bool isCommonSymbol(const Elf_Sym *symbol) const { + return symbol->getType() == llvm::ELF::STT_COMMON || + symbol->st_shndx == llvm::ELF::SHN_COMMON; + } + + /// Returns true if the section is a gnulinkonce section. 
+ bool isGnuLinkOnceSection(StringRef sectionName) const { + return sectionName.startswith(".gnu.linkonce."); + } + + /// Returns true if the section is a COMDAT group section. + bool isGroupSection(const Elf_Shdr *shdr) const { + return (shdr->sh_type == llvm::ELF::SHT_GROUP); + } + + /// Returns true if the section is a member of some group. + bool isSectionMemberOfGroup(const Elf_Shdr *shdr) const { + return (shdr->sh_flags & llvm::ELF::SHF_GROUP); + } + + /// Returns correct st_value for the symbol depending on the architecture. + /// For most architectures it's just a regular st_value with no changes. + virtual uint64_t getSymbolValue(const Elf_Sym *symbol) const { + return symbol->st_value; + } + + /// Process the common symbol and create an atom for it. + virtual ErrorOr<ELFCommonAtom<ELFT> *> + handleCommonSymbol(StringRef symName, const Elf_Sym *sym) { + return new (_readerStorage) ELFCommonAtom<ELFT>(*this, symName, sym); + } + + /// Returns true if the symbol is a defined symbol. + virtual bool isDefinedSymbol(const Elf_Sym *sym) const { + return (sym->getType() == llvm::ELF::STT_NOTYPE || + sym->getType() == llvm::ELF::STT_OBJECT || + sym->getType() == llvm::ELF::STT_FUNC || + sym->getType() == llvm::ELF::STT_GNU_IFUNC || + sym->getType() == llvm::ELF::STT_SECTION || + sym->getType() == llvm::ELF::STT_FILE || + sym->getType() == llvm::ELF::STT_TLS); + } + + /// Process the Defined symbol and create an atom for it. + virtual ErrorOr<ELFDefinedAtom<ELFT> *> + handleDefinedSymbol(StringRef symName, StringRef sectionName, + const Elf_Sym *sym, const Elf_Shdr *sectionHdr, + ArrayRef<uint8_t> contentData, + unsigned int referenceStart, unsigned int referenceEnd, + std::vector<ELFReference<ELFT> *> &referenceList) { + return new (_readerStorage) ELFDefinedAtom<ELFT>( + *this, symName, sectionName, sym, sectionHdr, contentData, + referenceStart, referenceEnd, referenceList); + } + + /// Process the Merge string and create an atom for it. 
+ ErrorOr<ELFMergeAtom<ELFT> *> + handleMergeString(StringRef sectionName, const Elf_Shdr *sectionHdr, + ArrayRef<uint8_t> contentData, unsigned int offset) { + ELFMergeAtom<ELFT> *mergeAtom = new (_readerStorage) + ELFMergeAtom<ELFT>(*this, sectionName, sectionHdr, contentData, offset); + const MergeSectionKey mergedSectionKey(sectionHdr, offset); + if (_mergedSectionMap.find(mergedSectionKey) == _mergedSectionMap.end()) + _mergedSectionMap.insert(std::make_pair(mergedSectionKey, mergeAtom)); + return mergeAtom; + } + + /// References to the sections comprising a group, from sections + /// outside the group, must be made via global UNDEF symbols, + /// referencing global symbols defined as addresses in the group + /// sections. They may not reference local symbols for addresses in + /// the group's sections, including section symbols. + /// ABI Doc : https://mentorembedded.github.io/cxx-abi/abi/prop-72-comdat.html + /// Does the atom need to be redirected using a separate undefined atom? + bool redirectReferenceUsingUndefAtom(const Elf_Sym *sourceSymbol, + const Elf_Sym *targetSymbol) const; + + void addReferenceToSymbol(const ELFReference<ELFT> *r, const Elf_Sym *sym) { + _referenceToSymbol[r] = sym; + } + + const Elf_Sym *findSymbolForReference(const ELFReference<ELFT> *r) const { + auto elfReferenceToSymbol = _referenceToSymbol.find(r); + if (elfReferenceToSymbol != _referenceToSymbol.end()) + return elfReferenceToSymbol->second; + return nullptr; + } + + llvm::BumpPtrAllocator _readerStorage; + std::unique_ptr<llvm::object::ELFFile<ELFT> > _objFile; + atom_collection_vector<DefinedAtom> _definedAtoms; + atom_collection_vector<UndefinedAtom> _undefinedAtoms; + atom_collection_vector<SharedLibraryAtom> _sharedLibraryAtoms; + atom_collection_vector<AbsoluteAtom> _absoluteAtoms; + + /// \brief _relocationAddendReferences and _relocationReferences contain the + /// list of relocations references. 
In ELF, if a section named, ".text" has + /// relocations will also have a section named ".rel.text" or ".rela.text" + /// which will hold the entries. + std::unordered_map<StringRef, range<Elf_Rela_Iter>> + _relocationAddendReferences; + MergedSectionMapT _mergedSectionMap; + std::unordered_map<StringRef, range<Elf_Rel_Iter>> _relocationReferences; + std::vector<ELFReference<ELFT> *> _references; + llvm::DenseMap<const Elf_Sym *, Atom *> _symbolToAtomMapping; + llvm::DenseMap<const ELFReference<ELFT> *, const Elf_Sym *> + _referenceToSymbol; + // Group child atoms have a pair corresponding to the signature and the + // section header of the section that was used for generating the signature. + llvm::DenseMap<const Elf_Sym *, std::pair<StringRef, const Elf_Shdr *>> + _groupChild; + llvm::StringMap<Atom *> _undefAtomsForGroupChild; + + /// \brief Atoms that are created for a section that has the merge property + /// set + MergeAtomsT _mergeAtoms; + + /// \brief the section and the symbols that are contained within it to create + /// used to create atoms + llvm::MapVector<const Elf_Shdr *, std::vector<Elf_Sym_Iter>> _sectionSymbols; + + /// \brief Sections that have merge string property + std::vector<const Elf_Shdr *> _mergeStringSections; + + std::unique_ptr<MemoryBuffer> _mb; + int64_t _ordinal; + + /// \brief the cached options relevant while reading the ELF File + bool _doStringsMerge; + + /// \brief Is --wrap on? + bool _useWrap; + + /// \brief The LinkingContext. + ELFLinkingContext &_ctx; + + // Wrap map + llvm::StringMap<UndefinedAtom *> _wrapSymbolMap; +}; + +/// \brief All atoms are owned by a File. To add linker specific atoms +/// the atoms need to be inserted to a file called (RuntimeFile) which +/// are basically additional symbols required by libc and other runtime +/// libraries part of executing a program. 
This class provides support +/// for adding absolute symbols and undefined symbols +template <class ELFT> class RuntimeFile : public ELFFile<ELFT> { +public: + typedef llvm::object::Elf_Sym_Impl<ELFT> Elf_Sym; + RuntimeFile(ELFLinkingContext &context, StringRef name) + : ELFFile<ELFT>(name, context) {} + + /// \brief add a global absolute atom + virtual Atom *addAbsoluteAtom(StringRef symbolName) { + assert(!symbolName.empty() && "AbsoluteAtoms must have a name"); + Elf_Sym *symbol = new (this->_readerStorage) Elf_Sym; + symbol->st_name = 0; + symbol->st_value = 0; + symbol->st_shndx = llvm::ELF::SHN_ABS; + symbol->setBindingAndType(llvm::ELF::STB_GLOBAL, llvm::ELF::STT_OBJECT); + symbol->setVisibility(llvm::ELF::STV_DEFAULT); + symbol->st_size = 0; + auto newAtom = this->handleAbsoluteSymbol(symbolName, symbol, -1); + this->_absoluteAtoms._atoms.push_back(*newAtom); + return *newAtom; + } + + /// \brief add an undefined atom + virtual Atom *addUndefinedAtom(StringRef symbolName) { + assert(!symbolName.empty() && "UndefinedAtoms must have a name"); + Elf_Sym *symbol = new (this->_readerStorage) Elf_Sym; + symbol->st_name = 0; + symbol->st_value = 0; + symbol->st_shndx = llvm::ELF::SHN_UNDEF; + symbol->setBindingAndType(llvm::ELF::STB_GLOBAL, llvm::ELF::STT_NOTYPE); + symbol->setVisibility(llvm::ELF::STV_DEFAULT); + symbol->st_size = 0; + auto newAtom = this->handleUndefinedSymbol(symbolName, symbol); + this->_undefinedAtoms._atoms.push_back(*newAtom); + return *newAtom; + } + + // cannot add atoms to Runtime file + virtual void addAtom(const Atom &) { + llvm_unreachable("cannot add atoms to Runtime files"); + } +}; + +template <class ELFT> +ErrorOr<std::unique_ptr<ELFFile<ELFT>>> +ELFFile<ELFT>::create(std::unique_ptr<MemoryBuffer> mb, + ELFLinkingContext &ctx) { + std::unique_ptr<ELFFile<ELFT>> file(new ELFFile<ELFT>(std::move(mb), ctx)); + return std::move(file); +} + +template <class ELFT> +std::error_code ELFFile<ELFT>::doParse() { + std::error_code ec; + 
_objFile.reset(new llvm::object::ELFFile<ELFT>(_mb->getBuffer(), ec)); + if (ec) + return ec; + + if ((ec = createAtomsFromContext())) + return ec; + + // Read input sections from the input file that need to be converted to + // atoms + if ((ec = createAtomizableSections())) + return ec; + + // For mergeable strings, we would need to split the section into various + // atoms + if ((ec = createMergeableAtoms())) + return ec; + + // Create the necessary symbols that are part of the section that we + // created in createAtomizableSections function + if ((ec = createSymbolsFromAtomizableSections())) + return ec; + + // Create the appropriate atoms from the file + if ((ec = createAtoms())) + return ec; + return std::error_code(); +} + +template <class ELFT> Reference::KindArch ELFFile<ELFT>::kindArch() { + switch (_objFile->getHeader()->e_machine) { + case llvm::ELF::EM_X86_64: + return Reference::KindArch::x86_64; + case llvm::ELF::EM_386: + return Reference::KindArch::x86; + case llvm::ELF::EM_ARM: + return Reference::KindArch::ARM; + case llvm::ELF::EM_HEXAGON: + return Reference::KindArch::Hexagon; + case llvm::ELF::EM_MIPS: + return Reference::KindArch::Mips; + case llvm::ELF::EM_AARCH64: + return Reference::KindArch::AArch64; + } + llvm_unreachable("unsupported e_machine value"); +} + +template <class ELFT> +std::error_code ELFFile<ELFT>::createAtomizableSections() { + // Handle: SHT_REL and SHT_RELA sections: + // Increment over the sections, when REL/RELA section types are found add + // the contents to the RelocationReferences map. + // Record the number of relocs to guess at preallocating the buffer. 
+ uint64_t totalRelocs = 0; + for (const Elf_Shdr §ion : _objFile->sections()) { + if (isIgnoredSection(§ion)) + continue; + + if (isMergeableStringSection(§ion)) { + _mergeStringSections.push_back(§ion); + continue; + } + + if (section.sh_type == llvm::ELF::SHT_RELA) { + auto sHdr = _objFile->getSection(section.sh_info); + + auto sectionName = _objFile->getSectionName(sHdr); + if (std::error_code ec = sectionName.getError()) + return ec; + + auto rai(_objFile->begin_rela(§ion)); + auto rae(_objFile->end_rela(§ion)); + + _relocationAddendReferences[*sectionName] = make_range(rai, rae); + totalRelocs += std::distance(rai, rae); + } else if (section.sh_type == llvm::ELF::SHT_REL) { + auto sHdr = _objFile->getSection(section.sh_info); + + auto sectionName = _objFile->getSectionName(sHdr); + if (std::error_code ec = sectionName.getError()) + return ec; + + auto ri(_objFile->begin_rel(§ion)); + auto re(_objFile->end_rel(§ion)); + + _relocationReferences[*sectionName] = make_range(ri, re); + totalRelocs += std::distance(ri, re); + } else { + _sectionSymbols[§ion]; + } + } + _references.reserve(totalRelocs); + return std::error_code(); +} + +template <class ELFT> std::error_code ELFFile<ELFT>::createMergeableAtoms() { + // Divide the section that contains mergeable strings into tokens + // TODO + // a) add resolver support to recognize multibyte chars + // b) Create a separate section chunk to write mergeable atoms + std::vector<MergeString *> tokens; + for (const Elf_Shdr *msi : _mergeStringSections) { + auto sectionName = getSectionName(msi); + if (std::error_code ec = sectionName.getError()) + return ec; + + auto sectionContents = getSectionContents(msi); + if (std::error_code ec = sectionContents.getError()) + return ec; + + StringRef secCont(reinterpret_cast<const char *>(sectionContents->begin()), + sectionContents->size()); + + unsigned int prev = 0; + for (std::size_t i = 0, e = sectionContents->size(); i != e; ++i) { + if ((*sectionContents)[i] == '\0') { + 
tokens.push_back(new (_readerStorage) MergeString( + prev, secCont.slice(prev, i + 1), msi, *sectionName)); + prev = i + 1; + } + } + } + + // Create Mergeable atoms + for (const MergeString *tai : tokens) { + ArrayRef<uint8_t> content((const uint8_t *)tai->_string.data(), + tai->_string.size()); + ErrorOr<ELFMergeAtom<ELFT> *> mergeAtom = + handleMergeString(tai->_sectionName, tai->_shdr, content, tai->_offset); + (*mergeAtom)->setOrdinal(++_ordinal); + _definedAtoms._atoms.push_back(*mergeAtom); + _mergeAtoms.push_back(*mergeAtom); + } + return std::error_code(); +} + +template <class ELFT> +std::error_code ELFFile<ELFT>::createSymbolsFromAtomizableSections() { + // Increment over all the symbols collecting atoms and symbol names for + // later use. + auto SymI = _objFile->begin_symbols(), SymE = _objFile->end_symbols(); + + // Skip over dummy sym. + if (SymI != SymE) + ++SymI; + + for (; SymI != SymE; ++SymI) { + const Elf_Shdr *section = _objFile->getSection(&*SymI); + + auto symbolName = _objFile->getSymbolName(SymI); + if (std::error_code ec = symbolName.getError()) + return ec; + + if (isAbsoluteSymbol(&*SymI)) { + ErrorOr<ELFAbsoluteAtom<ELFT> *> absAtom = + handleAbsoluteSymbol(*symbolName, &*SymI, (int64_t)getSymbolValue(&*SymI)); + _absoluteAtoms._atoms.push_back(*absAtom); + _symbolToAtomMapping.insert(std::make_pair(&*SymI, *absAtom)); + } else if (isUndefinedSymbol(&*SymI)) { + if (_useWrap && + (_wrapSymbolMap.find(*symbolName) != _wrapSymbolMap.end())) { + auto wrapAtom = _wrapSymbolMap.find(*symbolName); + _symbolToAtomMapping.insert( + std::make_pair(&*SymI, wrapAtom->getValue())); + continue; + } + ErrorOr<ELFUndefinedAtom<ELFT> *> undefAtom = + handleUndefinedSymbol(*symbolName, &*SymI); + _undefinedAtoms._atoms.push_back(*undefAtom); + _symbolToAtomMapping.insert(std::make_pair(&*SymI, *undefAtom)); + } else if (isCommonSymbol(&*SymI)) { + ErrorOr<ELFCommonAtom<ELFT> *> commonAtom = + handleCommonSymbol(*symbolName, &*SymI); + 
(*commonAtom)->setOrdinal(++_ordinal); + _definedAtoms._atoms.push_back(*commonAtom); + _symbolToAtomMapping.insert(std::make_pair(&*SymI, *commonAtom)); + } else if (isDefinedSymbol(&*SymI)) { + _sectionSymbols[section].push_back(SymI); + } else { + llvm::errs() << "Unable to create atom for: " << *symbolName << "\n"; + return llvm::object::object_error::parse_failed; + } + } + + return std::error_code(); +} + +template <class ELFT> std::error_code ELFFile<ELFT>::createAtoms() { + // Holds all the atoms that are part of the section. They are the targets of + // the kindGroupChild reference. + llvm::StringMap<std::vector<ELFDefinedAtom<ELFT> *>> atomsForSection; + // group sections have a mapping of the section header to the + // signature/section. + llvm::DenseMap<const Elf_Shdr *, std::pair<StringRef, StringRef>> + groupSections; + // Contains a list of comdat sections for a group. + llvm::DenseMap<const Elf_Shdr *, std::vector<StringRef>> comdatSections; + for (auto &i : _sectionSymbols) { + const Elf_Shdr *section = i.first; + std::vector<Elf_Sym_Iter> &symbols = i.second; + + // Sort symbols by position. + std::stable_sort(symbols.begin(), symbols.end(), + [this](Elf_Sym_Iter a, Elf_Sym_Iter b) { + return getSymbolValue(&*a) < getSymbolValue(&*b); + }); + + ErrorOr<StringRef> sectionName = this->getSectionName(section); + if (std::error_code ec = sectionName.getError()) + return ec; + + auto sectionContents = getSectionContents(section); + if (std::error_code ec = sectionContents.getError()) + return ec; + + bool addAtoms = true; + + // A section of type SHT_GROUP defines a grouping of sections. The name of a + // symbol from one of the containing object's symbol tables provides a + // signature + // for the section group. The section header of the SHT_GROUP section + // specifies + // the identifying symbol entry, as described : the sh_link member contains + // the section header index of the symbol table section that contains the + // entry. 
+ // The sh_info member contains the symbol table index of the identifying + // entry. + // The sh_flags member of the section header contains 0. The name of the + // section + // (sh_name) is not specified. + if (isGroupSection(section)) { + const Elf_Word *groupMembers = + reinterpret_cast<const Elf_Word *>(sectionContents->data()); + const long count = (section->sh_size) / sizeof(Elf_Word); + for (int i = 1; i < count; i++) { + const Elf_Shdr *sHdr = _objFile->getSection(groupMembers[i]); + ErrorOr<StringRef> sectionName = _objFile->getSectionName(sHdr); + if (std::error_code ec = sectionName.getError()) + return ec; + comdatSections[section].push_back(*sectionName); + } + const Elf_Sym *symbol = _objFile->getSymbol(section->sh_info); + const Elf_Shdr *symtab = _objFile->getSection(section->sh_link); + ErrorOr<StringRef> symbolName = _objFile->getSymbolName(symtab, symbol); + if (std::error_code ec = symbolName.getError()) + return ec; + groupSections.insert( + std::make_pair(section, std::make_pair(*symbolName, *sectionName))); + continue; + } + + if (isGnuLinkOnceSection(*sectionName)) { + groupSections.insert( + std::make_pair(section, std::make_pair(*sectionName, *sectionName))); + addAtoms = false; + } + + if (isSectionMemberOfGroup(section)) + addAtoms = false; + + if (handleSectionWithNoSymbols(section, symbols)) { + ELFDefinedAtom<ELFT> *newAtom = + createSectionAtom(section, *sectionName, *sectionContents); + newAtom->setOrdinal(++_ordinal); + if (addAtoms) + _definedAtoms._atoms.push_back(newAtom); + else + atomsForSection[*sectionName].push_back(newAtom); + continue; + } + + ELFDefinedAtom<ELFT> *previousAtom = nullptr; + ELFReference<ELFT> *anonFollowedBy = nullptr; + + for (auto si = symbols.begin(), se = symbols.end(); si != se; ++si) { + auto symbol = *si; + StringRef symbolName = ""; + if (symbol->getType() != llvm::ELF::STT_SECTION) { + auto symName = _objFile->getSymbolName(symbol); + if (std::error_code ec = symName.getError()) + return ec; + 
symbolName = *symName; + } + + uint64_t contentSize = symbolContentSize( + section, &*symbol, (si + 1 == se) ? nullptr : &**(si + 1)); + + // Check to see if we need to add the FollowOn Reference + ELFReference<ELFT> *followOn = nullptr; + if (previousAtom) { + // Replace the followon atom with the anonymous atom that we created, + // so that the next symbol that we create is a followon from the + // anonymous atom. + if (anonFollowedBy) { + followOn = anonFollowedBy; + } else { + followOn = new (_readerStorage) + ELFReference<ELFT>(lld::Reference::kindLayoutAfter); + previousAtom->addReference(followOn); + } + } + + ArrayRef<uint8_t> symbolData((const uint8_t *)sectionContents->data() + + getSymbolValue(&*symbol), + contentSize); + + // If the linker finds that a section has global atoms that are in a + // mergeable section, treat them as defined atoms as they shouldn't be + // merged away as well as these symbols have to be part of symbol + // resolution + if (isMergeableStringSection(section)) { + if (symbol->getBinding() == llvm::ELF::STB_GLOBAL) { + auto definedMergeAtom = handleDefinedSymbol( + symbolName, *sectionName, &**si, section, symbolData, + _references.size(), _references.size(), _references); + (*definedMergeAtom)->setOrdinal(++_ordinal); + if (addAtoms) + _definedAtoms._atoms.push_back(*definedMergeAtom); + else + atomsForSection[*sectionName].push_back(*definedMergeAtom); + } + continue; + } + + // Don't allocate content to a weak symbol, as they may be merged away. + // Create an anonymous atom to hold the data. + ELFDefinedAtom<ELFT> *anonAtom = nullptr; + anonFollowedBy = nullptr; + if (symbol->getBinding() == llvm::ELF::STB_WEAK) { + // Create anonymous new non-weak ELF symbol that holds the symbol + // data. 
+ auto sym = new (_readerStorage) Elf_Sym(*symbol); + sym->setBinding(llvm::ELF::STB_GLOBAL); + anonAtom = createDefinedAtomAndAssignRelocations( + "", *sectionName, sym, section, symbolData, *sectionContents); + symbolData = ArrayRef<uint8_t>(); + + // If this is the last atom, let's not create a followon reference. + if (anonAtom && (si + 1) != se) { + anonFollowedBy = new (_readerStorage) + ELFReference<ELFT>(lld::Reference::kindLayoutAfter); + anonAtom->addReference(anonFollowedBy); + } + } + + ELFDefinedAtom<ELFT> *newAtom = createDefinedAtomAndAssignRelocations( + symbolName, *sectionName, &*symbol, section, symbolData, + *sectionContents); + newAtom->setOrdinal(++_ordinal); + + // If the atom was a weak symbol, let's create a followon reference to + // the anonymous atom that we created. + if (anonAtom) + createEdge(newAtom, anonAtom, Reference::kindLayoutAfter); + + if (previousAtom) { + // Set the followon atom to the weak atom that we have created, so + // that they would alias when the file gets written. + followOn->setTarget(anonAtom ? anonAtom : newAtom); + } + + // The previous atom is always the atom created before unless the atom + // is a weak atom. + previousAtom = anonAtom ? anonAtom : newAtom; + + if (addAtoms) + _definedAtoms._atoms.push_back(newAtom); + else + atomsForSection[*sectionName].push_back(newAtom); + + _symbolToAtomMapping.insert(std::make_pair(&*symbol, newAtom)); + if (anonAtom) { + anonAtom->setOrdinal(++_ordinal); + if (addAtoms) + _definedAtoms._atoms.push_back(anonAtom); + else + atomsForSection[*sectionName].push_back(anonAtom); + } + } + } + + // Iterate over all the group sections to create parent atoms pointing to + // group-child atoms. 
+ for (auto § : groupSections) { + StringRef signature = sect.second.first; + StringRef groupSectionName = sect.second.second; + if (isGnuLinkOnceSection(signature)) + handleGnuLinkOnceSection(signature, atomsForSection, sect.first); + else if (isGroupSection(sect.first)) + handleSectionGroup(signature, groupSectionName, atomsForSection, + comdatSections, sect.first); + } + + updateReferences(); + return std::error_code(); +} + +template <class ELFT> +std::error_code ELFFile<ELFT>::handleGnuLinkOnceSection( + StringRef signature, + llvm::StringMap<std::vector<ELFDefinedAtom<ELFT> *>> &atomsForSection, + const Elf_Shdr *shdr) { + // TODO: Check for errors. + unsigned int referenceStart = _references.size(); + std::vector<ELFReference<ELFT> *> refs; + for (auto ha : atomsForSection[signature]) { + _groupChild[ha->symbol()] = std::make_pair(signature, shdr); + ELFReference<ELFT> *ref = + new (_readerStorage) ELFReference<ELFT>(lld::Reference::kindGroupChild); + ref->setTarget(ha); + refs.push_back(ref); + } + atomsForSection[signature].clear(); + // Create a gnu linkonce atom. + auto gnuLinkOnceAtom = handleDefinedSymbol( + signature, signature, nullptr, shdr, ArrayRef<uint8_t>(), referenceStart, + _references.size(), _references); + (*gnuLinkOnceAtom)->setOrdinal(++_ordinal); + _definedAtoms._atoms.push_back(*gnuLinkOnceAtom); + for (auto reference : refs) + (*gnuLinkOnceAtom)->addReference(reference); + return std::error_code(); +} + +template <class ELFT> +std::error_code ELFFile<ELFT>::handleSectionGroup( + StringRef signature, StringRef groupSectionName, + llvm::StringMap<std::vector<ELFDefinedAtom<ELFT> *>> &atomsForSection, + llvm::DenseMap<const Elf_Shdr *, std::vector<StringRef>> &comdatSections, + const Elf_Shdr *shdr) { + // TODO: Check for errors. 
+ unsigned int referenceStart = _references.size(); + std::vector<ELFReference<ELFT> *> refs; + auto sectionNamesInGroup = comdatSections[shdr]; + for (auto sectionName : sectionNamesInGroup) { + for (auto ha : atomsForSection[sectionName]) { + _groupChild[ha->symbol()] = std::make_pair(signature, shdr); + ELFReference<ELFT> *ref = new (_readerStorage) + ELFReference<ELFT>(lld::Reference::kindGroupChild); + ref->setTarget(ha); + refs.push_back(ref); + } + atomsForSection[sectionName].clear(); + } + // Create a gnu linkonce atom. + auto sectionGroupAtom = handleDefinedSymbol( + signature, groupSectionName, nullptr, shdr, ArrayRef<uint8_t>(), + referenceStart, _references.size(), _references); + (*sectionGroupAtom)->setOrdinal(++_ordinal); + _definedAtoms._atoms.push_back(*sectionGroupAtom); + for (auto reference : refs) + (*sectionGroupAtom)->addReference(reference); + return std::error_code(); +} + +template <class ELFT> std::error_code ELFFile<ELFT>::createAtomsFromContext() { + if (!_useWrap) + return std::error_code(); + // Steps :- + // a) Create an undefined atom for the symbol specified by the --wrap option, + // as that + // may be needed to be pulled from an archive. + // b) Create an undefined atom for __wrap_<symbolname>. + // c) All references to the symbol specified by wrap should point to + // __wrap_<symbolname> + // d) All references to __real_symbol should point to the <symbol> + for (auto &wrapsym : _ctx.wrapCalls()) { + StringRef wrapStr = wrapsym.getKey(); + // Create a undefined symbol fror the wrap symbol. + UndefinedAtom *wrapSymAtom = + new (_readerStorage) SimpleUndefinedAtom(*this, wrapStr); + StringRef wrapCallSym = + _ctx.allocateString((llvm::Twine("__wrap_") + wrapStr).str()); + StringRef realCallSym = + _ctx.allocateString((llvm::Twine("__real_") + wrapStr).str()); + UndefinedAtom *wrapCallAtom = + new (_readerStorage) SimpleUndefinedAtom(*this, wrapCallSym); + // Create maps, when there is call to sym, it should point to wrapCallSym. 
+ _wrapSymbolMap.insert(std::make_pair(wrapStr, wrapCallAtom)); + // Whenever there is a reference to realCall it should point to the symbol + // created for each wrap usage. + _wrapSymbolMap.insert(std::make_pair(realCallSym, wrapSymAtom)); + _undefinedAtoms._atoms.push_back(wrapSymAtom); + _undefinedAtoms._atoms.push_back(wrapCallAtom); + } + return std::error_code(); +} + +template <class ELFT> +ELFDefinedAtom<ELFT> *ELFFile<ELFT>::createDefinedAtomAndAssignRelocations( + StringRef symbolName, StringRef sectionName, const Elf_Sym *symbol, + const Elf_Shdr *section, ArrayRef<uint8_t> symContent, + ArrayRef<uint8_t> secContent) { + unsigned int referenceStart = _references.size(); + + // Add Rela (those with r_addend) references: + auto rari = _relocationAddendReferences.find(sectionName); + if (rari != _relocationAddendReferences.end()) + createRelocationReferences(symbol, symContent, rari->second); + + // Add Rel references. + auto rri = _relocationReferences.find(sectionName); + if (rri != _relocationReferences.end()) + createRelocationReferences(symbol, symContent, secContent, rri->second); + + // Create the DefinedAtom and add it to the list of DefinedAtoms. 
+ return *handleDefinedSymbol(symbolName, sectionName, symbol, section, + symContent, referenceStart, _references.size(), + _references); +} + +template <class ELFT> +void ELFFile<ELFT>::createRelocationReferences(const Elf_Sym *symbol, + ArrayRef<uint8_t> content, + range<Elf_Rela_Iter> rels) { + bool isMips64EL = _objFile->isMips64EL(); + const auto symValue = getSymbolValue(symbol); + for (const auto &rel : rels) { + if (rel.r_offset < symValue || + symValue + content.size() <= rel.r_offset) + continue; + auto elfRelocation = new (_readerStorage) + ELFReference<ELFT>(&rel, rel.r_offset - symValue, kindArch(), + rel.getType(isMips64EL), rel.getSymbol(isMips64EL)); + addReferenceToSymbol(elfRelocation, symbol); + _references.push_back(elfRelocation); + } +} + +template <class ELFT> +void ELFFile<ELFT>::createRelocationReferences(const Elf_Sym *symbol, + ArrayRef<uint8_t> symContent, + ArrayRef<uint8_t> secContent, + range<Elf_Rel_Iter> rels) { + bool isMips64EL = _objFile->isMips64EL(); + const auto symValue = getSymbolValue(symbol); + for (const auto &rel : rels) { + if (rel.r_offset < symValue || + symValue + symContent.size() <= rel.r_offset) + continue; + auto elfRelocation = new (_readerStorage) + ELFReference<ELFT>(rel.r_offset - symValue, kindArch(), + rel.getType(isMips64EL), rel.getSymbol(isMips64EL)); + int32_t addend = *(symContent.data() + rel.r_offset - symValue); + elfRelocation->setAddend(addend); + addReferenceToSymbol(elfRelocation, symbol); + _references.push_back(elfRelocation); + } +} + +template <class ELFT> +void ELFFile<ELFT>::updateReferenceForMergeStringAccess(ELFReference<ELFT> *ref, + const Elf_Sym *symbol, + const Elf_Shdr *shdr) { + // If the target atom is mergeable strefng atom, the atom might have been + // merged with other atom having the same contents. Try to find the + // merged one if that's the case. 
+ int64_t addend = ref->addend(); + if (addend < 0) + addend = 0; + + const MergeSectionKey ms(shdr, addend); + auto msec = _mergedSectionMap.find(ms); + if (msec != _mergedSectionMap.end()) { + ref->setTarget(msec->second); + return; + } + + // The target atom was not merged. Mergeable atoms are not in + // _symbolToAtomMapping, so we cannot find it by calling findAtom(). We + // instead call findMergeAtom(). + if (symbol->getType() != llvm::ELF::STT_SECTION) + addend = getSymbolValue(symbol) + addend; + ELFMergeAtom<ELFT> *mergedAtom = findMergeAtom(shdr, addend); + ref->setOffset(addend - mergedAtom->offset()); + ref->setAddend(0); + ref->setTarget(mergedAtom); +} + +template <class ELFT> void ELFFile<ELFT>::updateReferences() { + for (auto &ri : _references) { + if (ri->kindNamespace() != lld::Reference::KindNamespace::ELF) + continue; + const Elf_Sym *symbol = _objFile->getSymbol(ri->targetSymbolIndex()); + const Elf_Shdr *shdr = _objFile->getSection(symbol); + + // If the atom is not in mergeable string section, the target atom is + // simply that atom. + if (isMergeableStringSection(shdr)) + updateReferenceForMergeStringAccess(ri, symbol, shdr); + else + ri->setTarget(findAtom(findSymbolForReference(ri), symbol)); + } +} + +template <class ELFT> +bool ELFFile<ELFT>::isIgnoredSection(const Elf_Shdr *section) { + switch (section->sh_type) { + case llvm::ELF::SHT_NULL: + case llvm::ELF::SHT_STRTAB: + case llvm::ELF::SHT_SYMTAB: + case llvm::ELF::SHT_SYMTAB_SHNDX: + return true; + default: + break; + } + return false; +} + +template <class ELFT> +bool ELFFile<ELFT>::isMergeableStringSection(const Elf_Shdr *section) { + if (_doStringsMerge && section) { + int64_t sectionFlags = section->sh_flags; + sectionFlags &= ~llvm::ELF::SHF_ALLOC; + // Mergeable string sections have both SHF_MERGE and SHF_STRINGS flags + // set. sh_entsize is the size of each character which is normally 1. 
+ if ((section->sh_entsize < 2) && + (sectionFlags == (llvm::ELF::SHF_MERGE | llvm::ELF::SHF_STRINGS))) { + return true; + } + } + return false; +} + +template <class ELFT> +ELFDefinedAtom<ELFT> * +ELFFile<ELFT>::createSectionAtom(const Elf_Shdr *section, StringRef sectionName, + ArrayRef<uint8_t> content) { + Elf_Sym *sym = new (_readerStorage) Elf_Sym; + sym->st_name = 0; + sym->setBindingAndType(llvm::ELF::STB_LOCAL, llvm::ELF::STT_SECTION); + sym->st_other = 0; + sym->st_shndx = 0; + sym->st_value = 0; + sym->st_size = 0; + auto *newAtom = createDefinedAtomAndAssignRelocations( + "", sectionName, sym, section, content, content); + newAtom->setOrdinal(++_ordinal); + return newAtom; +} + +template <class ELFT> +uint64_t ELFFile<ELFT>::symbolContentSize(const Elf_Shdr *section, + const Elf_Sym *symbol, + const Elf_Sym *nextSymbol) { + const auto symValue = getSymbolValue(symbol); + // if this is the last symbol, take up the remaining data. + return nextSymbol ? getSymbolValue(nextSymbol) - symValue + : section->sh_size - symValue; +} + +template <class ELFT> +void ELFFile<ELFT>::createEdge(ELFDefinedAtom<ELFT> *from, + ELFDefinedAtom<ELFT> *to, uint32_t edgeKind) { + auto reference = new (_readerStorage) ELFReference<ELFT>(edgeKind); + reference->setTarget(to); + from->addReference(reference); +} + +/// Does the atom need to be redirected using a separate undefined atom? +template <class ELFT> +bool ELFFile<ELFT>::redirectReferenceUsingUndefAtom( + const Elf_Sym *sourceSymbol, const Elf_Sym *targetSymbol) const { + auto groupChildTarget = _groupChild.find(targetSymbol); + + // If the reference is not to a group child atom, there is no need to redirect + // using a undefined atom. Its also not needed if the source and target are + // from the same section. 
+ if ((groupChildTarget == _groupChild.end()) || + (sourceSymbol->st_shndx == targetSymbol->st_shndx)) + return false; + + auto groupChildSource = _groupChild.find(sourceSymbol); + + // If the source symbol is not in a group, use a undefined symbol too. + if (groupChildSource == _groupChild.end()) + return true; + + // If the source and child are from the same group, we dont need the + // relocation to go through a undefined symbol. + if (groupChildSource->second.second == groupChildTarget->second.second) + return false; + + return true; +} + +} // end namespace elf +} // end namespace lld + +#endif // LLD_READER_WRITER_ELF_FILE_H diff --git a/lib/ReaderWriter/ELF/ELFLinkingContext.cpp b/lib/ReaderWriter/ELF/ELFLinkingContext.cpp new file mode 100644 index 000000000000..c7dffda8a463 --- /dev/null +++ b/lib/ReaderWriter/ELF/ELFLinkingContext.cpp @@ -0,0 +1,259 @@ +//===- lib/ReaderWriter/ELF/ELFLinkingContext.cpp -------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "lld/ReaderWriter/ELFLinkingContext.h" +#include "ELFFile.h" +#include "OrderPass.h" +#include "TargetHandler.h" +#include "lld/Core/Instrumentation.h" +#include "lld/Core/SharedLibraryFile.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Config/config.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Path.h" + +#if defined(HAVE_CXXABI_H) +#include <cxxabi.h> +#endif + +namespace lld { + +class CommandLineUndefinedAtom : public SimpleUndefinedAtom { +public: + CommandLineUndefinedAtom(const File &f, StringRef name) + : SimpleUndefinedAtom(f, name) {} + + CanBeNull canBeNull() const override { + return CanBeNull::canBeNullAtBuildtime; + } +}; + +ELFLinkingContext::ELFLinkingContext( + llvm::Triple triple, std::unique_ptr<TargetHandlerBase> targetHandler) + : _outputELFType(llvm::ELF::ET_EXEC), _triple(triple), + _targetHandler(std::move(targetHandler)), _baseAddress(0), + _isStaticExecutable(false), _noInhibitExec(false), _exportDynamic(false), + _mergeCommonStrings(false), _useShlibUndefines(true), + _dynamicLinkerArg(false), _noAllowDynamicLibraries(false), + _mergeRODataToTextSegment(true), _demangle(true), + _stripSymbols(false), _alignSegments(true), _collectStats(false), + _outputMagic(OutputMagic::DEFAULT), _initFunction("_init"), + _finiFunction("_fini"), _sysrootPath(""), _linkerScriptSema() {} + +void ELFLinkingContext::addPasses(PassManager &pm) { + pm.add(llvm::make_unique<elf::OrderPass>()); +} + +uint16_t ELFLinkingContext::getOutputMachine() const { + switch (getTriple().getArch()) { + case llvm::Triple::x86: + return llvm::ELF::EM_386; + case llvm::Triple::x86_64: + return llvm::ELF::EM_X86_64; + case llvm::Triple::hexagon: + return llvm::ELF::EM_HEXAGON; + case llvm::Triple::mipsel: + case llvm::Triple::mips64el: + return llvm::ELF::EM_MIPS; + 
case llvm::Triple::aarch64: + return llvm::ELF::EM_AARCH64; + case llvm::Triple::arm: + return llvm::ELF::EM_ARM; + default: + llvm_unreachable("Unhandled arch"); + } +} + +StringRef ELFLinkingContext::entrySymbolName() const { + if (_outputELFType == llvm::ELF::ET_EXEC && _entrySymbolName.empty()) + return "_start"; + return _entrySymbolName; +} + +bool ELFLinkingContext::validateImpl(raw_ostream &diagnostics) { + switch (outputFileType()) { + case LinkingContext::OutputFileType::YAML: + _writer = createWriterYAML(*this); + break; + case LinkingContext::OutputFileType::Native: + llvm_unreachable("Unimplemented"); + break; + default: + _writer = createWriterELF(this->targetHandler()); + break; + } + + // If -dead_strip, set up initial live symbols. + if (deadStrip()) + addDeadStripRoot(entrySymbolName()); + return true; +} + +bool ELFLinkingContext::isDynamic() const { + switch (_outputELFType) { + case llvm::ELF::ET_EXEC: + return !_isStaticExecutable; + case llvm::ELF::ET_DYN: + return true; + } + return false; +} + +bool ELFLinkingContext::isRelativeReloc(const Reference &) const { + return false; +} + +Writer &ELFLinkingContext::writer() const { return *_writer; } + +static void buildSearchPath(SmallString<128> &path, StringRef dir, + StringRef sysRoot) { + if (!dir.startswith("=/")) + path.assign(dir); + else { + path.assign(sysRoot); + path.append(dir.substr(1)); + } +} + +ErrorOr<StringRef> ELFLinkingContext::searchLibrary(StringRef libName) const { + bool hasColonPrefix = libName[0] == ':'; + SmallString<128> path; + for (StringRef dir : _inputSearchPaths) { + // Search for dynamic library + if (!_isStaticExecutable) { + buildSearchPath(path, dir, _sysrootPath); + llvm::sys::path::append(path, hasColonPrefix + ? 
libName.drop_front() + : Twine("lib", libName) + ".so"); + if (llvm::sys::fs::exists(path.str())) + return StringRef(*new (_allocator) std::string(path.str())); + } + // Search for static libraries too + buildSearchPath(path, dir, _sysrootPath); + llvm::sys::path::append(path, hasColonPrefix + ? libName.drop_front() + : Twine("lib", libName) + ".a"); + if (llvm::sys::fs::exists(path.str())) + return StringRef(*new (_allocator) std::string(path.str())); + } + if (hasColonPrefix && llvm::sys::fs::exists(libName.drop_front())) + return libName.drop_front(); + + return make_error_code(llvm::errc::no_such_file_or_directory); +} + +ErrorOr<StringRef> ELFLinkingContext::searchFile(StringRef fileName, + bool isSysRooted) const { + SmallString<128> path; + if (llvm::sys::path::is_absolute(fileName) && isSysRooted) { + path.assign(_sysrootPath); + path.append(fileName); + if (llvm::sys::fs::exists(path.str())) + return StringRef(*new (_allocator) std::string(path.str())); + } else if (llvm::sys::fs::exists(fileName)) + return fileName; + + if (llvm::sys::path::is_absolute(fileName)) + return make_error_code(llvm::errc::no_such_file_or_directory); + + for (StringRef dir : _inputSearchPaths) { + buildSearchPath(path, dir, _sysrootPath); + llvm::sys::path::append(path, fileName); + if (llvm::sys::fs::exists(path.str())) + return StringRef(*new (_allocator) std::string(path.str())); + } + return make_error_code(llvm::errc::no_such_file_or_directory); +} + +void ELFLinkingContext::createInternalFiles( + std::vector<std::unique_ptr<File>> &files) const { + std::unique_ptr<SimpleFile> file( + new SimpleFile("<internal file for --defsym>")); + for (auto &i : getAbsoluteSymbols()) { + StringRef sym = i.first; + uint64_t val = i.second; + file->addAtom(*(new (_allocator) SimpleAbsoluteAtom( + *file, sym, Atom::scopeGlobal, val))); + } + files.push_back(std::move(file)); + LinkingContext::createInternalFiles(files); +} + +void ELFLinkingContext::finalizeInputFiles() { + // Add virtual 
archive that resolves undefined symbols. + if (_resolver) + getNodes().push_back(llvm::make_unique<FileNode>(std::move(_resolver))); +} + +std::unique_ptr<File> ELFLinkingContext::createUndefinedSymbolFile() const { + if (_initialUndefinedSymbols.empty()) + return nullptr; + std::unique_ptr<SimpleFile> undefinedSymFile( + new SimpleFile("command line option -u")); + for (auto undefSymStr : _initialUndefinedSymbols) + undefinedSymFile->addAtom(*(new (_allocator) CommandLineUndefinedAtom( + *undefinedSymFile, undefSymStr))); + return std::move(undefinedSymFile); +} + +void ELFLinkingContext::notifySymbolTableCoalesce(const Atom *existingAtom, + const Atom *newAtom, + bool &useNew) { + // First suppose that the `existingAtom` is defined + // and the `newAtom` is undefined. + auto *da = dyn_cast<DefinedAtom>(existingAtom); + auto *ua = dyn_cast<UndefinedAtom>(newAtom); + if (!da && !ua) { + // Then try to reverse the assumption. + da = dyn_cast<DefinedAtom>(newAtom); + ua = dyn_cast<UndefinedAtom>(existingAtom); + } + + if (da && ua && da->scope() == Atom::scopeGlobal && + isa<SharedLibraryFile>(ua->file())) + // If strong defined atom coalesces away an atom declared + // in the shared object the strong atom needs to be dynamically exported. + // Save its name. + _dynamicallyExportedSymbols.insert(ua->name()); +} + +std::string ELFLinkingContext::demangle(StringRef symbolName) const { + if (!demangleSymbols()) + return symbolName; + + // Only try to demangle symbols that look like C++ symbols + if (!symbolName.startswith("_Z")) + return symbolName; + +#if defined(HAVE_CXXABI_H) + SmallString<256> symBuff; + StringRef nullTermSym = Twine(symbolName).toNullTerminatedStringRef(symBuff); + const char *cstr = nullTermSym.data(); + int status; + char *demangled = abi::__cxa_demangle(cstr, nullptr, nullptr, &status); + if (demangled != NULL) { + std::string result(demangled); + // __cxa_demangle() always uses a malloc'ed buffer to return the result. 
+ free(demangled); + return result; + } +#endif + + return symbolName; +} + +void ELFLinkingContext::setUndefinesResolver(std::unique_ptr<File> resolver) { + assert(isa<ArchiveLibraryFile>(resolver.get()) && "Wrong resolver type"); + _resolver = std::move(resolver); +} + +} // end namespace lld diff --git a/lib/ReaderWriter/ELF/ELFReader.h b/lib/ReaderWriter/ELF/ELFReader.h new file mode 100644 index 000000000000..43f218115c66 --- /dev/null +++ b/lib/ReaderWriter/ELF/ELFReader.h @@ -0,0 +1,102 @@ +//===- lib/ReaderWriter/ELF/ELFReader.h -----------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_ELF_READER_H +#define LLD_READER_WRITER_ELF_READER_H + +#include "CreateELF.h" +#include "DynamicFile.h" +#include "ELFFile.h" +#include "lld/Core/Reader.h" + +namespace lld { +namespace elf { + +template <typename ELFT, typename ELFTraitsT, typename ContextT> +class ELFObjectReader : public Reader { +public: + typedef llvm::object::Elf_Ehdr_Impl<ELFT> Elf_Ehdr; + + ELFObjectReader(ContextT &ctx, uint64_t machine) + : _ctx(ctx), _machine(machine) {} + + bool canParse(file_magic magic, StringRef, + const MemoryBuffer &buf) const override { + return (magic == llvm::sys::fs::file_magic::elf_relocatable && + elfHeader(buf)->e_machine == _machine); + } + + std::error_code + loadFile(std::unique_ptr<MemoryBuffer> mb, const class Registry &, + std::vector<std::unique_ptr<File>> &result) const override { + std::size_t maxAlignment = + 1ULL << llvm::countTrailingZeros(uintptr_t(mb->getBufferStart())); + auto f = + createELF<ELFTraitsT>(llvm::object::getElfArchType(mb->getBuffer()), + maxAlignment, std::move(mb), _ctx); + if (std::error_code ec = f.getError()) + return ec; + result.push_back(std::move(*f)); + return std::error_code(); + } + + const 
Elf_Ehdr *elfHeader(const MemoryBuffer &buf) const { + const uint8_t *data = + reinterpret_cast<const uint8_t *>(buf.getBuffer().data()); + return (reinterpret_cast<const Elf_Ehdr *>(data)); + } + +protected: + ContextT &_ctx; + uint64_t _machine; +}; + +template <typename ELFT, typename ELFTraitsT, typename ContextT> +class ELFDSOReader : public Reader { +public: + typedef llvm::object::Elf_Ehdr_Impl<ELFT> Elf_Ehdr; + + ELFDSOReader(ContextT &ctx, uint64_t machine) + : _ctx(ctx), _machine(machine) {} + + bool canParse(file_magic magic, StringRef, + const MemoryBuffer &buf) const override { + return (magic == llvm::sys::fs::file_magic::elf_shared_object && + elfHeader(buf)->e_machine == _machine); + } + + std::error_code + loadFile(std::unique_ptr<MemoryBuffer> mb, const class Registry &, + std::vector<std::unique_ptr<File>> &result) const override { + std::size_t maxAlignment = + 1ULL << llvm::countTrailingZeros(uintptr_t(mb->getBufferStart())); + auto f = + createELF<ELFTraitsT>(llvm::object::getElfArchType(mb->getBuffer()), + maxAlignment, std::move(mb), _ctx); + if (std::error_code ec = f.getError()) + return ec; + result.push_back(std::move(*f)); + return std::error_code(); + } + + const Elf_Ehdr *elfHeader(const MemoryBuffer &buf) const { + const uint8_t *data = + reinterpret_cast<const uint8_t *>(buf.getBuffer().data()); + return (reinterpret_cast<const Elf_Ehdr *>(data)); + } + +protected: + ContextT &_ctx; + uint64_t _machine; +}; + +} // namespace elf +} // namespace lld + +#endif // LLD_READER_WRITER_ELF_READER_H diff --git a/lib/ReaderWriter/ELF/ExecutableWriter.h b/lib/ReaderWriter/ELF/ExecutableWriter.h new file mode 100644 index 000000000000..477e3920abae --- /dev/null +++ b/lib/ReaderWriter/ELF/ExecutableWriter.h @@ -0,0 +1,182 @@ +//===- lib/ReaderWriter/ELF/ExecutableWriter.h ----------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +#ifndef LLD_READER_WRITER_ELF_EXECUTABLE_WRITER_H +#define LLD_READER_WRITER_ELF_EXECUTABLE_WRITER_H + +#include "OutputELFWriter.h" + +namespace lld { +namespace elf { +using namespace llvm; +using namespace llvm::object; + +template<class ELFT> +class ExecutableWriter; + +//===----------------------------------------------------------------------===// +// ExecutableWriter Class +//===----------------------------------------------------------------------===// +template<class ELFT> +class ExecutableWriter : public OutputELFWriter<ELFT> { +public: + ExecutableWriter(ELFLinkingContext &context, TargetLayout<ELFT> &layout) + : OutputELFWriter<ELFT>(context, layout), + _runtimeFile(new RuntimeFile<ELFT>(context, "C runtime")) {} + +protected: + virtual void buildDynamicSymbolTable(const File &file); + virtual void addDefaultAtoms(); + virtual bool createImplicitFiles(std::vector<std::unique_ptr<File> > &); + virtual void finalizeDefaultAtomValues(); + virtual void createDefaultSections(); + + virtual bool isNeededTagRequired(const SharedLibraryAtom *sla) const { + return this->_layout.isCopied(sla); + } + + unique_bump_ptr<InterpSection<ELFT>> _interpSection; + std::unique_ptr<RuntimeFile<ELFT> > _runtimeFile; +}; + +//===----------------------------------------------------------------------===// +// ExecutableWriter +//===----------------------------------------------------------------------===// +template<class ELFT> +void ExecutableWriter<ELFT>::buildDynamicSymbolTable(const File &file) { + for (auto sec : this->_layout.sections()) + if (auto section = dyn_cast<AtomSection<ELFT>>(sec)) + for (const auto &atom : section->atoms()) { + const DefinedAtom *da = dyn_cast<const DefinedAtom>(atom->_atom); + if (!da) + continue; + if (da->dynamicExport() != DefinedAtom::dynamicExportAlways && + !this->_context.isDynamicallyExportedSymbol(da->name()) && + 
!(this->_context.shouldExportDynamic() && + da->scope() == Atom::Scope::scopeGlobal)) + continue; + this->_dynamicSymbolTable->addSymbol(atom->_atom, section->ordinal(), + atom->_virtualAddr, atom); + } + + // Put weak symbols in the dynamic symbol table. + if (this->_context.isDynamic()) { + for (const UndefinedAtom *a : file.undefined()) { + if (this->_layout.isReferencedByDefinedAtom(a) && + a->canBeNull() != UndefinedAtom::canBeNullNever) + this->_dynamicSymbolTable->addSymbol(a, ELF::SHN_UNDEF); + } + } + + OutputELFWriter<ELFT>::buildDynamicSymbolTable(file); +} + +/// \brief Add absolute symbols by default. These are linker added +/// absolute symbols +template<class ELFT> +void ExecutableWriter<ELFT>::addDefaultAtoms() { + OutputELFWriter<ELFT>::addDefaultAtoms(); + _runtimeFile->addUndefinedAtom(this->_context.entrySymbolName()); + _runtimeFile->addAbsoluteAtom("__bss_start"); + _runtimeFile->addAbsoluteAtom("__bss_end"); + _runtimeFile->addAbsoluteAtom("_end"); + _runtimeFile->addAbsoluteAtom("end"); + _runtimeFile->addAbsoluteAtom("__preinit_array_start"); + _runtimeFile->addAbsoluteAtom("__preinit_array_end"); + _runtimeFile->addAbsoluteAtom("__init_array_start"); + _runtimeFile->addAbsoluteAtom("__init_array_end"); + if (this->_context.isRelaOutputFormat()) { + _runtimeFile->addAbsoluteAtom("__rela_iplt_start"); + _runtimeFile->addAbsoluteAtom("__rela_iplt_end"); + } else { + _runtimeFile->addAbsoluteAtom("__rel_iplt_start"); + _runtimeFile->addAbsoluteAtom("__rel_iplt_end"); + } + _runtimeFile->addAbsoluteAtom("__fini_array_start"); + _runtimeFile->addAbsoluteAtom("__fini_array_end"); +} + +/// \brief Hook in lld to add CRuntime file +template <class ELFT> +bool ExecutableWriter<ELFT>::createImplicitFiles( + std::vector<std::unique_ptr<File> > &result) { + // Add the default atoms as defined by executables + ExecutableWriter<ELFT>::addDefaultAtoms(); + OutputELFWriter<ELFT>::createImplicitFiles(result); + result.push_back(std::move(_runtimeFile)); + 
return true; +} + +template <class ELFT> void ExecutableWriter<ELFT>::createDefaultSections() { + OutputELFWriter<ELFT>::createDefaultSections(); + if (this->_context.isDynamic()) { + _interpSection.reset(new (this->_alloc) InterpSection<ELFT>( + this->_context, ".interp", DefaultLayout<ELFT>::ORDER_INTERP, + this->_context.getInterpreter())); + this->_layout.addSection(_interpSection.get()); + } +} + +/// Finalize the value of all the absolute symbols that we +/// created +template <class ELFT> void ExecutableWriter<ELFT>::finalizeDefaultAtomValues() { + OutputELFWriter<ELFT>::finalizeDefaultAtomValues(); + auto bssStartAtomIter = this->_layout.findAbsoluteAtom("__bss_start"); + auto bssEndAtomIter = this->_layout.findAbsoluteAtom("__bss_end"); + auto underScoreEndAtomIter = this->_layout.findAbsoluteAtom("_end"); + auto endAtomIter = this->_layout.findAbsoluteAtom("end"); + + auto startEnd = [&](StringRef sym, StringRef sec) -> void { + std::string start = ("__" + sym + "_start").str(); + std::string end = ("__" + sym + "_end").str(); + auto s = this->_layout.findAbsoluteAtom(start); + auto e = this->_layout.findAbsoluteAtom(end); + auto section = this->_layout.findOutputSection(sec); + if (section) { + (*s)->_virtualAddr = section->virtualAddr(); + (*e)->_virtualAddr = section->virtualAddr() + section->memSize(); + } else { + (*s)->_virtualAddr = 0; + (*e)->_virtualAddr = 0; + } + }; + + startEnd("preinit_array", ".preinit_array"); + startEnd("init_array", ".init_array"); + if (this->_context.isRelaOutputFormat()) + startEnd("rela_iplt", ".rela.plt"); + else + startEnd("rel_iplt", ".rel.plt"); + startEnd("fini_array", ".fini_array"); + + assert(!(bssStartAtomIter == this->_layout.absoluteAtoms().end() || + bssEndAtomIter == this->_layout.absoluteAtoms().end() || + underScoreEndAtomIter == this->_layout.absoluteAtoms().end() || + endAtomIter == this->_layout.absoluteAtoms().end()) && + "Unable to find the absolute atoms that have been added by lld"); + + auto 
bssSection = this->_layout.findOutputSection(".bss"); + + // If we don't find a bss section, then don't set these values + if (bssSection) { + (*bssStartAtomIter)->_virtualAddr = bssSection->virtualAddr(); + (*bssEndAtomIter)->_virtualAddr = + bssSection->virtualAddr() + bssSection->memSize(); + (*underScoreEndAtomIter)->_virtualAddr = (*bssEndAtomIter)->_virtualAddr; + (*endAtomIter)->_virtualAddr = (*bssEndAtomIter)->_virtualAddr; + } else if (auto dataSection = this->_layout.findOutputSection(".data")) { + (*underScoreEndAtomIter)->_virtualAddr = + dataSection->virtualAddr() + dataSection->memSize(); + (*endAtomIter)->_virtualAddr = (*underScoreEndAtomIter)->_virtualAddr; + } +} + +} // namespace elf +} // namespace lld + +#endif // LLD_READER_WRITER_ELF_EXECUTABLE_WRITER_H diff --git a/lib/ReaderWriter/ELF/HeaderChunks.h b/lib/ReaderWriter/ELF/HeaderChunks.h new file mode 100644 index 000000000000..eab132b9b2f6 --- /dev/null +++ b/lib/ReaderWriter/ELF/HeaderChunks.h @@ -0,0 +1,364 @@ +//===- lib/ReaderWriter/ELF/HeaderChunks.h --------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_ELF_HEADER_CHUNKS_H +#define LLD_READER_WRITER_ELF_HEADER_CHUNKS_H + +#include "SegmentChunks.h" +#include "llvm/Object/ELF.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FileOutputBuffer.h" +#include "llvm/Support/Format.h" + +/// \brief An Header represents the Elf[32/64]_Ehdr structure at the +/// start of an ELF executable file. 
+namespace lld { +namespace elf { +template <class ELFT> class ELFHeader : public Chunk<ELFT> { +public: + typedef llvm::object::Elf_Ehdr_Impl<ELFT> Elf_Ehdr; + + ELFHeader(const ELFLinkingContext &); + + void e_ident(int I, unsigned char C) { _eh.e_ident[I] = C; } + void e_type(uint16_t type) { _eh.e_type = type; } + void e_machine(uint16_t machine) { _eh.e_machine = machine; } + void e_version(uint32_t version) { _eh.e_version = version; } + void e_entry(int64_t entry) { _eh.e_entry = entry; } + void e_phoff(int64_t phoff) { _eh.e_phoff = phoff; } + void e_shoff(int64_t shoff) { _eh.e_shoff = shoff; } + void e_flags(uint32_t flags) { _eh.e_flags = flags; } + void e_ehsize(uint16_t ehsize) { _eh.e_ehsize = ehsize; } + void e_phentsize(uint16_t phentsize) { _eh.e_phentsize = phentsize; } + void e_phnum(uint16_t phnum) { _eh.e_phnum = phnum; } + void e_shentsize(uint16_t shentsize) { _eh.e_shentsize = shentsize; } + void e_shnum(uint16_t shnum) { _eh.e_shnum = shnum; } + void e_shstrndx(uint16_t shstrndx) { _eh.e_shstrndx = shstrndx; } + uint64_t fileSize() const { return sizeof(Elf_Ehdr); } + + static bool classof(const Chunk<ELFT> *c) { + return c->Kind() == Chunk<ELFT>::Kind::ELFHeader; + } + + int getContentType() const { return Chunk<ELFT>::ContentType::Header; } + + void write(ELFWriter *writer, TargetLayout<ELFT> &layout, + llvm::FileOutputBuffer &buffer); + + virtual void doPreFlight() {} + + void finalize() { + _eh.e_ident[llvm::ELF::EI_CLASS] = + (ELFT::Is64Bits) ? llvm::ELF::ELFCLASS64 : llvm::ELF::ELFCLASS32; + _eh.e_ident[llvm::ELF::EI_DATA] = + (ELFT::TargetEndianness == llvm::support::little) + ? 
llvm::ELF::ELFDATA2LSB + : llvm::ELF::ELFDATA2MSB; + _eh.e_type = this->_context.getOutputELFType(); + _eh.e_machine = this->_context.getOutputMachine(); + } + +private: + Elf_Ehdr _eh; +}; + +template <class ELFT> +ELFHeader<ELFT>::ELFHeader(const ELFLinkingContext &context) + : Chunk<ELFT>("elfhdr", Chunk<ELFT>::Kind::ELFHeader, context) { + this->_alignment = ELFT::Is64Bits ? 8 : 4; + this->_fsize = sizeof(Elf_Ehdr); + this->_msize = sizeof(Elf_Ehdr); + memset(_eh.e_ident, 0, llvm::ELF::EI_NIDENT); + e_ident(llvm::ELF::EI_MAG0, 0x7f); + e_ident(llvm::ELF::EI_MAG1, 'E'); + e_ident(llvm::ELF::EI_MAG2, 'L'); + e_ident(llvm::ELF::EI_MAG3, 'F'); + e_ehsize(sizeof(Elf_Ehdr)); + e_flags(0); +} + +template <class ELFT> +void ELFHeader<ELFT>::write(ELFWriter *writer, TargetLayout<ELFT> &layout, + llvm::FileOutputBuffer &buffer) { + uint8_t *chunkBuffer = buffer.getBufferStart(); + uint8_t *atomContent = chunkBuffer + this->fileOffset(); + memcpy(atomContent, &_eh, fileSize()); +} + +/// \brief An ProgramHeader represents the Elf[32/64]_Phdr structure at the +/// start of an ELF executable file. 
+template<class ELFT> +class ProgramHeader : public Chunk<ELFT> { +public: + typedef llvm::object::Elf_Phdr_Impl<ELFT> Elf_Phdr; + typedef typename std::vector<Elf_Phdr *>::iterator PhIterT; + typedef typename std::reverse_iterator<PhIterT> ReversePhIterT; + + /// \brief Find a program header entry, given the type of entry that + /// we are looking for + class FindPhdr { + public: + FindPhdr(uint64_t type, uint64_t flags, uint64_t flagsClear) + : _type(type) + , _flags(flags) + , _flagsClear(flagsClear) { + } + + bool operator()(const llvm::object::Elf_Phdr_Impl<ELFT> *j) const { + return ((j->p_type == _type) && + ((j->p_flags & _flags) == _flags) && + (!(j->p_flags & _flagsClear))); + } + private: + uint64_t _type; + uint64_t _flags; + uint64_t _flagsClear; + }; + + ProgramHeader(const ELFLinkingContext &context) + : Chunk<ELFT>("elfphdr", Chunk<ELFT>::Kind::ProgramHeader, context) { + this->_alignment = ELFT::Is64Bits ? 8 : 4; + resetProgramHeaders(); + } + + bool addSegment(Segment<ELFT> *segment); + + void resetProgramHeaders() { _phi = _ph.begin(); } + + uint64_t fileSize() const { return sizeof(Elf_Phdr) * _ph.size(); } + + static bool classof(const Chunk<ELFT> *c) { + return c->Kind() == Chunk<ELFT>::Kind::ProgramHeader; + } + + void write(ELFWriter *writer, TargetLayout<ELFT> &layout, + llvm::FileOutputBuffer &buffer); + + /// \brief find a program header entry in the list of program headers + ReversePhIterT + findProgramHeader(uint64_t type, uint64_t flags, uint64_t flagClear) { + return std::find_if(_ph.rbegin(), _ph.rend(), + FindPhdr(type, flags, flagClear)); + } + + PhIterT begin() { + return _ph.begin(); + } + + PhIterT end() { + return _ph.end(); + } + + ReversePhIterT rbegin() { return _ph.rbegin(); } + + ReversePhIterT rend() { return _ph.rend(); } + + virtual void doPreFlight() {} + + void finalize() {} + + int64_t entsize() { return sizeof(Elf_Phdr); } + + int64_t numHeaders() { + return _ph.size(); + } + + int getContentType() const { return 
Chunk<ELFT>::ContentType::Header; } + +private: + Elf_Phdr *allocateProgramHeader(bool &allocatedNew) { + Elf_Phdr *phdr; + if (_phi == _ph.end()) { + phdr = new (_allocator) Elf_Phdr; + _ph.push_back(phdr); + _phi = _ph.end(); + allocatedNew = true; + } else { + phdr = (*_phi); + ++_phi; + } + return phdr; + } + + std::vector<Elf_Phdr *> _ph; + PhIterT _phi; + llvm::BumpPtrAllocator _allocator; +}; + +template <class ELFT> +bool ProgramHeader<ELFT>::addSegment(Segment<ELFT> *segment) { + bool allocatedNew = false; + ELFLinkingContext::OutputMagic outputMagic = this->_context.getOutputMagic(); + // For segments that are not a loadable segment, we + // just pick the values directly from the segment as there + // wouldnt be any slices within that + if (segment->segmentType() != llvm::ELF::PT_LOAD) { + Elf_Phdr *phdr = allocateProgramHeader(allocatedNew); + phdr->p_type = segment->segmentType(); + phdr->p_offset = segment->fileOffset(); + phdr->p_vaddr = segment->virtualAddr(); + phdr->p_paddr = segment->virtualAddr(); + phdr->p_filesz = segment->fileSize(); + phdr->p_memsz = segment->memSize(); + phdr->p_flags = segment->flags(); + phdr->p_align = segment->alignment(); + this->_fsize = fileSize(); + this->_msize = this->_fsize; + return allocatedNew; + } + // For all other segments, use the slice + // to derive program headers + for (auto slice : segment->slices()) { + Elf_Phdr *phdr = allocateProgramHeader(allocatedNew); + phdr->p_type = segment->segmentType(); + phdr->p_offset = slice->fileOffset(); + phdr->p_vaddr = slice->virtualAddr(); + phdr->p_paddr = slice->virtualAddr(); + phdr->p_filesz = slice->fileSize(); + phdr->p_memsz = slice->memSize(); + phdr->p_flags = segment->flags(); + phdr->p_align = slice->alignment(); + uint64_t segPageSize = segment->pageSize(); + uint64_t sliceAlign = slice->alignment(); + // Alignment of PT_LOAD segments are set to the page size, but if the + // alignment of the slice is greater than the page size, set the alignment + // of 
the segment appropriately. + if (outputMagic != ELFLinkingContext::OutputMagic::NMAGIC && + outputMagic != ELFLinkingContext::OutputMagic::OMAGIC) { + phdr->p_align = (phdr->p_type == llvm::ELF::PT_LOAD) + ? (segPageSize < sliceAlign) ? sliceAlign : segPageSize + : sliceAlign; + } else + phdr->p_align = slice->alignment(); + } + this->_fsize = fileSize(); + this->_msize = this->_fsize; + + return allocatedNew; +} + +template <class ELFT> +void ProgramHeader<ELFT>::write(ELFWriter *writer, TargetLayout<ELFT> &layout, + llvm::FileOutputBuffer &buffer) { + uint8_t *chunkBuffer = buffer.getBufferStart(); + uint8_t *dest = chunkBuffer + this->fileOffset(); + for (auto phi : _ph) { + memcpy(dest, phi, sizeof(Elf_Phdr)); + dest += sizeof(Elf_Phdr); + } +} + +/// \brief An SectionHeader represents the Elf[32/64]_Shdr structure +/// at the end of the file +template<class ELFT> +class SectionHeader : public Chunk<ELFT> { +public: + typedef llvm::object::Elf_Shdr_Impl<ELFT> Elf_Shdr; + + SectionHeader(const ELFLinkingContext &, int32_t order); + + void appendSection(OutputSection<ELFT> *section); + + void updateSection(Section<ELFT> *section); + + static bool classof(const Chunk<ELFT> *c) { + return c->getChunkKind() == Chunk<ELFT>::Kind::SectionHeader; + } + + void setStringSection(StringTable<ELFT> *s) { + _stringSection = s; + } + + void write(ELFWriter *writer, TargetLayout<ELFT> &layout, + llvm::FileOutputBuffer &buffer); + + virtual void doPreFlight() {} + + void finalize() {} + + uint64_t fileSize() const { return sizeof(Elf_Shdr) * _sectionInfo.size(); } + + uint64_t entsize() { return sizeof(Elf_Shdr); } + + int getContentType() const { return Chunk<ELFT>::ContentType::Header; } + + uint64_t numHeaders() { return _sectionInfo.size(); } + +private: + StringTable<ELFT> *_stringSection; + std::vector<Elf_Shdr*> _sectionInfo; + llvm::BumpPtrAllocator _sectionAllocate; +}; + +template <class ELFT> +SectionHeader<ELFT>::SectionHeader(const ELFLinkingContext &context, + 
int32_t order) + : Chunk<ELFT>("shdr", Chunk<ELFT>::Kind::SectionHeader, context) { + this->_fsize = 0; + this->_alignment = 8; + this->setOrder(order); + // The first element in the list is always NULL + Elf_Shdr *nullshdr = new (_sectionAllocate.Allocate<Elf_Shdr>()) Elf_Shdr; + ::memset(nullshdr, 0, sizeof (Elf_Shdr)); + _sectionInfo.push_back(nullshdr); + this->_fsize += sizeof (Elf_Shdr); +} + +template <class ELFT> +void SectionHeader<ELFT>::appendSection(OutputSection<ELFT> *section) { + Elf_Shdr *shdr = new (_sectionAllocate.Allocate<Elf_Shdr>()) Elf_Shdr; + shdr->sh_name = _stringSection->addString(section->name()); + shdr->sh_type = section->type(); + shdr->sh_flags = section->flags(); + shdr->sh_offset = section->fileOffset(); + shdr->sh_addr = section->virtualAddr(); + if (section->isLoadableSection()) + shdr->sh_size = section->memSize(); + else + shdr->sh_size = section->fileSize(); + shdr->sh_link = section->link(); + shdr->sh_info = section->shinfo(); + shdr->sh_addralign = section->alignment(); + shdr->sh_entsize = section->entsize(); + _sectionInfo.push_back(shdr); +} + +template<class ELFT> +void +SectionHeader<ELFT>::updateSection(Section<ELFT> *section) { + Elf_Shdr *shdr = _sectionInfo[section->ordinal()]; + shdr->sh_type = section->getType(); + shdr->sh_flags = section->getFlags(); + shdr->sh_offset = section->fileOffset(); + shdr->sh_addr = section->virtualAddr(); + shdr->sh_size = section->fileSize(); + shdr->sh_link = section->getLink(); + shdr->sh_info = section->getInfo(); + shdr->sh_addralign = section->alignment(); + shdr->sh_entsize = section->getEntSize(); +} + +template <class ELFT> +void SectionHeader<ELFT>::write(ELFWriter *writer, TargetLayout<ELFT> &layout, + llvm::FileOutputBuffer &buffer) { + uint8_t *chunkBuffer = buffer.getBufferStart(); + uint8_t *dest = chunkBuffer + this->fileOffset(); + for (auto shi : _sectionInfo) { + memcpy(dest, shi, sizeof(Elf_Shdr)); + dest += sizeof(Elf_Shdr); + } + _stringSection->write(writer, 
layout, buffer); +} +} // end namespace elf +} // end namespace lld + +#endif diff --git a/lib/ReaderWriter/ELF/Hexagon/CMakeLists.txt b/lib/ReaderWriter/ELF/Hexagon/CMakeLists.txt new file mode 100644 index 000000000000..6928f43c5459 --- /dev/null +++ b/lib/ReaderWriter/ELF/Hexagon/CMakeLists.txt @@ -0,0 +1,11 @@ +add_llvm_library(lldHexagonELFTarget + HexagonLinkingContext.cpp + HexagonRelocationHandler.cpp + HexagonTargetHandler.cpp + LINK_LIBS + lldELF + lldReaderWriter + lldCore + LLVMObject + LLVMSupport + ) diff --git a/lib/ReaderWriter/ELF/Hexagon/HexagonDynamicLibraryWriter.h b/lib/ReaderWriter/ELF/Hexagon/HexagonDynamicLibraryWriter.h new file mode 100644 index 000000000000..e2d3193045b7 --- /dev/null +++ b/lib/ReaderWriter/ELF/Hexagon/HexagonDynamicLibraryWriter.h @@ -0,0 +1,79 @@ +//===- lib/ReaderWriter/ELF/Hexagon/HexagonDynamicLibraryWriter.h ---------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +#ifndef HEXAGON_DYNAMIC_LIBRARY_WRITER_H +#define HEXAGON_DYNAMIC_LIBRARY_WRITER_H + +#include "DynamicLibraryWriter.h" +#include "HexagonExecutableAtoms.h" +#include "HexagonLinkingContext.h" + +namespace lld { +namespace elf { + +template <typename ELFT> class HexagonTargetLayout; + +template <class ELFT> +class HexagonDynamicLibraryWriter : public DynamicLibraryWriter<ELFT>, + public HexagonELFWriter<ELFT> { +public: + HexagonDynamicLibraryWriter(HexagonLinkingContext &context, + HexagonTargetLayout<ELFT> &layout); + +protected: + // Add any runtime files and their atoms to the output + virtual bool createImplicitFiles(std::vector<std::unique_ptr<File>> &); + + virtual void finalizeDefaultAtomValues(); + + virtual std::error_code setELFHeader() { + DynamicLibraryWriter<ELFT>::setELFHeader(); + HexagonELFWriter<ELFT>::setELFHeader(*this->_elfHeader); + return std::error_code(); + } + +private: + void addDefaultAtoms() { + _hexagonRuntimeFile->addAbsoluteAtom("_GLOBAL_OFFSET_TABLE_"); + _hexagonRuntimeFile->addAbsoluteAtom("_DYNAMIC"); + } + + HexagonLinkingContext &_hexagonLinkingContext; + HexagonTargetLayout<ELFT> &_hexagonTargetLayout; + std::unique_ptr<HexagonRuntimeFile<ELFT>> _hexagonRuntimeFile; +}; + +template <class ELFT> +HexagonDynamicLibraryWriter<ELFT>::HexagonDynamicLibraryWriter( + HexagonLinkingContext &context, HexagonTargetLayout<ELFT> &layout) + : DynamicLibraryWriter<ELFT>(context, layout), + HexagonELFWriter<ELFT>(context, layout), _hexagonLinkingContext(context), + _hexagonTargetLayout(layout), + _hexagonRuntimeFile(new HexagonRuntimeFile<ELFT>(context)) {} + +template <class ELFT> +bool HexagonDynamicLibraryWriter<ELFT>::createImplicitFiles( + std::vector<std::unique_ptr<File>> &result) { + DynamicLibraryWriter<ELFT>::createImplicitFiles(result); + // Add the default atoms as defined for hexagon + addDefaultAtoms(); + 
result.push_back(std::move(_hexagonRuntimeFile)); + return true; +} + +template <class ELFT> +void HexagonDynamicLibraryWriter<ELFT>::finalizeDefaultAtomValues() { + // Finalize the atom values that are part of the parent. + DynamicLibraryWriter<ELFT>::finalizeDefaultAtomValues(); + HexagonELFWriter<ELFT>::finalizeHexagonRuntimeAtomValues(); +} + +} // namespace elf +} // namespace lld + +#endif // HEXAGON_DYNAMIC_LIBRARY_WRITER_H diff --git a/lib/ReaderWriter/ELF/Hexagon/HexagonELFFile.h b/lib/ReaderWriter/ELF/Hexagon/HexagonELFFile.h new file mode 100644 index 000000000000..ab0b9b432b43 --- /dev/null +++ b/lib/ReaderWriter/ELF/Hexagon/HexagonELFFile.h @@ -0,0 +1,170 @@ +//===- lib/ReaderWriter/ELF/HexagonELFFile.h ------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_ELF_HEXAGON_ELF_FILE_H +#define LLD_READER_WRITER_ELF_HEXAGON_ELF_FILE_H + +#include "ELFReader.h" +#include "HexagonLinkingContext.h" + +namespace lld { +namespace elf { + +template <class ELFT> class HexagonELFFile; + +template <class ELFT> +class HexagonELFDefinedAtom : public ELFDefinedAtom<ELFT> { + typedef llvm::object::Elf_Sym_Impl<ELFT> Elf_Sym; + typedef llvm::object::Elf_Shdr_Impl<ELFT> Elf_Shdr; + +public: + HexagonELFDefinedAtom(const HexagonELFFile<ELFT> &file, StringRef symbolName, + StringRef sectionName, const Elf_Sym *symbol, + const Elf_Shdr *section, ArrayRef<uint8_t> contentData, + unsigned int referenceStart, unsigned int referenceEnd, + std::vector<ELFReference<ELFT> *> &referenceList) + : ELFDefinedAtom<ELFT>(file, symbolName, sectionName, symbol, section, + contentData, referenceStart, referenceEnd, + referenceList) {} + + virtual DefinedAtom::ContentType contentType() const { + if (this->_contentType != DefinedAtom::typeUnknown) + return 
this->_contentType; + else if (this->_section->sh_flags & llvm::ELF::SHF_HEX_GPREL) { + if (this->_section->sh_type == llvm::ELF::SHT_NOBITS) + return (this->_contentType = DefinedAtom::typeZeroFillFast); + else + return (this->_contentType = DefinedAtom::typeDataFast); + } + return ELFDefinedAtom<ELFT>::contentType(); + } + + virtual DefinedAtom::ContentPermissions permissions() const { + if (this->_section->sh_flags & llvm::ELF::SHF_HEX_GPREL) + return DefinedAtom::permRW_; + return ELFDefinedAtom<ELFT>::permissions(); + } +}; + +template <class ELFT> class HexagonELFCommonAtom : public ELFCommonAtom<ELFT> { + typedef llvm::object::Elf_Sym_Impl<ELFT> Elf_Sym; + typedef llvm::object::Elf_Shdr_Impl<ELFT> Elf_Shdr; + +public: + HexagonELFCommonAtom(const HexagonELFFile<ELFT> &file, StringRef symbolName, + const Elf_Sym *symbol) + : ELFCommonAtom<ELFT>(file, symbolName, symbol) {} + + virtual bool isSmallCommonSymbol() const { + switch (this->_symbol->st_shndx) { + // Common symbols + case llvm::ELF::SHN_HEXAGON_SCOMMON: + case llvm::ELF::SHN_HEXAGON_SCOMMON_1: + case llvm::ELF::SHN_HEXAGON_SCOMMON_2: + case llvm::ELF::SHN_HEXAGON_SCOMMON_4: + case llvm::ELF::SHN_HEXAGON_SCOMMON_8: + return true; + default: + break; + } + return false; + } + + virtual uint64_t size() const { + if (isSmallCommonSymbol()) + return this->_symbol->st_size; + return ELFCommonAtom<ELFT>::size(); + } + + virtual DefinedAtom::Merge merge() const { + if (this->_symbol->getBinding() == llvm::ELF::STB_WEAK) + return DefinedAtom::mergeAsWeak; + if (isSmallCommonSymbol()) + return DefinedAtom::mergeAsTentative; + return ELFCommonAtom<ELFT>::merge(); + } + + virtual DefinedAtom::ContentType contentType() const { + if (isSmallCommonSymbol()) + return DefinedAtom::typeZeroFillFast; + return ELFCommonAtom<ELFT>::contentType(); + } + + virtual DefinedAtom::Alignment alignment() const { + if (isSmallCommonSymbol()) + return DefinedAtom::Alignment(llvm::Log2_64(this->_symbol->st_value)); + return 
ELFCommonAtom<ELFT>::alignment(); + } + + virtual DefinedAtom::ContentPermissions permissions() const { + if (isSmallCommonSymbol()) + return DefinedAtom::permRW_; + return ELFCommonAtom<ELFT>::permissions(); + } +}; + +template <class ELFT> class HexagonELFFile : public ELFFile<ELFT> { + typedef llvm::object::Elf_Sym_Impl<ELFT> Elf_Sym; + typedef llvm::object::Elf_Shdr_Impl<ELFT> Elf_Shdr; + +public: + HexagonELFFile(std::unique_ptr<MemoryBuffer> mb, HexagonLinkingContext &ctx) + : ELFFile<ELFT>(std::move(mb), ctx) {} + + static ErrorOr<std::unique_ptr<HexagonELFFile>> + create(std::unique_ptr<MemoryBuffer> mb, HexagonLinkingContext &ctx) { + return std::unique_ptr<HexagonELFFile<ELFT>>( + new HexagonELFFile<ELFT>(std::move(mb), ctx)); + } + + bool isCommonSymbol(const Elf_Sym *symbol) const override { + switch (symbol->st_shndx) { + // Common symbols + case llvm::ELF::SHN_HEXAGON_SCOMMON: + case llvm::ELF::SHN_HEXAGON_SCOMMON_1: + case llvm::ELF::SHN_HEXAGON_SCOMMON_2: + case llvm::ELF::SHN_HEXAGON_SCOMMON_4: + case llvm::ELF::SHN_HEXAGON_SCOMMON_8: + return true; + default: + break; + } + return ELFFile<ELFT>::isCommonSymbol(symbol); + } + + /// Process the Defined symbol and create an atom for it. + ErrorOr<ELFDefinedAtom<ELFT> *> + handleDefinedSymbol(StringRef symName, StringRef sectionName, + const Elf_Sym *sym, const Elf_Shdr *sectionHdr, + ArrayRef<uint8_t> contentData, + unsigned int referenceStart, unsigned int referenceEnd, + std::vector<ELFReference<ELFT> *> &referenceList) override { + return new (this->_readerStorage) HexagonELFDefinedAtom<ELFT>( + *this, symName, sectionName, sym, sectionHdr, contentData, + referenceStart, referenceEnd, referenceList); + } + + /// Process the Common symbol and create an atom for it. 
+ ErrorOr<ELFCommonAtom<ELFT> *> + handleCommonSymbol(StringRef symName, const Elf_Sym *sym) override { + return new (this->_readerStorage) + HexagonELFCommonAtom<ELFT>(*this, symName, sym); + } +}; + +template <class ELFT> class HexagonDynamicFile : public DynamicFile<ELFT> { +public: + HexagonDynamicFile(const HexagonLinkingContext &context, StringRef name) + : DynamicFile<ELFT>(context, name) {} +}; + +} // elf +} // lld + +#endif // LLD_READER_WRITER_ELF_HEXAGON_ELF_FILE_H diff --git a/lib/ReaderWriter/ELF/Hexagon/HexagonELFReader.h b/lib/ReaderWriter/ELF/Hexagon/HexagonELFReader.h new file mode 100644 index 000000000000..1a4f891df799 --- /dev/null +++ b/lib/ReaderWriter/ELF/Hexagon/HexagonELFReader.h @@ -0,0 +1,62 @@ +//===- lib/ReaderWriter/ELF/HexagonELFReader.h ----------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_HEXAGON_ELF_READER_H +#define LLD_READER_WRITER_HEXAGON_ELF_READER_H + +#include "ELFReader.h" +#include "HexagonELFFile.h" + +namespace lld { +namespace elf { + +typedef llvm::object::ELFType<llvm::support::little, 2, false> HexagonELFType; + +struct HexagonDynamicFileCreateELFTraits { + typedef llvm::ErrorOr<std::unique_ptr<lld::SharedLibraryFile>> result_type; + + template <class ELFT> + static result_type create(std::unique_ptr<llvm::MemoryBuffer> mb, + HexagonLinkingContext &ctx) { + return lld::elf::HexagonDynamicFile<ELFT>::create(std::move(mb), ctx); + } +}; + +struct HexagonELFFileCreateELFTraits { + typedef llvm::ErrorOr<std::unique_ptr<lld::File>> result_type; + + template <class ELFT> + static result_type create(std::unique_ptr<llvm::MemoryBuffer> mb, + HexagonLinkingContext &ctx) { + return lld::elf::HexagonELFFile<ELFT>::create(std::move(mb), ctx); + } +}; + +class HexagonELFObjectReader + : public 
ELFObjectReader<HexagonELFType, HexagonELFFileCreateELFTraits, + HexagonLinkingContext> { +public: + HexagonELFObjectReader(HexagonLinkingContext &ctx) + : ELFObjectReader<HexagonELFType, HexagonELFFileCreateELFTraits, + HexagonLinkingContext>(ctx, llvm::ELF::EM_HEXAGON) {} +}; + +class HexagonELFDSOReader + : public ELFDSOReader<HexagonELFType, HexagonDynamicFileCreateELFTraits, + HexagonLinkingContext> { +public: + HexagonELFDSOReader(HexagonLinkingContext &ctx) + : ELFDSOReader<HexagonELFType, HexagonDynamicFileCreateELFTraits, + HexagonLinkingContext>(ctx, llvm::ELF::EM_HEXAGON) {} +}; + +} // namespace elf +} // namespace lld + +#endif // LLD_READER_WRITER_ELF_READER_H diff --git a/lib/ReaderWriter/ELF/Hexagon/HexagonELFWriters.h b/lib/ReaderWriter/ELF/Hexagon/HexagonELFWriters.h new file mode 100644 index 000000000000..96c74f72222d --- /dev/null +++ b/lib/ReaderWriter/ELF/Hexagon/HexagonELFWriters.h @@ -0,0 +1,61 @@ +//===- lib/ReaderWriter/ELF/Hexagon/HexagonELFWriters.h -------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef HEXAGON_ELF_WRITERS_H +#define HEXAGON_ELF_WRITERS_H + +#include "HexagonLinkingContext.h" +#include "OutputELFWriter.h" + +namespace lld { +namespace elf { + +template <class ELFT> class HexagonTargetLayout; + +template <typename ELFT> class HexagonELFWriter { +public: + HexagonELFWriter(HexagonLinkingContext &context, + HexagonTargetLayout<ELFT> &targetLayout) + : _hexagonLinkingContext(context), _hexagonTargetLayout(targetLayout) {} + +protected: + bool setELFHeader(ELFHeader<ELFT> &elfHeader) { + elfHeader.e_ident(llvm::ELF::EI_VERSION, 1); + elfHeader.e_ident(llvm::ELF::EI_OSABI, 0); + elfHeader.e_version(1); + elfHeader.e_flags(0x3); + return true; + } + + void finalizeHexagonRuntimeAtomValues() { + if (_hexagonLinkingContext.isDynamic()) { + auto gotAtomIter = + _hexagonTargetLayout.findAbsoluteAtom("_GLOBAL_OFFSET_TABLE_"); + auto gotpltSection = _hexagonTargetLayout.findOutputSection(".got.plt"); + if (gotpltSection) + (*gotAtomIter)->_virtualAddr = gotpltSection->virtualAddr(); + else + (*gotAtomIter)->_virtualAddr = 0; + auto dynamicAtomIter = _hexagonTargetLayout.findAbsoluteAtom("_DYNAMIC"); + auto dynamicSection = _hexagonTargetLayout.findOutputSection(".dynamic"); + if (dynamicSection) + (*dynamicAtomIter)->_virtualAddr = dynamicSection->virtualAddr(); + else + (*dynamicAtomIter)->_virtualAddr = 0; + } + } + +private: + HexagonLinkingContext &_hexagonLinkingContext; + HexagonTargetLayout<ELFT> &_hexagonTargetLayout; +}; + +} // elf +} // lld +#endif // HEXAGON_ELF_WRITERS_H diff --git a/lib/ReaderWriter/ELF/Hexagon/HexagonEncodings.h b/lib/ReaderWriter/ELF/Hexagon/HexagonEncodings.h new file mode 100644 index 000000000000..3e12786704a2 --- /dev/null +++ b/lib/ReaderWriter/ELF/Hexagon/HexagonEncodings.h @@ -0,0 +1,601 @@ +//===- lib/ReaderWriter/ELF/Hexagon/HexagonEncodings.h -------------------===// +// +// The LLVM Linker +// +// This file is distributed 
under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +Instruction insn_encodings[] = { + { 0xffe00004, 0x40000000, 0x20f8, 0x0 }, + { 0xffe03080, 0x9ca03080, 0xf60, 0x0 }, + { 0xf9e00000, 0x48c00000, 0x61f20ff, 0x0 }, + { 0xf7c02300, 0x13802100, 0x3000fe, 0x0 }, + { 0xffe00000, 0x60c00000, 0x1f18, 0x0 }, + { 0xffe00000, 0x69c00000, 0x1f18, 0x0 }, + { 0xffe02000, 0x43000000, 0x7e0, 0x0 }, + { 0xff602060, 0x3e000060, 0x1f80, 0x0 }, + { 0xffe03000, 0x9ae01000, 0xf60, 0x0 }, + { 0xf9e00000, 0x91600000, 0x6003fe0, 0x0 }, + { 0xffe02084, 0xaf000084, 0x30078, 0x0 }, + { 0xff602060, 0x3e000020, 0x1f80, 0x0 }, + { 0xff602060, 0x3e200040, 0x1f80, 0x0 }, + { 0xf7c02000, 0x10c02000, 0x3000fe, 0x0 }, + { 0xffe00000, 0x60200000, 0x1f18, 0x0 }, + { 0xffe00000, 0x69200000, 0x1f18, 0x0 }, + { 0xffe038c0, 0xada00880, 0x3f, 0x0 }, + { 0xff602000, 0x73002000, 0x1fe0, 0x0 }, + { 0xf7c02000, 0x26c02000, 0x3000fe, 0x0 }, + { 0xffe03880, 0x9f403880, 0x1f0100, 0x0 }, + { 0xf9e00000, 0x48400000, 0x61f20ff, 0x0 }, + { 0xffe02000, 0x41600000, 0x7e0, 0x0 }, + { 0xffe02084, 0xaf000080, 0x30078, 0x0 }, + { 0xf7c02300, 0x13800100, 0x3000fe, 0x0 }, + { 0xffe01804, 0x46a00000, 0x20f8, 0x0 }, + { 0xffe00004, 0x42400000, 0x20f8, 0x0 }, + { 0xf7c02000, 0x22400000, 0x3000fe, 0x0 }, + { 0xf7c02000, 0x12402000, 0x3000fe, 0x0 }, + { 0xfc003d18, 0x28003c18, 0x3f00000, 0x1 }, + { 0xffe00000, 0x39000000, 0x201f, 0x0 }, + { 0xff601018, 0xdd400008, 0xfe0, 0x0 }, + { 0xffc0001c, 0x75400000, 0x203fe0, 0x0 }, + { 0xfc003fc7, 0x48003f47, 0x3f00000, 0x1 }, + { 0xffe03080, 0x9ca03000, 0xf60, 0x0 }, + { 0xf9e00000, 0x90800000, 0x6003fe0, 0x0 }, + { 0xf8003fc7, 0x40003fc4, 0x7f00000, 0x1 }, + { 0xfc003e00, 0x68003c00, 0x3f00000, 0x1 }, + { 0xf8003fc7, 0x40003fc5, 0x7f00000, 0x1 }, + { 0xf9e00000, 0x91800000, 0x6003fe0, 0x0 }, + { 0xff602060, 0x3e400060, 0x1f80, 0x0 }, + { 0xff602060, 0x3e000000, 
0x1f80, 0x0 }, + { 0xf8003d18, 0x20003c18, 0x7f00000, 0x1 }, + { 0xf8003f00, 0x20003800, 0x7f00000, 0x1 }, + { 0xf8003d18, 0x20003c10, 0x7f00000, 0x1 }, + { 0xff602000, 0x73602000, 0x1fe0, 0x0 }, + { 0xffe03880, 0x9f002080, 0x1f0100, 0x0 }, + { 0xffe02000, 0x47000000, 0x7e0, 0x0 }, + { 0xf9e00000, 0x91400000, 0x6003fe0, 0x0 }, + { 0xffe02080, 0xabc00080, 0x3f, 0x0 }, + { 0xf7c02000, 0x20802000, 0x3000fe, 0x0 }, + { 0xf8003fc7, 0x40003f44, 0x7f00000, 0x1 }, + { 0xffe03884, 0xafa03084, 0x30078, 0x0 }, + { 0xffe03000, 0x9b001000, 0xf60, 0x0 }, + { 0xffe01804, 0x42a00800, 0x20f8, 0x0 }, + { 0xfc003f00, 0x28003100, 0x3f00000, 0x1 }, + { 0xffe02080, 0xab800080, 0x3f, 0x0 }, + { 0xf7c02000, 0x24c00000, 0x3000fe, 0x0 }, + { 0xffe00000, 0x39a00000, 0x201f, 0x0 }, + { 0xf7c02300, 0x13802300, 0x3000fe, 0x0 }, + { 0xffe01804, 0x46a00800, 0x20f8, 0x0 }, + { 0xffe020c0, 0xad602080, 0x3f, 0x0 }, + { 0xfc003f00, 0x28003500, 0x3f00000, 0x1 }, + { 0xfc003f00, 0x28003400, 0x3f00000, 0x1 }, + { 0xffe020c0, 0xad6000c0, 0x3f, 0x0 }, + { 0xffe00000, 0x60000000, 0x1f18, 0x0 }, + { 0xf8003000, 0x40000000, 0x7f00000, 0x1 }, + { 0xffe00000, 0x69000000, 0x1f18, 0x0 }, + { 0xffe03080, 0x9c601080, 0xf60, 0x0 }, + { 0xffe03080, 0x9ce01000, 0xf60, 0x0 }, + { 0xffe03080, 0x9c601000, 0xf60, 0x0 }, + { 0xf7c02000, 0x13402000, 0x3000fe, 0x0 }, + { 0xffe03080, 0x9c603000, 0xf60, 0x0 }, + { 0xf7c02000, 0x21c00000, 0x3000fe, 0x0 }, + { 0xfc003000, 0x68000000, 0x3f00000, 0x1 }, + { 0xf8003800, 0x60002000, 0x7f00000, 0x1 }, + { 0xffe02084, 0xaf802084, 0x30078, 0x0 }, + { 0xfc003000, 0x48000000, 0x3f00000, 0x1 }, + { 0xf7c02300, 0x11c02100, 0x3000fe, 0x0 }, + { 0xf7c02000, 0x12800000, 0x3000fe, 0x0 }, + { 0xfc003e70, 0x28003a40, 0x3f00000, 0x1 }, + { 0xfc003f00, 0x28003300, 0x3f00000, 0x1 }, + { 0xff800000, 0xe0000000, 0x1fe0, 0x0 }, + { 0xff602060, 0x3f400000, 0x1f80, 0x0 }, + { 0xffe00004, 0x42000000, 0x20f8, 0x0 }, + { 0xf8003f00, 0x60003300, 0x7f00000, 0x1 }, + { 0xffe01804, 0x42a00000, 0x20f8, 0x0 }, 
+ { 0xf7c02000, 0x12c00000, 0x3000fe, 0x0 }, + { 0xf0000000, 0x0, 0xfff3fff, 0x0 }, + { 0xff000016, 0xde000016, 0xe020e8, 0x0 }, + { 0xffe03000, 0x9b201000, 0xf60, 0x0 }, + { 0xffe03880, 0xaba00880, 0x3f, 0x0 }, + { 0xf8003e00, 0x40003c00, 0x7f00000, 0x1 }, + { 0xff602060, 0x3f200040, 0x1f80, 0x0 }, + { 0xffe03880, 0x9f203880, 0x1f0100, 0x0 }, + { 0xf7c02000, 0x20c00000, 0x3000fe, 0x0 }, + { 0xf9e01800, 0x48a00800, 0x61f20ff, 0x0 }, + { 0xf9e00000, 0x90a00000, 0x6003fe0, 0x0 }, + { 0xff802000, 0x74802000, 0x1fe0, 0x0 }, + { 0xffe03000, 0x9a401000, 0xf60, 0x0 }, + { 0xf7c02000, 0x10002000, 0x3000fe, 0x0 }, + { 0xf7c03000, 0x14803000, 0x3000fe, 0x0 }, + { 0xffe020c0, 0xad0020c0, 0x3f, 0x0 }, + { 0xffe0001c, 0x75800000, 0x3fe0, 0x0 }, + { 0xf9e01800, 0x48a01000, 0x61f20ff, 0x0 }, + { 0xffe03080, 0x9dc03000, 0xf60, 0x0 }, + { 0xffe03080, 0x9dc03080, 0xf60, 0x0 }, + { 0xffe03080, 0x9dc01000, 0xf60, 0x0 }, + { 0xffe03080, 0x9dc01080, 0xf60, 0x0 }, + { 0xffe03080, 0x9d601000, 0xf60, 0x0 }, + { 0xffe03080, 0x9d601080, 0xf60, 0x0 }, + { 0xffe03080, 0x9d603000, 0xf60, 0x0 }, + { 0xffe03080, 0x9d603080, 0xf60, 0x0 }, + { 0xfc003e00, 0x48003c00, 0x3f00000, 0x1 }, + { 0xffe02084, 0xaf402084, 0x30078, 0x0 }, + { 0xffe00004, 0x46600000, 0x20f8, 0x0 }, + { 0xffe03880, 0x9f203080, 0x1f0100, 0x0 }, + { 0xf8003f00, 0x20003100, 0x7f00000, 0x1 }, + { 0xf7c02000, 0x11402000, 0x3000fe, 0x0 }, + { 0xf8003d08, 0x20003d00, 0x7f00000, 0x1 }, + { 0xffe03080, 0x9ca01080, 0xf60, 0x0 }, + { 0xffe03080, 0x9ca01000, 0xf60, 0x0 }, + { 0xffe00000, 0x38a00000, 0x201f, 0x0 }, + { 0xf7c02300, 0x11800000, 0x3000fe, 0x0 }, + { 0xf7c02300, 0x13c02300, 0x3000fe, 0x0 }, + { 0xffe03080, 0x9ce03000, 0xf60, 0x0 }, + { 0xf9e00000, 0x90e00000, 0x6003fe0, 0x0 }, + { 0xffe02084, 0xaf400080, 0x30078, 0x0 }, + { 0xffe03080, 0x9ce03080, 0xf60, 0x0 }, + { 0xff000000, 0x78000000, 0xdf3fe0, 0x0 }, + { 0xffe03080, 0x9ce01080, 0xf60, 0x0 }, + { 0xffe03880, 0xaba01080, 0x3f, 0x0 }, + { 0xffe020c0, 0xad002080, 0x3f, 0x0 }, 
+ { 0xffe020c0, 0xad0000c0, 0x3f, 0x0 }, + { 0xffe020c0, 0xad000080, 0x3f, 0x0 }, + { 0xf7c02000, 0x25000000, 0x3000fe, 0x0 }, + { 0xff602060, 0x3f200020, 0x1f80, 0x0 }, + { 0xffe02084, 0xafc00084, 0x30078, 0x0 }, + { 0xf7c02000, 0x24400000, 0x3000fe, 0x0 }, + { 0xfc003000, 0x48001000, 0x3f00000, 0x1 }, + { 0xf9e01800, 0xa1a01000, 0x60020ff, 0x0 }, + { 0xff602060, 0x3f000040, 0x1f80, 0x0 }, + { 0xffe02084, 0xaf602084, 0x30078, 0x0 }, + { 0xf8003f00, 0x20003400, 0x7f00000, 0x1 }, + { 0xffe02084, 0xaf400084, 0x30078, 0x0 }, + { 0xffe01804, 0x44a01000, 0x20f8, 0x0 }, + { 0xff602060, 0x3e200000, 0x1f80, 0x0 }, + { 0xf8003e70, 0x20003a70, 0x7f00000, 0x1 }, + { 0xf8003f00, 0x40003e00, 0x7f00000, 0x1 }, + { 0xf8003f00, 0x20003300, 0x7f00000, 0x1 }, + { 0xf7c02300, 0x13800300, 0x3000fe, 0x0 }, + { 0xffe038c0, 0xada00080, 0x3f, 0x0 }, + { 0xf9e00000, 0x49400000, 0x61f3fe0, 0x0 }, + { 0xf8003800, 0x40002800, 0x7f00000, 0x1 }, + { 0xffe038c0, 0xada020c0, 0x3f, 0x0 }, + { 0xffe03884, 0xafa00880, 0x30078, 0x0 }, + { 0xf9e00000, 0x49000000, 0x61f3fe0, 0x0 }, + { 0xff800000, 0xd7000000, 0x6020e0, 0x0 }, + { 0xffc00000, 0xda000000, 0x203fe0, 0x0 }, + { 0xf7c02000, 0x12802000, 0x3000fe, 0x0 }, + { 0xf9e00000, 0x49600000, 0x61f3fe0, 0x0 }, + { 0xffe02000, 0x47400000, 0x7e0, 0x0 }, + { 0xf9e00000, 0x49c00000, 0x61f3fe0, 0x0 }, + { 0xffe03000, 0x9bc01000, 0xf60, 0x0 }, + { 0xf7c02300, 0x13c00100, 0x3000fe, 0x0 }, + { 0xffe03880, 0x9f002880, 0x1f0100, 0x0 }, + { 0xffe03000, 0x9b601000, 0xf60, 0x0 }, + { 0xffe01804, 0x40a00800, 0x20f8, 0x0 }, + { 0xffe00004, 0x42800000, 0x20f8, 0x0 }, + { 0xf7c03000, 0x14800000, 0x3000fe, 0x0 }, + { 0xfc003000, 0x68001000, 0x3f00000, 0x1 }, + { 0xfc003fc7, 0x48003f44, 0x3f00000, 0x1 }, + { 0xfc003fc7, 0x48003f45, 0x3f00000, 0x1 }, + { 0xf7c02000, 0x10800000, 0x3000fe, 0x0 }, + { 0xf8003e70, 0x20003a50, 0x7f00000, 0x1 }, + { 0xf7c02000, 0x21002000, 0x3000fe, 0x0 }, + { 0xf8003fc4, 0x40003fc0, 0x7f00000, 0x1 }, + { 0xf9e00000, 0x48000000, 0x61f20ff, 0x0 
}, + { 0xffc0001c, 0x75000010, 0x203fe0, 0x0 }, + { 0xf8003f00, 0x20003800, 0x7f00000, 0x1 }, + { 0xf9e00000, 0xa1800000, 0x60020ff, 0x0 }, + { 0xffc01000, 0x61c00000, 0x202ffe, 0x0 }, + { 0xffe02084, 0xaf402080, 0x30078, 0x0 }, + { 0xffe03880, 0x9f602880, 0x1f0100, 0x0 }, + { 0xfc003f00, 0x68003000, 0x3f00000, 0x1 }, + { 0xfc003f00, 0x68003100, 0x3f00000, 0x1 }, + { 0xff602060, 0x3f200000, 0x1f80, 0x0 }, + { 0xffe03000, 0x9a801000, 0xf60, 0x0 }, + { 0xf7c02000, 0x24802000, 0x3000fe, 0x0 }, + { 0xffe00004, 0x42c00000, 0x20f8, 0x0 }, + { 0xf7c02300, 0x11802000, 0x3000fe, 0x0 }, + { 0xffc01000, 0x61401000, 0x202ffe, 0x0 }, + { 0xffe02000, 0x43c00000, 0x7e0, 0x0 }, + { 0xf7c02000, 0x11400000, 0x3000fe, 0x0 }, + { 0xf7c02000, 0x21800000, 0x3000fe, 0x0 }, + { 0xfc003c00, 0x28002c00, 0x3f00000, 0x1 }, + { 0xfc003f00, 0x28003200, 0x3f00000, 0x1 }, + { 0xffe03080, 0x9c803080, 0xf60, 0x0 }, + { 0xf7c03000, 0x14c03000, 0x3000fe, 0x0 }, + { 0xff800000, 0xdb800000, 0x6020e0, 0x0 }, + { 0xf7c02000, 0x22402000, 0x3000fe, 0x0 }, + { 0xffe00004, 0x46800000, 0x20f8, 0x0 }, + { 0xffe00000, 0x69a00000, 0x1f18, 0x0 }, + { 0xfc003e00, 0x68002a00, 0x3f00000, 0x1 }, + { 0xffe00000, 0x60a00000, 0x1f18, 0x0 }, + { 0xf7c02000, 0x25400000, 0x3000fe, 0x0 }, + { 0xfc003e70, 0x28003a70, 0x3f00000, 0x1 }, + { 0xffe03080, 0x9c803000, 0xf60, 0x0 }, + { 0xffc01000, 0x61400000, 0x202ffe, 0x0 }, + { 0xffe01804, 0x42a01000, 0x20f8, 0x0 }, + { 0xffc0001c, 0x75000000, 0x203fe0, 0x0 }, + { 0xffe02084, 0xafc02080, 0x30078, 0x0 }, + { 0xffe03884, 0xafa00884, 0x30078, 0x0 }, + { 0xffe03884, 0xafa02080, 0x30078, 0x0 }, + { 0xffe00000, 0x38c00000, 0x201f, 0x0 }, + { 0xffc01000, 0x61001000, 0x202ffe, 0x0 }, + { 0xf9e00000, 0x48800000, 0x61f20ff, 0x0 }, + { 0xf8003800, 0x40003000, 0x7f00000, 0x1 }, + { 0xf7c03000, 0x15403000, 0x3000fe, 0x0 }, + { 0xf7c03000, 0x15400000, 0x3000fe, 0x0 }, + { 0xf7c02000, 0x21000000, 0x3000fe, 0x0 }, + { 0xffe00004, 0x40c00000, 0x20f8, 0x0 }, + { 0xffe01804, 0x46a01000, 0x20f8, 
0x0 }, + { 0xf8003d08, 0x20003d08, 0x7f00000, 0x1 }, + { 0xffe038c0, 0xada02080, 0x3f, 0x0 }, + { 0xffe03080, 0x9c203000, 0xf60, 0x0 }, + { 0xfc003800, 0x68002000, 0x3f00000, 0x1 }, + { 0xf9e00000, 0x90600000, 0x6003fe0, 0x0 }, + { 0xf7c03000, 0x14000000, 0x3000fe, 0x0 }, + { 0xf8003e70, 0x20003a40, 0x7f00000, 0x1 }, + { 0xff201800, 0x5c000800, 0xdf20fe, 0x0 }, + { 0xffe02000, 0x41800000, 0x7e0, 0x0 }, + { 0xff800000, 0xdb000000, 0x6020e0, 0x0 }, + { 0xfc003f00, 0x48003e00, 0x3f00000, 0x1 }, + { 0xf7c03000, 0x14002000, 0x3000fe, 0x0 }, + { 0xf7c02300, 0x11800100, 0x3000fe, 0x0 }, + { 0xfc003e00, 0x68002800, 0x3f00000, 0x1 }, + { 0xffe00004, 0x44c00000, 0x20f8, 0x0 }, + { 0xffe03880, 0x9f003880, 0x1f0100, 0x0 }, + { 0xff602000, 0x73402000, 0x1fe0, 0x0 }, + { 0xffe00000, 0x38200000, 0x201f, 0x0 }, + { 0xf7c02000, 0x24800000, 0x3000fe, 0x0 }, + { 0xf7c03000, 0x15001000, 0x3000fe, 0x0 }, + { 0xff800000, 0x7c800000, 0x1f2000, 0x0 }, + { 0xf8003fc7, 0x40003fc6, 0x7f00000, 0x1 }, + { 0xf7c02000, 0x12000000, 0x3000fe, 0x0 }, + { 0xff602000, 0x73202000, 0x1fe0, 0x0 }, + { 0xf7c02300, 0x13c00000, 0x3000fe, 0x0 }, + { 0xff602060, 0x3f400040, 0x1f80, 0x0 }, + { 0xf7c02000, 0x24002000, 0x3000fe, 0x0 }, + { 0xffe02084, 0xaf800080, 0x30078, 0x0 }, + { 0xffe00000, 0x38800000, 0x201f, 0x0 }, + { 0xfc003f00, 0x28003800, 0x3f00000, 0x1 }, + { 0xffe03080, 0x9c801080, 0xf60, 0x0 }, + { 0xffe020c0, 0xad4000c0, 0x3f, 0x0 }, + { 0xffe00000, 0x39400000, 0x201f, 0x0 }, + { 0xf7c02300, 0x13c02100, 0x3000fe, 0x0 }, + { 0xffe020c0, 0xad400080, 0x3f, 0x0 }, + { 0xffe03880, 0x9f603880, 0x1f0100, 0x0 }, + { 0xff000016, 0xde000002, 0xe020e8, 0x0 }, + { 0xfc003d08, 0x28003d00, 0x3f00000, 0x1 }, + { 0xfc003f00, 0x28003000, 0x3f00000, 0x1 }, + { 0xffe03080, 0x9c401000, 0xf60, 0x0 }, + { 0xf7c02000, 0x21402000, 0x3000fe, 0x0 }, + { 0xff201800, 0x5c200800, 0xdf20fe, 0x0 }, + { 0xffe01804, 0x40a01000, 0x20f8, 0x0 }, + { 0xfc003f00, 0x68003300, 0x3f00000, 0x1 }, + { 0xfc003f00, 0x68003200, 0x3f00000, 0x1 
}, + { 0xf7c03000, 0x15401000, 0x3000fe, 0x0 }, + { 0xffe01804, 0x44a00800, 0x20f8, 0x0 }, + { 0xf7c02000, 0x26000000, 0x3000fe, 0x0 }, + { 0xffc00000, 0xda400000, 0x203fe0, 0x0 }, + { 0xffe00004, 0x40600000, 0x20f8, 0x0 }, + { 0xffe02080, 0xab600080, 0x3f, 0x0 }, + { 0xf8003f00, 0x20003600, 0x7f00000, 0x1 }, + { 0xf7c02300, 0x11c00300, 0x3000fe, 0x0 }, + { 0xf8003f00, 0x20003700, 0x7f00000, 0x1 }, + { 0xf7c02000, 0x25c00000, 0x3000fe, 0x0 }, + { 0xf7c02300, 0x11800300, 0x3000fe, 0x0 }, + { 0xffe03880, 0x9f802880, 0x1f0100, 0x0 }, + { 0xfc003800, 0x48003000, 0x3f00000, 0x1 }, + { 0xf8003c00, 0x20002c00, 0x7f00000, 0x1 }, + { 0xf7c02000, 0x10400000, 0x3000fe, 0x0 }, + { 0xff602060, 0x3f400060, 0x1f80, 0x0 }, + { 0xffe03080, 0x9c801000, 0xf60, 0x0 }, + { 0xff602060, 0x3e400040, 0x1f80, 0x0 }, + { 0xf7c03000, 0x14402000, 0x3000fe, 0x0 }, + { 0xffe0001c, 0x75800010, 0x3fe0, 0x0 }, + { 0xff000016, 0xde000014, 0xe020e8, 0x0 }, + { 0xf7c02300, 0x11c02000, 0x3000fe, 0x0 }, + { 0xff600018, 0xdd200008, 0x1fe0, 0x0 }, + { 0xff602060, 0x3e200060, 0x1f80, 0x0 }, + { 0xff000016, 0xde000006, 0xe020e8, 0x0 }, + { 0xffe00004, 0x44600000, 0x20f8, 0x0 }, + { 0xf8003e00, 0x60002800, 0x7f00000, 0x1 }, + { 0xfe600000, 0x3c000000, 0x207f, 0x0 }, + { 0xffe03884, 0xafa02884, 0x30078, 0x0 }, + { 0xf7c02300, 0x11802300, 0x3000fe, 0x0 }, + { 0xffe00000, 0x38000000, 0x201f, 0x0 }, + { 0xff200800, 0x5c000000, 0xdf20fe, 0x0 }, + { 0xf7c02000, 0x13400000, 0x3000fe, 0x0 }, + { 0xff200800, 0x5c200000, 0xdf20fe, 0x0 }, + { 0xffe02000, 0x41000000, 0x7e0, 0x0 }, + { 0xffe03880, 0x9fc02880, 0x1f0100, 0x0 }, + { 0xffe00004, 0x46000000, 0x20f8, 0x0 }, + { 0xff602060, 0x3f000020, 0x1f80, 0x0 }, + { 0xfc003d08, 0x28003d08, 0x3f00000, 0x1 }, + { 0xff602060, 0x3f200060, 0x1f80, 0x0 }, + { 0xffe038c0, 0xada028c0, 0x3f, 0x0 }, + { 0xffe038c0, 0xada008c0, 0x3f, 0x0 }, + { 0xf8003f00, 0x20003500, 0x7f00000, 0x1 }, + { 0xfc003fc4, 0x48003f40, 0x3f00000, 0x1 }, + { 0xf9e01800, 0x48a00000, 0x61f20ff, 0x0 }, + { 
0xf7c03000, 0x14802000, 0x3000fe, 0x0 }, + { 0xfc003f00, 0x28003900, 0x3f00000, 0x1 }, + { 0xf8003fc7, 0x40003fc7, 0x7f00000, 0x1 }, + { 0xffe02000, 0x45400000, 0x7e0, 0x0 }, + { 0xffe038c0, 0xada02880, 0x3f, 0x0 }, + { 0xffe02084, 0xaf002080, 0x30078, 0x0 }, + { 0xffe03880, 0x9f803880, 0x1f0100, 0x0 }, + { 0xf7c03000, 0x15000000, 0x3000fe, 0x0 }, + { 0xfc003f00, 0x28003700, 0x3f00000, 0x1 }, + { 0xfc003f00, 0x28003600, 0x3f00000, 0x1 }, + { 0xffe02000, 0x47200000, 0x7e0, 0x0 }, + { 0xffe03880, 0xaba00080, 0x3f, 0x0 }, + { 0xffe02084, 0xafc00080, 0x30078, 0x0 }, + { 0xff802000, 0x73800000, 0x1fe0, 0x0 }, + { 0xffe03880, 0x9f202880, 0x1f0100, 0x0 }, + { 0xf8003d18, 0x20003c00, 0x7f00000, 0x1 }, + { 0xf9e00000, 0xa1600000, 0x60020ff, 0x0 }, + { 0xffe00004, 0x44800000, 0x20f8, 0x0 }, + { 0xf7c02000, 0x21802000, 0x3000fe, 0x0 }, + { 0xff000000, 0xd8000000, 0x6020e0, 0x0 }, + { 0xf9e00000, 0xa1000000, 0x60020ff, 0x0 }, + { 0xffe03884, 0xafa00084, 0x30078, 0x0 }, + { 0xff201800, 0x5c201800, 0xdf20fe, 0x0 }, + { 0xff000016, 0xde000010, 0xe020e8, 0x0 }, + { 0xffe03880, 0x9f603080, 0x1f0100, 0x0 }, + { 0xffe02000, 0x41c00000, 0x7e0, 0x0 }, + { 0xf7c02000, 0x20402000, 0x3000fe, 0x0 }, + { 0xff800000, 0xe1000000, 0x1fe0, 0x0 }, + { 0xf9e00000, 0xa1400000, 0x60020ff, 0x0 }, + { 0xf7c03000, 0x14c00000, 0x3000fe, 0x0 }, + { 0xf8003fc7, 0x40003f47, 0x7f00000, 0x1 }, + { 0xffe00004, 0x40800000, 0x20f8, 0x0 }, + { 0xff800000, 0xe1800000, 0x1fe0, 0x0 }, + { 0xf7c02300, 0x11802100, 0x3000fe, 0x0 }, + { 0xf9e00000, 0x49800000, 0x61f3fe0, 0x0 }, + { 0xf7c02000, 0x26400000, 0x3000fe, 0x0 }, + { 0xf8003c00, 0x20002800, 0x7f00000, 0x1 }, + { 0xff902000, 0x7e002000, 0xf1fe0, 0x0 }, + { 0xff902000, 0x7e802000, 0xf1fe0, 0x0 }, + { 0xf9e00000, 0x91c00000, 0x6003fe0, 0x0 }, + { 0xffe03884, 0xafa02880, 0x30078, 0x0 }, + { 0xf7c02000, 0x22000000, 0x3000fe, 0x0 }, + { 0xffe03080, 0x9d203000, 0xf60, 0x0 }, + { 0xf7c02000, 0x26002000, 0x3000fe, 0x0 }, + { 0xff800000, 0xe2000000, 0x1fe0, 0x0 }, + { 
0xf7c02000, 0x26c00000, 0x3000fe, 0x0 }, + { 0xff602060, 0x3e400000, 0x1f80, 0x0 }, + { 0xffe00000, 0x38400000, 0x201f, 0x0 }, + { 0xfc003800, 0x48002000, 0x3f00000, 0x1 }, + { 0xff000016, 0xde000000, 0xe020e8, 0x0 }, + { 0xf8003f00, 0x20003000, 0x7f00000, 0x1 }, + { 0xf8003e70, 0x20003a60, 0x7f00000, 0x1 }, + { 0xff902000, 0x7e800000, 0xf1fe0, 0x0 }, + { 0xffe020c0, 0xad6020c0, 0x3f, 0x0 }, + { 0xf7c02300, 0x13802000, 0x3000fe, 0x0 }, + { 0xffe020c0, 0xad600080, 0x3f, 0x0 }, + { 0xff902000, 0x7e000000, 0xf1fe0, 0x0 }, + { 0xf7000000, 0x17000000, 0x3000fe, 0x0 }, + { 0xf7000000, 0x16000000, 0x3000fe, 0x0 }, + { 0xf7c02000, 0x25002000, 0x3000fe, 0x0 }, + { 0xfc003fc7, 0x48003fc7, 0x3f00000, 0x1 }, + { 0xffc01000, 0x61801000, 0x202ffe, 0x0 }, + { 0xffe03884, 0xafa03080, 0x30078, 0x0 }, + { 0xf8003fc4, 0x40003f40, 0x7f00000, 0x1 }, + { 0xfc003e70, 0x28003a60, 0x3f00000, 0x1 }, + { 0xf7c02300, 0x13800000, 0x3000fe, 0x0 }, + { 0xffe03880, 0x9f802080, 0x1f0100, 0x0 }, + { 0xf0000000, 0xb0000000, 0xfe03fe0, 0x0 }, + { 0xffe03880, 0x9f402080, 0x1f0100, 0x0 }, + { 0xffe02000, 0x43200000, 0x7e0, 0x0 }, + { 0xffe00000, 0x39800000, 0x201f, 0x0 }, + { 0xffe03880, 0x9fc03880, 0x1f0100, 0x0 }, + { 0xffe02000, 0x45600000, 0x7e0, 0x0 }, + { 0xf9e00000, 0x91200000, 0x6003fe0, 0x0 }, + { 0xffe02000, 0x43600000, 0x7e0, 0x0 }, + { 0xfc003f00, 0x28003800, 0x3f00000, 0x1 }, + { 0xff802000, 0x74000000, 0x1fe0, 0x0 }, + { 0xffe02084, 0xaf002084, 0x30078, 0x0 }, + { 0xff802000, 0x74800000, 0x1fe0, 0x0 }, + { 0xf7c03000, 0x14c02000, 0x3000fe, 0x0 }, + { 0xfe000001, 0x5a000000, 0x1ff3ffe, 0x0 }, + { 0xff602060, 0x3f400020, 0x1f80, 0x0 }, + { 0xf7c02000, 0x10802000, 0x3000fe, 0x0 }, + { 0xffe02084, 0xaf802080, 0x30078, 0x0 }, + { 0xffe00004, 0x46400000, 0x20f8, 0x0 }, + { 0xffe020c0, 0xad800080, 0x3f, 0x0 }, + { 0xffe020c0, 0xad8000c0, 0x3f, 0x0 }, + { 0xf8003fc7, 0x40003f45, 0x7f00000, 0x1 }, + { 0xf8003e00, 0x60002a00, 0x7f00000, 0x1 }, + { 0xffe02084, 0xaf600084, 0x30078, 0x0 }, + { 
0xffe03080, 0x9c201000, 0xf60, 0x0 }, + { 0xffe02000, 0x43400000, 0x7e0, 0x0 }, + { 0xffe03080, 0x9c203080, 0xf60, 0x0 }, + { 0xffe02000, 0x41200000, 0x7e0, 0x0 }, + { 0xffe03080, 0x9c201080, 0xf60, 0x0 }, + { 0xf7c02300, 0x11c02300, 0x3000fe, 0x0 }, + { 0xffe03880, 0x9fc03080, 0x1f0100, 0x0 }, + { 0xffe03880, 0x9f402880, 0x1f0100, 0x0 }, + { 0xf8003800, 0x40002000, 0x7f00000, 0x1 }, + { 0xf7c02000, 0x24402000, 0x3000fe, 0x0 }, + { 0xf7c02000, 0x20c02000, 0x3000fe, 0x0 }, + { 0xf7c02300, 0x11c00000, 0x3000fe, 0x0 }, + { 0xffe02000, 0x45200000, 0x7e0, 0x0 }, + { 0xf8003f00, 0x20003900, 0x7f00000, 0x1 }, + { 0xf7c02300, 0x11c00100, 0x3000fe, 0x0 }, + { 0xffe02084, 0xaf800084, 0x30078, 0x0 }, + { 0xfe600000, 0x3c200000, 0x207f, 0x0 }, + { 0xf7c02000, 0x26800000, 0x3000fe, 0x0 }, + { 0xffe03880, 0x9f003080, 0x1f0100, 0x0 }, + { 0xffe03884, 0xafa01084, 0x30078, 0x0 }, + { 0xffc00000, 0x76000000, 0x203fe0, 0x0 }, + { 0xff602060, 0x3e000040, 0x1f80, 0x0 }, + { 0xffe020c0, 0xadc020c0, 0x3f, 0x0 }, + { 0xffe00004, 0x44400000, 0x20f8, 0x0 }, + { 0xffe020c0, 0xadc02080, 0x3f, 0x0 }, + { 0xfe600000, 0x3c400000, 0x207f, 0x0 }, + { 0xf7c02000, 0x20400000, 0x3000fe, 0x0 }, + { 0xff800000, 0x7c000000, 0x1fe0, 0x0 }, + { 0xffe03884, 0xafa00080, 0x30078, 0x0 }, + { 0xff201800, 0x5c001800, 0xdf20fe, 0x0 }, + { 0xffe02000, 0x47800000, 0x7e0, 0x0 }, + { 0xff601018, 0xdd400000, 0xfe0, 0x0 }, + { 0xffe020c0, 0xad4020c0, 0x3f, 0x0 }, + { 0xffe020c0, 0xad402080, 0x3f, 0x0 }, + { 0xf8003000, 0x40001000, 0x7f00000, 0x1 }, + { 0xffe02084, 0xafc02084, 0x30078, 0x0 }, + { 0xffe03080, 0x9c403080, 0xf60, 0x0 }, + { 0xfc003e40, 0x28003a00, 0x3f00000, 0x1 }, + { 0xffe038c0, 0xada010c0, 0x3f, 0x0 }, + { 0xffe038c0, 0xada01080, 0x3f, 0x0 }, + { 0xffe038c0, 0xada030c0, 0x3f, 0x0 }, + { 0xffe038c0, 0xada03080, 0x3f, 0x0 }, + { 0xf7c02000, 0x20800000, 0x3000fe, 0x0 }, + { 0xfc003fc7, 0x48003f46, 0x3f00000, 0x1 }, + { 0xffe01804, 0x44a00000, 0x20f8, 0x0 }, + { 0xf7c02000, 0x20002000, 0x3000fe, 0x0 }, + { 
0xf7c02000, 0x12c02000, 0x3000fe, 0x0 }, + { 0xffe03000, 0x9a601000, 0xf60, 0x0 }, + { 0xffc00000, 0xda800000, 0x203fe0, 0x0 }, + { 0xf9e00000, 0x90400000, 0x6003fe0, 0x0 }, + { 0xffe02000, 0x47600000, 0x7e0, 0x0 }, + { 0xffe03080, 0x9d403000, 0xf60, 0x0 }, + { 0xffe03080, 0x9d403080, 0xf60, 0x0 }, + { 0xffe03080, 0x9d401000, 0xf60, 0x0 }, + { 0xffe03080, 0x9d401080, 0xf60, 0x0 }, + { 0xffe02000, 0x41400000, 0x7e0, 0x0 }, + { 0xff800000, 0xdf800000, 0x6020e0, 0x0 }, + { 0xffc01000, 0x61000000, 0x202ffe, 0x0 }, + { 0xffe03880, 0x9f202080, 0x1f0100, 0x0 }, + { 0xfc003fc7, 0x48003fc6, 0x3f00000, 0x1 }, + { 0xfe000000, 0x7a000000, 0x1fe0, 0x0 }, + { 0xffff0000, 0x6a490000, 0x1f80, 0x0 }, + { 0xff802000, 0x73000000, 0x1fe0, 0x0 }, + { 0xff602060, 0x3e200020, 0x1f80, 0x0 }, + { 0xf7c02000, 0x24000000, 0x3000fe, 0x0 }, + { 0xf8003e40, 0x20003a00, 0x7f00000, 0x1 }, + { 0xf7c03000, 0x14401000, 0x3000fe, 0x0 }, + { 0xf8003f00, 0x20003200, 0x7f00000, 0x1 }, + { 0xffc00000, 0x76400000, 0x203fe0, 0x0 }, + { 0xf7c02000, 0x22002000, 0x3000fe, 0x0 }, + { 0xffc01000, 0x61c01000, 0x202ffe, 0x0 }, + { 0xf7c03000, 0x14801000, 0x3000fe, 0x0 }, + { 0xf7c02000, 0x12002000, 0x3000fe, 0x0 }, + { 0xf7c02000, 0x10402000, 0x3000fe, 0x0 }, + { 0xff201800, 0x5d200000, 0xdf20fe, 0x0 }, + { 0xf7c02000, 0x21400000, 0x3000fe, 0x0 }, + { 0xff201800, 0x5d000000, 0xdf20fe, 0x0 }, + { 0xffe02000, 0x45c00000, 0x7e0, 0x0 }, + { 0xf7c02000, 0x25802000, 0x3000fe, 0x0 }, + { 0xfc003e70, 0x28003a50, 0x3f00000, 0x1 }, + { 0xf7c02300, 0x13c00300, 0x3000fe, 0x0 }, + { 0xf9e01800, 0xa1a00800, 0x60020ff, 0x0 }, + { 0xffe02000, 0x43800000, 0x7e0, 0x0 }, + { 0xfc003fc4, 0x48003fc0, 0x3f00000, 0x1 }, + { 0xff800000, 0xe2800000, 0x1fe0, 0x0 }, + { 0xf7c02300, 0x13c02000, 0x3000fe, 0x0 }, + { 0xffe03080, 0x9d803080, 0xf60, 0x0 }, + { 0xffe03080, 0x9d803000, 0xf60, 0x0 }, + { 0xffe03080, 0x9d801080, 0xf60, 0x0 }, + { 0xf8003fc4, 0x40003f00, 0x7f00000, 0x1 }, + { 0xffe00000, 0x39c00000, 0x201f, 0x0 }, + { 0xffe03080, 
0x9d203080, 0xf60, 0x0 }, + { 0xffe02080, 0xab000080, 0x3f, 0x0 }, + { 0xf8003e00, 0x60003c00, 0x7f00000, 0x1 }, + { 0xffe03880, 0x9f602080, 0x1f0100, 0x0 }, + { 0xffc00000, 0x76800000, 0x203fe0, 0x0 }, + { 0xffe03884, 0xafa02084, 0x30078, 0x0 }, + { 0xf7c02000, 0x13002000, 0x3000fe, 0x0 }, + { 0xf9e00000, 0x91000000, 0x6003fe0, 0x0 }, + { 0xffe03080, 0x9d201080, 0xf60, 0x0 }, + { 0xf7c03000, 0x15002000, 0x3000fe, 0x0 }, + { 0xf8003000, 0x60000000, 0x7f00000, 0x1 }, + { 0xffc01000, 0x61800000, 0x202ffe, 0x0 }, + { 0xf7c03000, 0x14400000, 0x3000fe, 0x0 }, + { 0xffe03000, 0x9b401000, 0xf60, 0x0 }, + { 0xf7c03000, 0x14003000, 0x3000fe, 0x0 }, + { 0xffe03880, 0x9fc02080, 0x1f0100, 0x0 }, + { 0xfc003fc4, 0x48003f00, 0x3f00000, 0x1 }, + { 0xffe02000, 0x45000000, 0x7e0, 0x0 }, + { 0xfc003800, 0x48002800, 0x3f00000, 0x1 }, + { 0xfc003fc7, 0x48003fc5, 0x3f00000, 0x1 }, + { 0xfc003d18, 0x28003c00, 0x3f00000, 0x1 }, + { 0xfc003fc7, 0x48003fc4, 0x3f00000, 0x1 }, + { 0xf8003f00, 0x60003200, 0x7f00000, 0x1 }, + { 0xffe02084, 0xaf600080, 0x30078, 0x0 }, + { 0xf9e01800, 0xa1a00000, 0x60020ff, 0x0 }, + { 0xf7c03000, 0x14001000, 0x3000fe, 0x0 }, + { 0xf7c03000, 0x14c01000, 0x3000fe, 0x0 }, + { 0xffe00004, 0x46c00000, 0x20f8, 0x0 }, + { 0xf7c03000, 0x15003000, 0x3000fe, 0x0 }, + { 0xf7c02000, 0x10000000, 0x3000fe, 0x0 }, + { 0xf8003d18, 0x20003c08, 0x7f00000, 0x1 }, + { 0xffc0001c, 0x75400010, 0x203fe0, 0x0 }, + { 0xf9e00000, 0x48600000, 0x61f20ff, 0x0 }, + { 0xffe03080, 0x9c603080, 0xf60, 0x0 }, + { 0xfe000000, 0x58000000, 0x1ff3ffe, 0x0 }, + { 0xffe03000, 0x9a201000, 0xf60, 0x0 }, + { 0xffe00000, 0x69e00000, 0x1f18, 0x0 }, + { 0xffe020c0, 0xad802080, 0x3f, 0x0 }, + { 0xffe02000, 0x47c00000, 0x7e0, 0x0 }, + { 0xffe00000, 0x60e00000, 0x1f18, 0x0 }, + { 0xf7c03000, 0x15402000, 0x3000fe, 0x0 }, + { 0xffe020c0, 0xad8020c0, 0x3f, 0x0 }, + { 0xff000016, 0xde000012, 0xe020e8, 0x0 }, + { 0xf7c02000, 0x25c02000, 0x3000fe, 0x0 }, + { 0xf8003f00, 0x60003100, 0x7f00000, 0x1 }, + { 0xf8003f00, 
0x60003000, 0x7f00000, 0x1 }, + { 0xf7c02000, 0x25800000, 0x3000fe, 0x0 }, + { 0xf7c03000, 0x14403000, 0x3000fe, 0x0 }, + { 0xfc003d18, 0x28003c08, 0x3f00000, 0x1 }, + { 0xffe03880, 0x9f403080, 0x1f0100, 0x0 }, + { 0xf7c02000, 0x25402000, 0x3000fe, 0x0 }, + { 0xf7c02000, 0x10c00000, 0x3000fe, 0x0 }, + { 0xffe02000, 0x45800000, 0x7e0, 0x0 }, + { 0xffe03880, 0x9f803080, 0x1f0100, 0x0 }, + { 0xffe03080, 0x9d001000, 0xf60, 0x0 }, + { 0xffe03080, 0x9d001080, 0xf60, 0x0 }, + { 0xffe03080, 0x9d003000, 0xf60, 0x0 }, + { 0xffe03080, 0x9d003080, 0xf60, 0x0 }, + { 0xffe03080, 0x9d801000, 0xf60, 0x0 }, + { 0xf9e00000, 0x49200000, 0x61f3fe0, 0x0 }, + { 0xf9e00000, 0xa1c00000, 0x60020ff, 0x0 }, + { 0xf9e00000, 0x90200000, 0x6003fe0, 0x0 }, + { 0xffe03080, 0x9d201000, 0xf60, 0x0 }, + { 0xffe03884, 0xafa01080, 0x30078, 0x0 }, + { 0xffe02084, 0xaf602080, 0x30078, 0x0 }, + { 0xffe038c0, 0xada000c0, 0x3f, 0x0 }, + { 0xffe02080, 0xab400080, 0x3f, 0x0 }, + { 0xff000016, 0xde000004, 0xe020e8, 0x0 }, + { 0xffe00004, 0x44000000, 0x20f8, 0x0 }, + { 0xf7c02000, 0x20000000, 0x3000fe, 0x0 }, + { 0xfc003d18, 0x28003c10, 0x3f00000, 0x1 }, + { 0xff600018, 0xdd000008, 0x1fe0, 0x0 }, + { 0xffe020c0, 0xadc000c0, 0x3f, 0x0 }, + { 0xffe020c0, 0xadc00080, 0x3f, 0x0 }, + { 0xffe03000, 0x9b801000, 0xf60, 0x0 }, + { 0xf8003fc7, 0x40003f46, 0x7f00000, 0x1 }, + { 0xf7c02000, 0x21c02000, 0x3000fe, 0x0 }, + { 0xffe01804, 0x40a00000, 0x20f8, 0x0 }, + { 0xf7c02000, 0x26402000, 0x3000fe, 0x0 }, + { 0xffe03080, 0x9c401080, 0xf60, 0x0 }, + { 0xffe00000, 0x39200000, 0x201f, 0x0 }, + { 0xffe03080, 0x9c403000, 0xf60, 0x0 }, + { 0xf7c02000, 0x11002000, 0x3000fe, 0x0 }, + { 0xfc003c00, 0x28002800, 0x3f00000, 0x1 }, + { 0xffe00004, 0x40400000, 0x20f8, 0x0 }, + { 0xf7c02000, 0x26802000, 0x3000fe, 0x0 }, + { 0xf7c02000, 0x13000000, 0x3000fe, 0x0 }, + { 0xffe00004, 0x42600000, 0x20f8, 0x0 }, + { 0xf8003000, 0x60001000, 0x7f00000, 0x1 }, + { 0xff602060, 0x3e400020, 0x1f80, 0x0 }, + { 0xff602060, 0x3f000000, 0x1f80, 0x0 }, 
+  { 0xf7c02000, 0x24c02000, 0x3000fe, 0x0 },
+  { 0xff802000, 0x74002000, 0x1fe0, 0x0 },
+  { 0xf8003800, 0x20002000, 0x7f00000, 0x1 },
+  { 0xffe03000, 0x9aa01000, 0xf60, 0x0 },
+  { 0xf7c02000, 0x12400000, 0x3000fe, 0x0 },
+  { 0xff602060, 0x3f000060, 0x1f80, 0x0 },
+  { 0xf7c02000, 0x11000000, 0x3000fe, 0x0 },
+};
diff --git a/lib/ReaderWriter/ELF/Hexagon/HexagonExecutableAtoms.h b/lib/ReaderWriter/ELF/Hexagon/HexagonExecutableAtoms.h
new file mode 100644
index 000000000000..a2505aa460c5
--- /dev/null
+++ b/lib/ReaderWriter/ELF/Hexagon/HexagonExecutableAtoms.h
@@ -0,0 +1,29 @@
+//===- lib/ReaderWriter/ELF/Hexagon/HexagonExecutableAtoms.h --------------===//
+//
+//                             The LLVM Linker
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLD_READER_WRITER_ELF_HEXAGON_HEXAGON_EXECUTABLE_ATOM_H
+#define LLD_READER_WRITER_ELF_HEXAGON_HEXAGON_EXECUTABLE_ATOM_H
+
+#include "ELFFile.h"
+
+namespace lld {
+namespace elf {
+typedef llvm::object::ELFType<llvm::support::little, 2, false> HexagonELFType;
+class HexagonLinkingContext;
+
+/// \brief RuntimeFile specialisation labelled "Hexagon runtime file".
+///
+/// The template parameter is the ELF type forwarded to RuntimeFile; it is
+/// deliberately not spelled like the HexagonELFType typedef above so the two
+/// cannot be confused.
+template <class ELFT> class HexagonRuntimeFile : public RuntimeFile<ELFT> {
+public:
+  HexagonRuntimeFile(HexagonLinkingContext &context)
+      : RuntimeFile<ELFT>(context, "Hexagon runtime file") {}
+};
+} // elf
+} // lld
+
+#endif // LLD_READER_WRITER_ELF_HEXAGON_HEXAGON_EXECUTABLE_ATOM_H
diff --git a/lib/ReaderWriter/ELF/Hexagon/HexagonExecutableWriter.h b/lib/ReaderWriter/ELF/Hexagon/HexagonExecutableWriter.h
new file mode 100644
index 000000000000..0848e64166fa
--- /dev/null
+++ b/lib/ReaderWriter/ELF/Hexagon/HexagonExecutableWriter.h
@@ -0,0 +1,86 @@
+//===- lib/ReaderWriter/ELF/Hexagon/HexagonExecutableWriter.h -------------===//
+//
+//                             The LLVM Linker
+//
+// This file is distributed under the University of Illinois Open Source
+// License.
See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#ifndef HEXAGON_EXECUTABLE_WRITER_H
+#define HEXAGON_EXECUTABLE_WRITER_H
+
+#include "ExecutableWriter.h"
+#include "HexagonELFWriters.h"
+#include "HexagonExecutableAtoms.h"
+#include "HexagonLinkingContext.h"
+
+namespace lld {
+namespace elf {
+
+template <typename ELFT> class HexagonTargetLayout;
+
+/// \brief Executable writer for Hexagon.
+///
+/// Combines the generic ExecutableWriter with the Hexagon-specific pieces of
+/// HexagonELFWriter and owns the runtime file that receives the
+/// linker-defined absolute atoms added by addDefaultAtoms().
+template <class ELFT>
+class HexagonExecutableWriter : public ExecutableWriter<ELFT>,
+                                public HexagonELFWriter<ELFT> {
+public:
+  HexagonExecutableWriter(HexagonLinkingContext &context,
+                          HexagonTargetLayout<ELFT> &layout);
+
+protected:
+  // Add any runtime files and their atoms to the output
+  virtual bool createImplicitFiles(std::vector<std::unique_ptr<File>> &);
+
+  /// \brief Runs the parent's finalisation, then assigns _SDA_BASE_ and the
+  /// remaining Hexagon runtime atom values.
+  virtual void finalizeDefaultAtomValues();
+
+  /// \brief Fills in the generic ELF header, then the Hexagon-specific fields.
+  ///
+  /// \returns the error reported by the generic header setup, if any;
+  /// otherwise a default-constructed (success) std::error_code.
+  virtual std::error_code setELFHeader() {
+    // Propagate a failure from the generic step instead of discarding it and
+    // reporting success.
+    if (std::error_code ec = ExecutableWriter<ELFT>::setELFHeader())
+      return ec;
+    HexagonELFWriter<ELFT>::setELFHeader(*this->_elfHeader);
+    return std::error_code();
+  }
+
+private:
+  /// \brief Adds the absolute atom _SDA_BASE_ to the runtime file and, when
+  /// the context is dynamic, _GLOBAL_OFFSET_TABLE_ and _DYNAMIC as well.
+  void addDefaultAtoms() {
+    _hexagonRuntimeFile->addAbsoluteAtom("_SDA_BASE_");
+    if (this->_context.isDynamic()) {
+      _hexagonRuntimeFile->addAbsoluteAtom("_GLOBAL_OFFSET_TABLE_");
+      _hexagonRuntimeFile->addAbsoluteAtom("_DYNAMIC");
+    }
+  }
+
+  HexagonLinkingContext &_hexagonLinkingContext;
+  HexagonTargetLayout<ELFT> &_hexagonTargetLayout;
+  // Handed over to the caller by createImplicitFiles(); null afterwards.
+  std::unique_ptr<HexagonRuntimeFile<ELFT>> _hexagonRuntimeFile;
+};
+
+template <class ELFT>
+HexagonExecutableWriter<ELFT>::HexagonExecutableWriter(
+    HexagonLinkingContext &context, HexagonTargetLayout<ELFT> &layout)
+    : ExecutableWriter<ELFT>(context, layout),
+      HexagonELFWriter<ELFT>(context, layout), _hexagonLinkingContext(context),
+      _hexagonTargetLayout(layout),
+      _hexagonRuntimeFile(new HexagonRuntimeFile<ELFT>(context)) {}
+
+/// \brief Appends the parent's implicit files, then the Hexagon runtime file
+/// populated with the default atoms.
+///
+/// Ownership of the runtime file moves into \p result, so the member is empty
+/// once this returns. Always returns true.
+template <class ELFT>
+bool HexagonExecutableWriter<ELFT>::createImplicitFiles(
+    std::vector<std::unique_ptr<File>> &result) {
+  ExecutableWriter<ELFT>::createImplicitFiles(result);
+  // Add the default atoms as defined for hexagon
+  addDefaultAtoms();
+  result.push_back(std::move(_hexagonRuntimeFile));
+  return true;
+}
+
+template <class ELFT>
+void HexagonExecutableWriter<ELFT>::finalizeDefaultAtomValues() {
+  // Finalize the atom values that are part of the parent.
+  ExecutableWriter<ELFT>::finalizeDefaultAtomValues();
+  // _SDA_BASE_ takes the virtual address of the small-data section.
+  // NOTE(review): the lookup result is dereferenced unchecked; this assumes
+  // addDefaultAtoms() has registered _SDA_BASE_ -- confirm.
+  auto sdabaseAtomIter = _hexagonTargetLayout.findAbsoluteAtom("_SDA_BASE_");
+  (*sdabaseAtomIter)->_virtualAddr =
+      _hexagonTargetLayout.getSDataSection()->virtualAddr();
+  HexagonELFWriter<ELFT>::finalizeHexagonRuntimeAtomValues();
+}
+
+} // namespace elf
+} // namespace lld
+
+#endif // HEXAGON_EXECUTABLE_WRITER_H
diff --git a/lib/ReaderWriter/ELF/Hexagon/HexagonLinkingContext.cpp b/lib/ReaderWriter/ELF/Hexagon/HexagonLinkingContext.cpp
new file mode 100644
index 000000000000..7eacb2b44c3b
--- /dev/null
+++ b/lib/ReaderWriter/ELF/Hexagon/HexagonLinkingContext.cpp
@@ -0,0 +1,25 @@
+//===- lib/ReaderWriter/ELF/Hexagon/HexagonLinkingContext.cpp -------------===//
+//
+//                             The LLVM Linker
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonLinkingContext.h"
+#include "HexagonTargetHandler.h"
+
+using namespace lld::elf;
+
+/// Factory: yields a Hexagon linking context when \p triple names the Hexagon
+/// architecture, and a null pointer for every other architecture.
+std::unique_ptr<lld::ELFLinkingContext>
+HexagonLinkingContext::create(llvm::Triple triple) {
+  if (triple.getArch() != llvm::Triple::hexagon)
+    return nullptr;
+  return std::unique_ptr<lld::ELFLinkingContext>(
+      new HexagonLinkingContext(triple));
+}
+
+/// Builds the base context with a freshly allocated HexagonTargetHandler that
+/// refers back to this context.
+HexagonLinkingContext::HexagonLinkingContext(llvm::Triple triple)
+    : ELFLinkingContext(triple, std::unique_ptr<TargetHandlerBase>(
+                                    new HexagonTargetHandler(*this))) {}
diff --git a/lib/ReaderWriter/ELF/Hexagon/HexagonLinkingContext.h b/lib/ReaderWriter/ELF/Hexagon/HexagonLinkingContext.h
new file mode 100644
index 000000000000..c920cdf153aa
--- /dev/null
+++ b/lib/ReaderWriter/ELF/Hexagon/HexagonLinkingContext.h
@@ -0,0 +1,69 @@
+//===- lib/ReaderWriter/ELF/Hexagon/HexagonLinkingContext.h ---------------===//
+//
+//                             The LLVM Linker
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLD_READER_WRITER_ELF_HEXAGON_HEXAGON_LINKING_CONTEXT_H
+#define LLD_READER_WRITER_ELF_HEXAGON_HEXAGON_LINKING_CONTEXT_H
+
+#include "lld/ReaderWriter/ELFLinkingContext.h"
+#include "llvm/Object/ELF.h"
+#include "llvm/Support/ELF.h"
+
+namespace lld {
+namespace elf {
+
+typedef llvm::object::ELFType<llvm::support::little, 2, false> HexagonELFType;
+
+/// \brief ELF linking context for Hexagon; classifies the Hexagon relocation
+/// kinds that the generic ELF code asks about.
+class HexagonLinkingContext final : public ELFLinkingContext {
+public:
+  static std::unique_ptr<ELFLinkingContext> create(llvm::Triple);
+  HexagonLinkingContext(llvm::Triple triple);
+
+  void addPasses(PassManager &) override;
+
+  /// \brief True only for ELF-namespace references of kind R_HEX_RELATIVE or
+  /// R_HEX_GLOB_DAT.
+  bool isDynamicRelocation(const Reference &r) const override {
+    if (r.kindNamespace() != Reference::KindNamespace::ELF)
+      return false;
+    const auto kind = r.kindValue();
+    return kind == llvm::ELF::R_HEX_RELATIVE ||
+           kind == llvm::ELF::R_HEX_GLOB_DAT;
+  }
+
+  /// \brief True only for ELF-namespace references of kind R_HEX_JMP_SLOT.
+  bool isPLTRelocation(const Reference &r) const override {
+    if (r.kindNamespace() != Reference::KindNamespace::ELF)
+      return false;
+    return r.kindValue() == llvm::ELF::R_HEX_JMP_SLOT;
+  }
+
+  /// \brief True only for ELF-namespace references of kind R_HEX_RELATIVE,
+  /// the single relative relocation recognised here.
+  bool isRelativeReloc(const Reference &r) const override {
+    if (r.kindNamespace() != Reference::KindNamespace::ELF)
+      return false;
+    return r.kindValue() == llvm::ELF::R_HEX_RELATIVE;
+  }
+};
+
+} // elf
+} // lld
+
+#endif // LLD_READER_WRITER_ELF_HEXAGON_HEXAGON_LINKING_CONTEXT_H
diff --git a/lib/ReaderWriter/ELF/Hexagon/HexagonRelocationFunctions.h b/lib/ReaderWriter/ELF/Hexagon/HexagonRelocationFunctions.h
new file mode 100644
index 000000000000..2b9e25ce363b
--- /dev/null
+++ b/lib/ReaderWriter/ELF/Hexagon/HexagonRelocationFunctions.h
@@ -0,0 +1,49 @@
+//===- HexagonRelocationFunction.h
----------------------------------------===//
+//
+//                             The LLVM Linker
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLD_READER_WRITER_ELF_HEXAGON_HEXAGON_RELOCATION_FUNCTIONS_H
+#define LLD_READER_WRITER_ELF_HEXAGON_HEXAGON_RELOCATION_FUNCTIONS_H
+
+namespace lld {
+namespace elf {
+
+/// \brief One row of the instruction encoding table.
+///
+/// An instruction word matches a row when (word & insnMask) == insnCmpMask
+/// and its duplex-ness equals isDuplex; insnBitMask is then the set of bits
+/// into which a relocation value is scattered.
+typedef struct {
+  uint32_t insnMask;
+  uint32_t insnCmpMask;
+  uint32_t insnBitMask;
+  bool isDuplex;
+} Instruction;
+
+#include "HexagonEncodings.h"
+
+// Reads the 32-bit little-endian instruction word at INSN and looks up its
+// relocation bit mask in insn_encodings. The argument is parenthesized so an
+// expression such as `p + 4` is cast as a whole rather than only its first
+// operand.
+#define FINDV4BITMASK(INSN)                                                    \
+  findBitMask((uint32_t) * ((llvm::support::ulittle32_t *)(INSN)),             \
+              insn_encodings,                                                  \
+              sizeof(insn_encodings) / sizeof(Instruction))
+
+/// \brief finds the scatter Bits that need to be used to apply relocations
+///
+/// \param insn      instruction word to classify.
+/// \param encodings table of known encodings.
+/// \param numInsns  number of entries in \p encodings.
+/// \returns insnBitMask of the first matching entry; hits llvm_unreachable
+/// when no entry matches.
+inline uint32_t
+findBitMask(uint32_t insn, Instruction *encodings, int32_t numInsns) {
+  // A word whose bits 15:14 are both clear is matched against duplex entries
+  // only; any other word against non-duplex entries only. The test does not
+  // depend on the table entry, so evaluate it once.
+  const bool insnIsDuplex = (insn & 0xc000) == 0;
+  for (int32_t i = 0; i < numInsns; i++) {
+    const Instruction &candidate = encodings[i];
+    if (candidate.isDuplex != insnIsDuplex)
+      continue;
+
+    if ((candidate.insnMask & insn) == candidate.insnCmpMask)
+      return candidate.insnBitMask;
+  }
+  llvm_unreachable("found unknown instruction");
+}
+
+} // elf
+} // lld
+
+#endif // LLD_READER_WRITER_ELF_HEXAGON_HEXAGON_RELOCATION_FUNCTIONS_H
diff --git a/lib/ReaderWriter/ELF/Hexagon/HexagonRelocationHandler.cpp b/lib/ReaderWriter/ELF/Hexagon/HexagonRelocationHandler.cpp
new file mode 100644
index 000000000000..21967d356a31
--- /dev/null
+++ b/lib/ReaderWriter/ELF/Hexagon/HexagonRelocationHandler.cpp
@@ -0,0 +1,350 @@
+//===- lib/ReaderWriter/ELF/Hexagon/HexagonRelocationHandler.cpp ---------===//
+//
+//                             The LLVM Linker
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonLinkingContext.h"
+#include "HexagonRelocationFunctions.h"
+#include "HexagonTargetHandler.h"
+#include "HexagonRelocationHandler.h"
+#include "llvm/Support/Endian.h"
+
+using namespace lld;
+using namespace lld::elf;
+using namespace llvm::ELF;
+using namespace llvm::support::endian;
+
+// ORs `result` into the 32-bit little-endian word at `location`, a variable
+// that must be in scope where the macro is expanded. The trailing semicolon
+// is part of the expansion and is kept so existing uses are unaffected; the
+// argument is parenthesized so it is ORed as a whole.
+#define APPLY_RELOC(result)                                                    \
+  write32le(location, (result) | read32le(location));
+
+// In the helpers below P is the address of the word being relocated, S the
+// address of the target and A the addend, matching how applyRelocation()
+// passes relocVAddress, targetVAddress and ref.addend(). Every helper returns
+// 0 after patching the word and 1 when it rejects the value and leaves the
+// word untouched.
+
+/// \brief PC-relative branch: ((S + A) - P) >> 2 scattered into the
+/// instruction's immediate bits, accepted only strictly between
+/// -(1 << nBits) and (1 << nBits).
+static int relocBNPCREL(uint8_t *location, uint64_t P, uint64_t S, uint64_t A,
+                        int32_t nBits) {
+  int32_t result = (uint32_t)(((S + A) - P) >> 2);
+  int32_t range = 1 << nBits;
+  if (result < range && result > -range) {
+    result = lld::scatterBits<int32_t>(result, FINDV4BITMASK(location));
+    APPLY_RELOC(result);
+    return 0;
+  }
+  return 1;
+}
+
+/// \brief Word32_LO: 0x00c03fff : (S + A) : Truncate
+static int relocLO16(uint8_t *location, uint64_t P, uint64_t S, uint64_t A) {
+  uint32_t result = (uint32_t)(S + A);
+  result = lld::scatterBits<int32_t>(result, 0x00c03fff);
+  APPLY_RELOC(result);
+  return 0;
+}
+
+/// \brief Word32_HI: 0x00c03fff : (S + A) >> 16 : Truncate
+static int relocHI16(uint8_t *location, uint64_t P, uint64_t S, uint64_t A) {
+  uint32_t result = (uint32_t)((S + A) >> 16);
+  result = lld::scatterBits<int32_t>(result, 0x00c03fff);
+  APPLY_RELOC(result);
+  return 0;
+}
+
+/// \brief Word32: 0xffffffff : (S + A) : Truncate
+static int reloc32(uint8_t *location, uint64_t P, uint64_t S, uint64_t A) {
+  uint32_t result = (uint32_t)(S + A);
+  APPLY_RELOC(result);
+  return 0;
+}
+
+/// \brief R_HEX_32_6_X: (S + A) >> 6 scattered through 0xfff3fff; rejected
+/// when the shifted value exceeds 1 << 32.
+static int reloc32_6_X(uint8_t *location, uint64_t P, uint64_t S, uint64_t A) {
+  int64_t result = ((S + A) >> 6);
+  int64_t range = ((int64_t)1) << 32;
+  if (result > range)
+    return 1;
+  result = lld::scatterBits<int32_t>(result, 0xfff3fff);
+  APPLY_RELOC(result);
+  return 0;
+}
+
+// R_HEX_B32_PCREL_X: (S + A - P) >> 6 scattered through 0xfff3fff.
+static int relocHexB32PCRELX(uint8_t *location, uint64_t P, uint64_t S,
+                             uint64_t A) {
+  int64_t result = ((S + A - P) >> 6);
+  result = lld::scatterBits<int32_t>(result, 0xfff3fff);
+  APPLY_RELOC(result);
+  return 0;
+}
+
+// R_HEX_BN_PCREL_X: the low six bits of (S + A - P) scattered into the
+// instruction's immediate bits.
+static int relocHexBNPCRELX(uint8_t *location, uint64_t P, uint64_t S,
+                            uint64_t A, int nbits) {
+  int32_t result = ((S + A - P) & 0x3f);
+  int32_t range = 1 << nbits;
+  if (result < range && result > -range) {
+    result = lld::scatterBits<int32_t>(result, FINDV4BITMASK(location));
+    APPLY_RELOC(result);
+    return 0;
+  }
+  return 1;
+}
+
+// R_HEX_6_PCREL_X: (S + A - P) scattered into the instruction's immediate
+// bits.
+static int relocHex6PCRELX(uint8_t *location, uint64_t P, uint64_t S,
+                           uint64_t A) {
+  int32_t result = (S + A - P);
+  result = lld::scatterBits<int32_t>(result, FINDV4BITMASK(location));
+  APPLY_RELOC(result);
+  return 0;
+}
+
+// R_HEX_N_X : Word32_U6 : (S + A) : Unsigned Truncate
+static int relocHex_N_X(uint8_t *location, uint64_t P, uint64_t S, uint64_t A) {
+  uint32_t result = (S + A);
+  result = lld::scatterBits<uint32_t>(result, FINDV4BITMASK(location));
+  APPLY_RELOC(result);
+  return 0;
+}
+
+// GP REL relocations: (S + A - GP) >> nShiftBits must fit an unsigned 16-bit
+// field.
+static int relocHexGPRELN(uint8_t *location, uint64_t P, uint64_t S, uint64_t A,
+                          uint64_t GP, int nShiftBits) {
+  // Check the full 64-bit value before narrowing. The previous int32_t
+  // comparison accepted 1 << 16 (one past the largest encodable offset),
+  // wrapped offsets of 4 GiB or more, and let negative offsets through.
+  uint64_t offset = (S + A - GP) >> nShiftBits;
+  if (offset > 0xffff)
+    return 1;
+  uint32_t result =
+      lld::scatterBits<uint32_t>((uint32_t)offset, FINDV4BITMASK(location));
+  APPLY_RELOC(result);
+  return 0;
+}
+
+// The GOT helpers below take A = address of the target and GOT = address of
+// the GOT symbol (see the R_HEX_GOT_LO16 / R_HEX_GOT_HI16 call sites), so the
+// GOT offset is G = A - GOT.
+
+/// \brief Word32_LO: 0x00c03fff : (G) : Truncate
+static int relocHexGOTLO16(uint8_t *location, uint64_t A, uint64_t GOT) {
+  int32_t result = (int32_t)(A-GOT);
+  result = lld::scatterBits<int32_t>(result, 0x00c03fff);
+  APPLY_RELOC(result);
+  return 0;
+}
+
+/// \brief Word32_HI: 0x00c03fff : (G) >> 16 : Truncate
+static int relocHexGOTHI16(uint8_t *location, uint64_t A, uint64_t GOT) {
+  int32_t result = (int32_t)((A-GOT) >> 16);
+  result = lld::scatterBits<int32_t>(result, 0x00c03fff);
+  APPLY_RELOC(result);
+  return 0;
+}
+
+/// \brief Word32: 0xffffffff : (G) : Truncate
+static int relocHexGOT32(uint8_t *location, uint64_t A, uint64_t GOT) {
+  // G is A - GOT, as in every other GOT helper; this used to compute the
+  // negated offset GOT - A.
+  // NOTE(review): call site is outside this excerpt -- confirm it passes
+  // (location, target address, GOT address) like its siblings.
+  int32_t result = (int32_t)(A-GOT);
+  APPLY_RELOC(result);
+  return 0;
+}
+
+/// \brief Word32_U16 : (G) : accepted only when below 1 << 16
+static int relocHexGOT16(uint8_t *location, uint64_t A, uint64_t GOT) {
+  // Same operand order as the other GOT helpers (was GOT - A). The bound is
+  // exclusive: 1 << 16 itself needs 17 bits.
+  int32_t result = (int32_t)(A-GOT);
+  int32_t range = 1L << 16;
+  if (result < range) {
+    result = lld::scatterBits<int32_t>(result, FINDV4BITMASK(location));
+    APPLY_RELOC(result);
+    return 0;
+  }
+  return 1;
+}
+
+/// \brief R_HEX_GOT_32_6_X: (G) >> 6 scattered into the instruction's
+/// immediate bits.
+static int relocHexGOT32_6_X(uint8_t *location, uint64_t A, uint64_t GOT) {
+  int32_t result = (int32_t)((A-GOT) >> 6);
+  result = lld::scatterBits<int32_t>(result, FINDV4BITMASK(location));
+  APPLY_RELOC(result);
+  return 0;
+}
+
+/// \brief R_HEX_GOT_16_X: the low six bits of (G), truncating.
+static int relocHexGOT16_X(uint8_t *location, uint64_t A, uint64_t GOT) {
+  // relocHexGOT32_6_X supplies (G) >> 6, so this word always needs the
+  // remaining six bits. The previous `result <= 64` test skipped the patch
+  // for every larger offset, leaving those bits zero.
+  int32_t result = (int32_t)((A-GOT) & 0x3f);
+  result = lld::scatterBits<int32_t>(result, FINDV4BITMASK(location));
+  APPLY_RELOC(result);
+  return 0;
+}
+
+/// \brief R_HEX_GOT_11_X: (G) scattered, unsigned, into the instruction's
+/// immediate bits.
+static int relocHexGOT11_X(uint8_t *location, uint64_t A, uint64_t GOT) {
+  uint32_t result = (uint32_t)(A-GOT);
+  result = lld::scatterBits<uint32_t>(result, FINDV4BITMASK(location));
+  APPLY_RELOC(result);
+  return 0;
+}
+
+/// \brief GOT-relative, signed: (S + A - GOT) >> shiftBits scattered into the
+/// instruction's immediate bits.
+static int relocHexGOTRELSigned(uint8_t *location, uint64_t P, uint64_t S,
+                                uint64_t A, uint64_t GOT, int shiftBits = 0) {
+  int32_t result = (int32_t)((S + A - GOT) >> shiftBits);
+  result = lld::scatterBits<int32_t>(result, FINDV4BITMASK(location));
+  APPLY_RELOC(result);
+  return 0;
+}
+
+/// \brief GOT-relative, unsigned: (S + A - GOT) >> shiftBits scattered into
+/// the instruction's immediate bits.
+static int relocHexGOTRELUnsigned(uint8_t *location, uint64_t P, uint64_t S,
+                                  uint64_t A, uint64_t GOT, int shiftBits = 0) {
+  uint32_t result = (uint32_t)((S + A - GOT) >> shiftBits);
+  result = lld::scatterBits<uint32_t>(result, FINDV4BITMASK(location));
+  APPLY_RELOC(result);
+  return 0;
+}
+
+/// \brief GOT-relative LO16/HI16: (S + A - GOT) >> shiftBits scattered
+/// through 0x00c03fff; the caller passes 16 for the high half.
+static int relocHexGOTREL_HILO16(uint8_t *location, uint64_t P, uint64_t S,
+                                 uint64_t A, uint64_t GOT, int shiftBits = 0) {
+  int32_t result = (int32_t)((S + A - GOT) >> shiftBits);
+  result = lld::scatterBits<int32_t>(result, 0x00c03fff);
+  APPLY_RELOC(result);
+  return 0;
+}
+
+/// \brief GOT-relative word: (S + A - GOT) ORed into the whole word.
+static int relocHexGOTREL_32(uint8_t *location, uint64_t P, uint64_t S,
+                             uint64_t A, uint64_t GOT) {
+  int32_t result = (int32_t)(S + A - GOT);
+  APPLY_RELOC(result);
+  return 0;
+}
+
+std::error_code HexagonTargetRelocationHandler::applyRelocation(
+    ELFWriter &writer, llvm::FileOutputBuffer &buf, const lld::AtomLayout &atom,
+    const Reference &ref) const {
+  uint8_t *atomContent = buf.getBufferStart() + atom._fileOffset;
+  uint8_t *location = atomContent + ref.offsetInAtom();
+  uint64_t targetVAddress = writer.addressOfAtom(ref.target());
+  uint64_t relocVAddress = atom._virtualAddr + ref.offsetInAtom();
+
+  if (ref.kindNamespace() != Reference::KindNamespace::ELF)
+    return std::error_code();
+  assert(ref.kindArch() == Reference::KindArch::Hexagon);
+  switch (ref.kindValue()) {
+  case R_HEX_B22_PCREL:
+    relocBNPCREL(location, relocVAddress, targetVAddress, ref.addend(), 21);
+    break;
+  case R_HEX_B15_PCREL:
+    relocBNPCREL(location, relocVAddress, targetVAddress, ref.addend(), 14);
+    break;
+  case R_HEX_B9_PCREL:
+    relocBNPCREL(location, relocVAddress, targetVAddress, ref.addend(), 8);
+    break;
+  case R_HEX_LO16:
+    relocLO16(location, relocVAddress, targetVAddress, ref.addend());
+    break;
+  case R_HEX_HI16:
+    relocHI16(location, relocVAddress, targetVAddress, ref.addend());
+    break;
+  case R_HEX_32:
+    reloc32(location, relocVAddress, targetVAddress, ref.addend());
+    break;
+  case R_HEX_32_6_X:
+    reloc32_6_X(location, relocVAddress, targetVAddress, ref.addend());
+    break;
+  case R_HEX_B32_PCREL_X:
+    relocHexB32PCRELX(location, relocVAddress, targetVAddress, ref.addend());
+    break;
+  case R_HEX_B22_PCREL_X:
+    relocHexBNPCRELX(location, relocVAddress, targetVAddress, ref.addend(), 21);
+    break;
+  case R_HEX_B15_PCREL_X:
+    relocHexBNPCRELX(location, relocVAddress, targetVAddress, ref.addend(), 14);
+    break;
+  case R_HEX_B13_PCREL_X:
+    relocHexBNPCRELX(location,
relocVAddress, targetVAddress, ref.addend(), 12); + break; + case R_HEX_B9_PCREL_X: + relocHexBNPCRELX(location, relocVAddress, targetVAddress, ref.addend(), 8); + break; + case R_HEX_B7_PCREL_X: + relocHexBNPCRELX(location, relocVAddress, targetVAddress, ref.addend(), 6); + break; + case R_HEX_GPREL16_0: + relocHexGPRELN(location, relocVAddress, targetVAddress, ref.addend(), + _hexagonTargetLayout.getSDataSection()->virtualAddr(), 0); + break; + case R_HEX_GPREL16_1: + relocHexGPRELN(location, relocVAddress, targetVAddress, ref.addend(), + _hexagonTargetLayout.getSDataSection()->virtualAddr(), 1); + break; + case R_HEX_GPREL16_2: + relocHexGPRELN(location, relocVAddress, targetVAddress, ref.addend(), + _hexagonTargetLayout.getSDataSection()->virtualAddr(), 2); + break; + case R_HEX_GPREL16_3: + relocHexGPRELN(location, relocVAddress, targetVAddress, ref.addend(), + _hexagonTargetLayout.getSDataSection()->virtualAddr(), 3); + break; + case R_HEX_16_X: + case R_HEX_12_X: + case R_HEX_11_X: + case R_HEX_10_X: + case R_HEX_9_X: + case R_HEX_8_X: + case R_HEX_7_X: + case R_HEX_6_X: + relocHex_N_X(location, relocVAddress, targetVAddress, ref.addend()); + break; + case R_HEX_6_PCREL_X: + relocHex6PCRELX(location, relocVAddress, targetVAddress, ref.addend()); + break; + case R_HEX_JMP_SLOT: + case R_HEX_GLOB_DAT: + break; + case R_HEX_GOTREL_32: + relocHexGOTREL_32(location, relocVAddress, targetVAddress, ref.addend(), + _hexagonTargetLayout.getGOTSymAddr()); + break; + case R_HEX_GOTREL_LO16: + relocHexGOTREL_HILO16(location, relocVAddress, targetVAddress, ref.addend(), + _hexagonTargetLayout.getGOTSymAddr()); + break; + case R_HEX_GOTREL_HI16: + relocHexGOTREL_HILO16(location, relocVAddress, targetVAddress, ref.addend(), + _hexagonTargetLayout.getGOTSymAddr(), 16); + break; + case R_HEX_GOT_LO16: + relocHexGOTLO16(location, targetVAddress, + _hexagonTargetLayout.getGOTSymAddr()); + break; + case R_HEX_GOT_HI16: + relocHexGOTHI16(location, targetVAddress, + 
_hexagonTargetLayout.getGOTSymAddr()); + break; + case R_HEX_GOT_32: + relocHexGOT32(location, targetVAddress, + _hexagonTargetLayout.getGOTSymAddr()); + break; + case R_HEX_GOT_16: + relocHexGOT16(location, targetVAddress, + _hexagonTargetLayout.getGOTSymAddr()); + break; + case R_HEX_GOT_32_6_X: + relocHexGOT32_6_X(location, targetVAddress, + _hexagonTargetLayout.getGOTSymAddr()); + break; + case R_HEX_GOT_16_X: + relocHexGOT16_X(location, targetVAddress, + _hexagonTargetLayout.getGOTSymAddr()); + break; + case R_HEX_GOT_11_X: + relocHexGOT11_X(location, targetVAddress, + _hexagonTargetLayout.getGOTSymAddr()); + break; + case R_HEX_GOTREL_32_6_X: + relocHexGOTRELSigned(location, relocVAddress, targetVAddress, ref.addend(), + _hexagonTargetLayout.getGOTSymAddr(), 6); + break; + case R_HEX_GOTREL_16_X: + case R_HEX_GOTREL_11_X: + relocHexGOTRELUnsigned(location, relocVAddress, targetVAddress, + ref.addend(), _hexagonTargetLayout.getGOTSymAddr()); + break; + + default: + return make_unhandled_reloc_error(); + } + + return std::error_code(); +} diff --git a/lib/ReaderWriter/ELF/Hexagon/HexagonRelocationHandler.h b/lib/ReaderWriter/ELF/Hexagon/HexagonRelocationHandler.h new file mode 100644 index 000000000000..4795d0264b9c --- /dev/null +++ b/lib/ReaderWriter/ELF/Hexagon/HexagonRelocationHandler.h @@ -0,0 +1,35 @@ +//===- lld/ReaderWriter/ELF/Hexagon/HexagonRelocationHandler.h -----------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +#ifndef LLD_READER_WRITER_ELF_HEXAGON_HEXAGON_RELOCATION_HANDLER_H +#define LLD_READER_WRITER_ELF_HEXAGON_HEXAGON_RELOCATION_HANDLER_H + +#include "HexagonSectionChunks.h" +#include "HexagonTargetHandler.h" +#include "lld/ReaderWriter/RelocationHelperFunctions.h" + +namespace lld { +namespace elf { + +class HexagonTargetHandler; + +class HexagonTargetRelocationHandler final : public TargetRelocationHandler { +public: + HexagonTargetRelocationHandler(HexagonTargetLayout<HexagonELFType> &layout) + : _hexagonTargetLayout(layout) {} + + std::error_code applyRelocation(ELFWriter &, llvm::FileOutputBuffer &, + const lld::AtomLayout &, + const Reference &) const override; + +private: + HexagonTargetLayout<HexagonELFType> &_hexagonTargetLayout; +}; +} // elf +} // lld +#endif diff --git a/lib/ReaderWriter/ELF/Hexagon/HexagonSectionChunks.h b/lib/ReaderWriter/ELF/Hexagon/HexagonSectionChunks.h new file mode 100644 index 000000000000..5b3fbbbd899b --- /dev/null +++ b/lib/ReaderWriter/ELF/Hexagon/HexagonSectionChunks.h @@ -0,0 +1,86 @@ +//===- lib/ReaderWriter/ELF/Hexagon/HexagonSectionChunks.h-----------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +#ifndef HEXAGON_SECTION_CHUNKS_H +#define HEXAGON_SECTION_CHUNKS_H + +#include "HexagonTargetHandler.h" + +namespace lld { +namespace elf { +template <typename ELFT> class HexagonTargetLayout; +class HexagonLinkingContext; + +/// \brief Handle Hexagon SData section +template <class HexagonELFType> +class SDataSection : public AtomSection<HexagonELFType> { +public: + SDataSection(const HexagonLinkingContext &context) + : AtomSection<HexagonELFType>( + context, ".sdata", DefinedAtom::typeDataFast, 0, + HexagonTargetLayout<HexagonELFType>::ORDER_SDATA) { + this->_type = SHT_PROGBITS; + this->_flags = SHF_ALLOC | SHF_WRITE; + this->_alignment = 4096; + } + + /// \brief Finalize the section contents before writing + virtual void doPreFlight(); + + /// \brief Does this section have an output segment. + virtual bool hasOutputSegment() { return true; } + + const lld::AtomLayout *appendAtom(const Atom *atom) { + const DefinedAtom *definedAtom = cast<DefinedAtom>(atom); + DefinedAtom::Alignment atomAlign = definedAtom->alignment(); + uint64_t alignment = 1u << atomAlign.powerOf2; + this->_atoms.push_back(new (this->_alloc) lld::AtomLayout(atom, 0, 0)); + // Set the section alignment to the largest alignment + // std::max doesn't support uint64_t + if (this->_alignment < alignment) + this->_alignment = alignment; + return (this->_atoms.back()); + } + +}; // SDataSection + +template <class HexagonELFType> +void SDataSection<HexagonELFType>::doPreFlight() { + // sort the atoms on the alignments they have been set + std::stable_sort(this->_atoms.begin(), this->_atoms.end(), + [](const lld::AtomLayout * A, + const lld::AtomLayout * B) { + const DefinedAtom *definedAtomA = cast<DefinedAtom>(A->_atom); + const DefinedAtom *definedAtomB = cast<DefinedAtom>(B->_atom); + int64_t alignmentA = 1 << definedAtomA->alignment().powerOf2; + int64_t alignmentB = 1 << definedAtomB->alignment().powerOf2; + if 
(alignmentA == alignmentB) { + if (definedAtomA->merge() == DefinedAtom::mergeAsTentative) + return false; + if (definedAtomB->merge() == DefinedAtom::mergeAsTentative) + return true; + } + return alignmentA < alignmentB; + }); + + // Set the fileOffset, and the appropriate size of the section + for (auto &ai : this->_atoms) { + const DefinedAtom *definedAtom = cast<DefinedAtom>(ai->_atom); + DefinedAtom::Alignment atomAlign = definedAtom->alignment(); + uint64_t fOffset = this->alignOffset(this->fileSize(), atomAlign); + uint64_t mOffset = this->alignOffset(this->memSize(), atomAlign); + ai->_fileOffset = fOffset; + this->_fsize = fOffset + definedAtom->size(); + this->_msize = mOffset + definedAtom->size(); + } +} // finalize + +} // elf +} // lld + +#endif // LLD_READER_WRITER_ELF_HEXAGON_HEXAGON_SECTION_CHUNKS_H diff --git a/lib/ReaderWriter/ELF/Hexagon/HexagonTargetHandler.cpp b/lib/ReaderWriter/ELF/Hexagon/HexagonTargetHandler.cpp new file mode 100644 index 000000000000..9b10c2f160f4 --- /dev/null +++ b/lib/ReaderWriter/ELF/Hexagon/HexagonTargetHandler.cpp @@ -0,0 +1,334 @@ +//===- lib/ReaderWriter/ELF/Hexagon/HexagonTargetHandler.cpp --------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "HexagonExecutableWriter.h" +#include "HexagonDynamicLibraryWriter.h" +#include "HexagonLinkingContext.h" +#include "HexagonTargetHandler.h" + +using namespace lld; +using namespace elf; +using namespace llvm::ELF; + +using llvm::makeArrayRef; + +HexagonTargetHandler::HexagonTargetHandler(HexagonLinkingContext &context) + : _hexagonLinkingContext(context), + _hexagonRuntimeFile(new HexagonRuntimeFile<HexagonELFType>(context)), + _hexagonTargetLayout(new HexagonTargetLayout<HexagonELFType>(context)), + _hexagonRelocationHandler(new HexagonTargetRelocationHandler( + *_hexagonTargetLayout.get())) {} + +std::unique_ptr<Writer> HexagonTargetHandler::getWriter() { + switch (_hexagonLinkingContext.getOutputELFType()) { + case llvm::ELF::ET_EXEC: + return std::unique_ptr<Writer>( + new elf::HexagonExecutableWriter<HexagonELFType>( + _hexagonLinkingContext, *_hexagonTargetLayout.get())); + case llvm::ELF::ET_DYN: + return std::unique_ptr<Writer>( + new elf::HexagonDynamicLibraryWriter<HexagonELFType>( + _hexagonLinkingContext, *_hexagonTargetLayout.get())); + case llvm::ELF::ET_REL: + llvm_unreachable("TODO: support -r mode"); + default: + llvm_unreachable("unsupported output type"); + } +} + +using namespace llvm::ELF; + +// .got atom +const uint8_t hexagonGotAtomContent[4] = { 0 }; +// .got.plt atom (entry 0) +const uint8_t hexagonGotPlt0AtomContent[16] = { 0 }; +// .got.plt atom (all other entries) +const uint8_t hexagonGotPltAtomContent[4] = { 0 }; +// .plt (entry 0) +const uint8_t hexagonPlt0AtomContent[28] = { + 0x00, 0x40, 0x00, 0x00, // { immext (#0) + 0x1c, 0xc0, 0x49, 0x6a, // r28 = add (pc, ##GOT0@PCREL) } # address of GOT0 + 0x0e, 0x42, 0x9c, 0xe2, // { r14 -= add (r28, #16) # offset of GOTn from GOTa + 0x4f, 0x40, 0x9c, 0x91, // r15 = memw (r28 + #8) # object ID at GOT2 + 0x3c, 0xc0, 0x9c, 0x91, // r28 = memw (r28 + #4) }# dynamic link at GOT1 + 0x0e, 0x42, 0x0e, 
0x8c, // { r14 = asr (r14, #2) # index of PLTn + 0x00, 0xc0, 0x9c, 0x52, // jumpr r28 } # call dynamic linker +}; + +// .plt (other entries) +const uint8_t hexagonPltAtomContent[16] = { + 0x00, 0x40, 0x00, 0x00, // { immext (#0) + 0x0e, 0xc0, 0x49, 0x6a, // r14 = add (pc, ##GOTn@PCREL) } # address of GOTn + 0x1c, 0xc0, 0x8e, 0x91, // r28 = memw (r14) # contents of GOTn + 0x00, 0xc0, 0x9c, 0x52, // jumpr r28 # call it +}; + +class HexagonGOTAtom : public GOTAtom { +public: + HexagonGOTAtom(const File &f) : GOTAtom(f, ".got") {} + + ArrayRef<uint8_t> rawContent() const override { + return makeArrayRef(hexagonGotAtomContent); + } + + Alignment alignment() const override { return Alignment(2); } +}; + +class HexagonGOTPLTAtom : public GOTAtom { +public: + HexagonGOTPLTAtom(const File &f) : GOTAtom(f, ".got.plt") {} + + ArrayRef<uint8_t> rawContent() const override { + return makeArrayRef(hexagonGotPltAtomContent); + } + + Alignment alignment() const override { return Alignment(2); } +}; + +class HexagonGOTPLT0Atom : public GOTAtom { +public: + HexagonGOTPLT0Atom(const File &f) : GOTAtom(f, ".got.plt") {} + + ArrayRef<uint8_t> rawContent() const override { + return makeArrayRef(hexagonGotPlt0AtomContent); + } + + Alignment alignment() const override { return Alignment(3); } +}; + +class HexagonPLT0Atom : public PLT0Atom { +public: + HexagonPLT0Atom(const File &f) : PLT0Atom(f) {} + + ArrayRef<uint8_t> rawContent() const override { + return makeArrayRef(hexagonPlt0AtomContent); + } +}; + +class HexagonPLTAtom : public PLTAtom { + +public: + HexagonPLTAtom(const File &f, StringRef secName) : PLTAtom(f, secName) {} + + ArrayRef<uint8_t> rawContent() const override { + return makeArrayRef(hexagonPltAtomContent); + } +}; + +class ELFPassFile : public SimpleFile { +public: + ELFPassFile(const ELFLinkingContext &eti) : SimpleFile("ELFPassFile") { + setOrdinal(eti.getNextOrdinalAndIncrement()); + } + + llvm::BumpPtrAllocator _alloc; +}; + +/// \brief Create GOT and PLT entries 
for relocations. Handles standard GOT/PLT +template <class Derived> class GOTPLTPass : public Pass { + /// \brief Handle a specific reference. + void handleReference(const DefinedAtom &atom, const Reference &ref) { + if (ref.kindNamespace() != Reference::KindNamespace::ELF) + return; + assert(ref.kindArch() == Reference::KindArch::Hexagon); + switch (ref.kindValue()) { + case R_HEX_PLT_B22_PCREL: + case R_HEX_B22_PCREL: + static_cast<Derived *>(this)->handlePLT32(ref); + break; + case R_HEX_GOT_LO16: + case R_HEX_GOT_HI16: + case R_HEX_GOT_32_6_X: + case R_HEX_GOT_16_X: + case R_HEX_GOT_11_X: + static_cast<Derived *>(this)->handleGOTREL(ref); + break; + } + } + +protected: + /// \brief Create a GOT entry containing 0. + const GOTAtom *getNullGOT() { + if (!_null) { + _null = new (_file._alloc) HexagonGOTPLTAtom(_file); +#ifndef NDEBUG + _null->_name = "__got_null"; +#endif + } + return _null; + } + +public: + GOTPLTPass(const ELFLinkingContext &ctx) + : _file(ctx), _null(nullptr), _PLT0(nullptr), _got0(nullptr) {} + + /// \brief Do the pass. + /// + /// The goal here is to first process each reference individually. Each call + /// to handleReference may modify the reference itself and/or create new + /// atoms which must be stored in one of the maps below. + /// + /// After all references are handled, the atoms created during that are all + /// added to mf. + void perform(std::unique_ptr<MutableFile> &mf) override { + // Process all references. + for (const auto &atom : mf->defined()) + for (const auto &ref : *atom) + handleReference(*atom, *ref); + + // Add all created atoms to the link. 
+ uint64_t ordinal = 0; + if (_PLT0) { + _PLT0->setOrdinal(ordinal++); + mf->addAtom(*_PLT0); + } + for (auto &plt : _pltVector) { + plt->setOrdinal(ordinal++); + mf->addAtom(*plt); + } + if (_null) { + _null->setOrdinal(ordinal++); + mf->addAtom(*_null); + } + if (_got0) { + _got0->setOrdinal(ordinal++); + mf->addAtom(*_got0); + } + for (auto &got : _gotVector) { + got->setOrdinal(ordinal++); + mf->addAtom(*got); + } + } + +protected: + /// \brief Owner of all the Atoms created by this pass. + ELFPassFile _file; + + /// \brief Map Atoms to their GOT entries. + llvm::DenseMap<const Atom *, GOTAtom *> _gotMap; + + /// \brief Map Atoms to their PLT entries. + llvm::DenseMap<const Atom *, PLTAtom *> _pltMap; + + /// \brief the list of GOT/PLT atoms + std::vector<GOTAtom *> _gotVector; + std::vector<PLTAtom *> _pltVector; + + /// \brief GOT entry that is always 0. Used for undefined weaks. + GOTAtom *_null; + + /// \brief The got and plt entries for .PLT0. This is used to call into the + /// dynamic linker for symbol resolution. 
+ /// @{ + PLT0Atom *_PLT0; + GOTAtom *_got0; + /// @} +}; + +class DynamicGOTPLTPass final : public GOTPLTPass<DynamicGOTPLTPass> { +public: + DynamicGOTPLTPass(const elf::HexagonLinkingContext &ctx) : GOTPLTPass(ctx) { + _got0 = new (_file._alloc) HexagonGOTPLT0Atom(_file); +#ifndef NDEBUG + _got0->_name = "__got0"; +#endif + } + + const PLT0Atom *getPLT0() { + if (_PLT0) + return _PLT0; + _PLT0 = new (_file._alloc) HexagonPLT0Atom(_file); + _PLT0->addReferenceELF_Hexagon(R_HEX_B32_PCREL_X, 0, _got0, 0); + _PLT0->addReferenceELF_Hexagon(R_HEX_6_PCREL_X, 4, _got0, 4); + DEBUG_WITH_TYPE("PLT", llvm::dbgs() << "[ PLT0/GOT0 ] " + << "Adding plt0/got0 \n"); + return _PLT0; + } + + const PLTAtom *getPLTEntry(const Atom *a) { + auto plt = _pltMap.find(a); + if (plt != _pltMap.end()) + return plt->second; + auto ga = new (_file._alloc) HexagonGOTPLTAtom(_file); + ga->addReferenceELF_Hexagon(R_HEX_JMP_SLOT, 0, a, 0); + auto pa = new (_file._alloc) HexagonPLTAtom(_file, ".plt"); + pa->addReferenceELF_Hexagon(R_HEX_B32_PCREL_X, 0, ga, 0); + pa->addReferenceELF_Hexagon(R_HEX_6_PCREL_X, 4, ga, 4); + + // Point the got entry to the PLT0 atom initially + ga->addReferenceELF_Hexagon(R_HEX_32, 0, getPLT0(), 0); +#ifndef NDEBUG + ga->_name = "__got_"; + ga->_name += a->name(); + pa->_name = "__plt_"; + pa->_name += a->name(); + DEBUG_WITH_TYPE("PLT", llvm::dbgs() << "[" << a->name() << "] " + << "Adding plt/got: " << pa->_name + << "/" << ga->_name << "\n"); +#endif + _gotMap[a] = ga; + _pltMap[a] = pa; + _gotVector.push_back(ga); + _pltVector.push_back(pa); + return pa; + } + + const GOTAtom *getGOTEntry(const Atom *a) { + auto got = _gotMap.find(a); + if (got != _gotMap.end()) + return got->second; + auto ga = new (_file._alloc) HexagonGOTAtom(_file); + ga->addReferenceELF_Hexagon(R_HEX_GLOB_DAT, 0, a, 0); + +#ifndef NDEBUG + ga->_name = "__got_"; + ga->_name += a->name(); + DEBUG_WITH_TYPE("GOT", llvm::dbgs() << "[" << a->name() << "] " + << "Adding got: " << ga->_name << 
"\n"); +#endif + _gotMap[a] = ga; + _gotVector.push_back(ga); + return ga; + } + + std::error_code handleGOTREL(const Reference &ref) { + // Turn this so that the target is set to the GOT entry + const_cast<Reference &>(ref).setTarget(getGOTEntry(ref.target())); + return std::error_code(); + } + + std::error_code handlePLT32(const Reference &ref) { + // Turn this into a PC32 to the PLT entry. + assert(ref.kindNamespace() == Reference::KindNamespace::ELF); + assert(ref.kindArch() == Reference::KindArch::Hexagon); + const_cast<Reference &>(ref).setKindValue(R_HEX_B22_PCREL); + const_cast<Reference &>(ref).setTarget(getPLTEntry(ref.target())); + return std::error_code(); + } +}; + +void elf::HexagonLinkingContext::addPasses(PassManager &pm) { + if (isDynamic()) + pm.add(llvm::make_unique<DynamicGOTPLTPass>(*this)); + ELFLinkingContext::addPasses(pm); +} + +void HexagonTargetHandler::registerRelocationNames(Registry ®istry) { + registry.addKindTable(Reference::KindNamespace::ELF, + Reference::KindArch::Hexagon, kindStrings); +} + +#define ELF_RELOC(name, value) LLD_KIND_STRING_ENTRY(name), + +const Registry::KindStrings HexagonTargetHandler::kindStrings[] = { +#include "llvm/Support/ELFRelocs/Hexagon.def" + LLD_KIND_STRING_END +}; + +#undef ELF_RELOC diff --git a/lib/ReaderWriter/ELF/Hexagon/HexagonTargetHandler.h b/lib/ReaderWriter/ELF/Hexagon/HexagonTargetHandler.h new file mode 100644 index 000000000000..f4315f710ec7 --- /dev/null +++ b/lib/ReaderWriter/ELF/Hexagon/HexagonTargetHandler.h @@ -0,0 +1,143 @@ +//===- lib/ReaderWriter/ELF/Hexagon/HexagonTargetHandler.h ----------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef HEXAGON_TARGET_HANDLER_H +#define HEXAGON_TARGET_HANDLER_H + +#include "DefaultTargetHandler.h" +#include "HexagonELFReader.h" +#include "HexagonExecutableAtoms.h" +#include "HexagonRelocationHandler.h" +#include "HexagonSectionChunks.h" +#include "TargetLayout.h" + +namespace lld { +namespace elf { +class HexagonLinkingContext; + +/// \brief TargetLayout for Hexagon +template <class HexagonELFType> +class HexagonTargetLayout final : public TargetLayout<HexagonELFType> { +public: + enum HexagonSectionOrder { + ORDER_SDATA = 205 + }; + + HexagonTargetLayout(HexagonLinkingContext &hti) + : TargetLayout<HexagonELFType>(hti), _sdataSection(nullptr), + _gotSymAtom(nullptr), _cachedGotSymAtom(false) { + _sdataSection = new (_alloc) SDataSection<HexagonELFType>(hti); + } + + /// \brief Return the section order for a input section + virtual Layout::SectionOrder getSectionOrder( + StringRef name, int32_t contentType, int32_t contentPermissions) { + if ((contentType == DefinedAtom::typeDataFast) || + (contentType == DefinedAtom::typeZeroFillFast)) + return ORDER_SDATA; + + return DefaultLayout<HexagonELFType>::getSectionOrder(name, contentType, + contentPermissions); + } + + /// \brief Return the appropriate input section name. + virtual StringRef getInputSectionName(const DefinedAtom *da) const { + switch (da->contentType()) { + case DefinedAtom::typeDataFast: + case DefinedAtom::typeZeroFillFast: + return ".sdata"; + default: + break; + } + return DefaultLayout<HexagonELFType>::getInputSectionName(da); + } + + /// \brief Gets or creates a section. 
+ virtual AtomSection<HexagonELFType> * + createSection(StringRef name, int32_t contentType, + DefinedAtom::ContentPermissions contentPermissions, + Layout::SectionOrder sectionOrder) { + if ((contentType == DefinedAtom::typeDataFast) || + (contentType == DefinedAtom::typeZeroFillFast)) + return _sdataSection; + return DefaultLayout<HexagonELFType>::createSection( + name, contentType, contentPermissions, sectionOrder); + } + + /// \brief get the segment type for the section thats defined by the target + virtual Layout::SegmentType + getSegmentType(Section<HexagonELFType> *section) const { + if (section->order() == ORDER_SDATA) + return PT_LOAD; + + return DefaultLayout<HexagonELFType>::getSegmentType(section); + } + + Section<HexagonELFType> *getSDataSection() const { + return _sdataSection; + } + + uint64_t getGOTSymAddr() { + if (!_cachedGotSymAtom) { + auto gotAtomIter = this->findAbsoluteAtom("_GLOBAL_OFFSET_TABLE_"); + _gotSymAtom = (*gotAtomIter); + _cachedGotSymAtom = true; + } + if (_gotSymAtom) + return _gotSymAtom->_virtualAddr; + return 0; + } + +private: + llvm::BumpPtrAllocator _alloc; + SDataSection<HexagonELFType> *_sdataSection; + AtomLayout *_gotSymAtom; + bool _cachedGotSymAtom; +}; + +/// \brief TargetHandler for Hexagon +class HexagonTargetHandler final : + public DefaultTargetHandler<HexagonELFType> { +public: + HexagonTargetHandler(HexagonLinkingContext &targetInfo); + + void registerRelocationNames(Registry ®istry) override; + + const HexagonTargetRelocationHandler &getRelocationHandler() const override { + return *(_hexagonRelocationHandler.get()); + } + + HexagonTargetLayout<HexagonELFType> &getTargetLayout() override { + return *(_hexagonTargetLayout.get()); + } + + std::unique_ptr<Reader> getObjReader() override { + return std::unique_ptr<Reader>( + new HexagonELFObjectReader(_hexagonLinkingContext)); + } + + std::unique_ptr<Reader> getDSOReader() override { + return std::unique_ptr<Reader>( + new 
HexagonELFDSOReader(_hexagonLinkingContext)); + } + + std::unique_ptr<Writer> getWriter() override; + +private: + llvm::BumpPtrAllocator _alloc; + static const Registry::KindStrings kindStrings[]; + HexagonLinkingContext &_hexagonLinkingContext; + std::unique_ptr<HexagonRuntimeFile<HexagonELFType> > _hexagonRuntimeFile; + std::unique_ptr<HexagonTargetLayout<HexagonELFType>> _hexagonTargetLayout; + std::unique_ptr<HexagonTargetRelocationHandler> _hexagonRelocationHandler; +}; +} // end namespace elf +} // end namespace lld + +#endif diff --git a/lib/ReaderWriter/ELF/Hexagon/Makefile b/lib/ReaderWriter/ELF/Hexagon/Makefile new file mode 100644 index 000000000000..8d6f1a0a3b1e --- /dev/null +++ b/lib/ReaderWriter/ELF/Hexagon/Makefile @@ -0,0 +1,16 @@ +##===- lld/lib/ReaderWriter/ELF/Hexagon/Makefile ----------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LLD_LEVEL := ../../../.. +LIBRARYNAME := lldHexagonELFTarget +USEDLIBS = lldCore.a + +CPP.Flags += -I$(PROJ_SRC_DIR)/$(LLD_LEVEL)/lib/ReaderWriter/ELF/Hexagon -I$(PROJ_SRC_DIR)/$(LLD_LEVEL)/lib/ReaderWriter/ELF + +include $(LLD_LEVEL)/Makefile diff --git a/lib/ReaderWriter/ELF/Layout.h b/lib/ReaderWriter/ELF/Layout.h new file mode 100644 index 000000000000..826cf5035d59 --- /dev/null +++ b/lib/ReaderWriter/ELF/Layout.h @@ -0,0 +1,59 @@ +//===- lib/ReaderWriter/ELF/Layout.h --------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_ELF_LAYOUT_H +#define LLD_READER_WRITER_ELF_LAYOUT_H + +#include "lld/Core/DefinedAtom.h" +#include "lld/ReaderWriter/AtomLayout.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Object/ELF.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/ErrorOr.h" + +namespace lld { +namespace elf { + +/// \brief The ELFLayout is an abstract class for managing the final layout for +/// the kind of binaries(Shared Libraries / Relocatables / Executables 0 +/// Each architecture (Hexagon, MIPS) would have a concrete +/// subclass derived from Layout for generating each binary thats +// needed by the lld linker +class Layout { +public: + typedef uint32_t SectionOrder; + typedef uint32_t SegmentType; + typedef uint32_t Flags; + +public: + /// Return the order the section would appear in the output file + virtual SectionOrder getSectionOrder(StringRef name, int32_t contentType, + int32_t contentPerm) = 0; + /// \brief Append the Atom to the layout and create appropriate sections. + /// \returns A reference to the atom layout or an error. The atom layout will + /// be updated as linking progresses. 
+ virtual ErrorOr<const lld::AtomLayout *> addAtom(const Atom *atom) = 0; + /// find the Atom in the current layout + virtual const AtomLayout *findAtomLayoutByName(StringRef name) const = 0; + /// associates a section to a segment + virtual void assignSectionsToSegments() = 0; + /// associates a virtual address to the segment, section, and the atom + virtual void assignVirtualAddress() = 0; + +public: + Layout() {} + + virtual ~Layout() { } +}; +} // end namespace elf +} // end namespace lld + +#endif diff --git a/lib/ReaderWriter/ELF/Makefile b/lib/ReaderWriter/ELF/Makefile new file mode 100644 index 000000000000..5791ecb9733d --- /dev/null +++ b/lib/ReaderWriter/ELF/Makefile @@ -0,0 +1,18 @@ +##===- lld/lib/ReaderWriter/ELF/Makefile --------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LLD_LEVEL := ../../.. 
+LIBRARYNAME := lldELF + +CPP.Flags += -I$(PROJ_SRC_DIR)/$(LLD_LEVEL)/lib/ReaderWriter/ELF + +# these link against this lib +PARALLEL_DIRS := Hexagon X86 X86_64 Mips AArch64 ARM + +include $(LLD_LEVEL)/Makefile diff --git a/lib/ReaderWriter/ELF/Mips/CMakeLists.txt b/lib/ReaderWriter/ELF/Mips/CMakeLists.txt new file mode 100644 index 000000000000..d982508b7ddc --- /dev/null +++ b/lib/ReaderWriter/ELF/Mips/CMakeLists.txt @@ -0,0 +1,14 @@ +add_llvm_library(lldMipsELFTarget + MipsCtorsOrderPass.cpp + MipsELFFlagsMerger.cpp + MipsLinkingContext.cpp + MipsRelocationHandler.cpp + MipsRelocationPass.cpp + MipsTargetHandler.cpp + LINK_LIBS + lldELF + lldReaderWriter + lldCore + LLVMObject + LLVMSupport + ) diff --git a/lib/ReaderWriter/ELF/Mips/Makefile b/lib/ReaderWriter/ELF/Mips/Makefile new file mode 100644 index 000000000000..0b2f4ff82279 --- /dev/null +++ b/lib/ReaderWriter/ELF/Mips/Makefile @@ -0,0 +1,15 @@ +##===- lld/lib/ReaderWriter/ELF/Mips/Makefile ----------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LLD_LEVEL := ../../../.. +LIBRARYNAME := lldMipsELFTarget +USEDLIBS = lldCore.a +CPP.Flags += -I$(PROJ_SRC_DIR)/$(LLD_LEVEL)/lib/ReaderWriter/ELF + +include $(LLD_LEVEL)/Makefile diff --git a/lib/ReaderWriter/ELF/Mips/MipsCtorsOrderPass.cpp b/lib/ReaderWriter/ELF/Mips/MipsCtorsOrderPass.cpp new file mode 100644 index 000000000000..8bf80257fc89 --- /dev/null +++ b/lib/ReaderWriter/ELF/Mips/MipsCtorsOrderPass.cpp @@ -0,0 +1,73 @@ +//===- lib/ReaderWriter/ELF/Mips/Mips/CtorsOrderPass.cpp ------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "MipsCtorsOrderPass.h" +#include <algorithm> +#include <climits> + +using namespace lld; +using namespace lld::elf; + +static bool matchCrtObjName(StringRef objName, StringRef objPath) { + if (!objPath.endswith(".o")) + return false; + + // check *<objName> case + objPath = objPath.drop_back(2); + if (objPath.endswith(objName)) + return true; + + // check *<objName>? case + return !objPath.empty() && objPath.drop_back(1).endswith(objName); +} + +static int32_t getSectionPriority(StringRef path, StringRef sectionName) { + // Arrange .ctors/.dtors sections in the following order: + // .ctors from crtbegin.o or crtbegin?.o + // .ctors from regular object files + // .ctors.* (sorted) from regular object files + // .ctors from crtend.o or crtend?.o + + if (matchCrtObjName("crtbegin", path)) + return std::numeric_limits<int32_t>::min(); + if (matchCrtObjName("crtend", path)) + return std::numeric_limits<int32_t>::max(); + + StringRef num = sectionName.drop_front().rsplit('.').second; + + int32_t priority = std::numeric_limits<int32_t>::min() + 1; + if (!num.empty()) + num.getAsInteger(10, priority); + + return priority; +} + +void MipsCtorsOrderPass::perform(std::unique_ptr<MutableFile> &f) { + auto definedAtoms = f->definedAtoms(); + + auto last = std::stable_partition(definedAtoms.begin(), definedAtoms.end(), + [](const DefinedAtom *atom) { + if (atom->sectionChoice() != DefinedAtom::sectionCustomRequired) + return false; + + StringRef name = atom->customSectionName(); + return name.startswith(".ctors") || name.startswith(".dtors"); + }); + + std::stable_sort(definedAtoms.begin(), last, + [](const DefinedAtom *left, const DefinedAtom *right) { + StringRef leftSec = left->customSectionName(); + StringRef rightSec = right->customSectionName(); + + int32_t leftPriority = getSectionPriority(left->file().path(), leftSec); + int32_t rightPriority = 
getSectionPriority(right->file().path(), rightSec); + + return leftPriority < rightPriority; + }); +} diff --git a/lib/ReaderWriter/ELF/Mips/MipsCtorsOrderPass.h b/lib/ReaderWriter/ELF/Mips/MipsCtorsOrderPass.h new file mode 100644 index 000000000000..eeb1a194f9c7 --- /dev/null +++ b/lib/ReaderWriter/ELF/Mips/MipsCtorsOrderPass.h @@ -0,0 +1,25 @@ +//===- lib/ReaderWriter/ELF/Mips/MipsCtorsOrderPass.h ---------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_ELF_MIPS_MIPS_CTORS_ORDER_PASS_H +#define LLD_READER_WRITER_ELF_MIPS_MIPS_CTORS_ORDER_PASS_H + +#include "lld/Core/Pass.h" + +namespace lld { +namespace elf { +/// \brief This pass sorts atoms in .{ctors,dtors}.<priority> sections. +class MipsCtorsOrderPass : public Pass { +public: + void perform(std::unique_ptr<MutableFile> &mergedFile) override; +}; +} +} + +#endif diff --git a/lib/ReaderWriter/ELF/Mips/MipsDynamicLibraryWriter.h b/lib/ReaderWriter/ELF/Mips/MipsDynamicLibraryWriter.h new file mode 100644 index 000000000000..30b5b0ba6dae --- /dev/null +++ b/lib/ReaderWriter/ELF/Mips/MipsDynamicLibraryWriter.h @@ -0,0 +1,101 @@ +//===- lib/ReaderWriter/ELF/Mips/MipsDynamicLibraryWriter.h ---------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +#ifndef LLD_READER_WRITER_ELF_MIPS_MIPS_DYNAMIC_LIBRARY_WRITER_H +#define LLD_READER_WRITER_ELF_MIPS_MIPS_DYNAMIC_LIBRARY_WRITER_H + +#include "DynamicLibraryWriter.h" +#include "MipsDynamicTable.h" +#include "MipsELFWriters.h" +#include "MipsLinkingContext.h" + +namespace lld { +namespace elf { + +template <typename ELFT> class MipsSymbolTable; +template <typename ELFT> class MipsDynamicSymbolTable; +template <typename ELFT> class MipsTargetLayout; + +template <class ELFT> +class MipsDynamicLibraryWriter : public DynamicLibraryWriter<ELFT> { +public: + MipsDynamicLibraryWriter(MipsLinkingContext &ctx, + MipsTargetLayout<ELFT> &layout); + +protected: + // Add any runtime files and their atoms to the output + bool createImplicitFiles(std::vector<std::unique_ptr<File>> &) override; + + void finalizeDefaultAtomValues() override; + + std::error_code setELFHeader() override { + DynamicLibraryWriter<ELFT>::setELFHeader(); + _writeHelper.setELFHeader(*this->_elfHeader); + return std::error_code(); + } + + unique_bump_ptr<SymbolTable<ELFT>> createSymbolTable() override; + unique_bump_ptr<DynamicTable<ELFT>> createDynamicTable() override; + + unique_bump_ptr<DynamicSymbolTable<ELFT>> + createDynamicSymbolTable() override; + +private: + MipsELFWriter<ELFT> _writeHelper; + MipsTargetLayout<ELFT> &_mipsTargetLayout; +}; + +template <class ELFT> +MipsDynamicLibraryWriter<ELFT>::MipsDynamicLibraryWriter( + MipsLinkingContext &ctx, MipsTargetLayout<ELFT> &layout) + : DynamicLibraryWriter<ELFT>(ctx, layout), _writeHelper(ctx, layout), + _mipsTargetLayout(layout) {} + +template <class ELFT> +bool MipsDynamicLibraryWriter<ELFT>::createImplicitFiles( + std::vector<std::unique_ptr<File>> &result) { + DynamicLibraryWriter<ELFT>::createImplicitFiles(result); + result.push_back(std::move(_writeHelper.createRuntimeFile())); + return true; +} + +template <class ELFT> +void 
MipsDynamicLibraryWriter<ELFT>::finalizeDefaultAtomValues() { + // Finalize the atom values that are part of the parent. + DynamicLibraryWriter<ELFT>::finalizeDefaultAtomValues(); + _writeHelper.finalizeMipsRuntimeAtomValues(); +} + +template <class ELFT> +unique_bump_ptr<SymbolTable<ELFT>> + MipsDynamicLibraryWriter<ELFT>::createSymbolTable() { + return unique_bump_ptr<SymbolTable<ELFT>>(new ( + this->_alloc) MipsSymbolTable<ELFT>(this->_context)); +} + +/// \brief create dynamic table +template <class ELFT> +unique_bump_ptr<DynamicTable<ELFT>> + MipsDynamicLibraryWriter<ELFT>::createDynamicTable() { + return unique_bump_ptr<DynamicTable<ELFT>>(new ( + this->_alloc) MipsDynamicTable<ELFT>(this->_context, _mipsTargetLayout)); +} + +/// \brief create dynamic symbol table +template <class ELFT> +unique_bump_ptr<DynamicSymbolTable<ELFT>> + MipsDynamicLibraryWriter<ELFT>::createDynamicSymbolTable() { + return unique_bump_ptr<DynamicSymbolTable<ELFT>>( + new (this->_alloc) MipsDynamicSymbolTable<ELFT>( + this->_context, _mipsTargetLayout)); +} + +} // namespace elf +} // namespace lld + +#endif diff --git a/lib/ReaderWriter/ELF/Mips/MipsDynamicTable.h b/lib/ReaderWriter/ELF/Mips/MipsDynamicTable.h new file mode 100644 index 000000000000..2b9562f42b57 --- /dev/null +++ b/lib/ReaderWriter/ELF/Mips/MipsDynamicTable.h @@ -0,0 +1,115 @@ +//===- lib/ReaderWriter/ELF/Mips/MipsDynamicTable.h -----------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +#ifndef LLD_READER_WRITER_ELF_MIPS_MIPS_DYNAMIC_TABLE_H +#define LLD_READER_WRITER_ELF_MIPS_MIPS_DYNAMIC_TABLE_H + +#include "DefaultLayout.h" +#include "SectionChunks.h" + +namespace lld { +namespace elf { + +template <class ELFType> class MipsTargetLayout; + +template <class MipsELFType> +class MipsDynamicTable : public DynamicTable<MipsELFType> { +public: + MipsDynamicTable(const ELFLinkingContext &ctx, + MipsTargetLayout<MipsELFType> &layout) + : DynamicTable<MipsELFType>(ctx, layout, ".dynamic", + DefaultLayout<MipsELFType>::ORDER_DYNAMIC), + _mipsTargetLayout(layout) {} + + void createDefaultEntries() override { + DynamicTable<MipsELFType>::createDefaultEntries(); + + typename DynamicTable<MipsELFType>::Elf_Dyn dyn; + + // Version id for the Runtime Linker Interface. + dyn.d_un.d_val = 1; + dyn.d_tag = DT_MIPS_RLD_VERSION; + this->addEntry(dyn); + + // MIPS flags. + dyn.d_un.d_val = RHF_NOTPOT; + dyn.d_tag = DT_MIPS_FLAGS; + this->addEntry(dyn); + + // The base address of the segment. + dyn.d_un.d_ptr = 0; + dyn.d_tag = DT_MIPS_BASE_ADDRESS; + _dt_baseaddr = this->addEntry(dyn); + + // Number of local global offset table entries. + dyn.d_un.d_val = 0; + dyn.d_tag = DT_MIPS_LOCAL_GOTNO; + _dt_localgot = this->addEntry(dyn); + + // Number of entries in the .dynsym section. + dyn.d_un.d_val = 0; + dyn.d_tag = DT_MIPS_SYMTABNO; + _dt_symtabno = this->addEntry(dyn); + + // The index of the first dynamic symbol table entry that corresponds + // to an entry in the global offset table. + dyn.d_un.d_val = 0; + dyn.d_tag = DT_MIPS_GOTSYM; + _dt_gotsym = this->addEntry(dyn); + + // Address of the .got section. + dyn.d_un.d_val = 0; + dyn.d_tag = DT_PLTGOT; + _dt_pltgot = this->addEntry(dyn); + } + + void updateDynamicTable() override { + DynamicTable<MipsELFType>::updateDynamicTable(); + + // Assign the minimum segment address to the DT_MIPS_BASE_ADDRESS tag. 
+ auto baseAddr = std::numeric_limits<uint64_t>::max(); + for (auto si : _mipsTargetLayout.segments()) + if (si->segmentType() != llvm::ELF::PT_NULL) + baseAddr = std::min(baseAddr, si->virtualAddr()); + this->_entries[_dt_baseaddr].d_un.d_val = baseAddr; + + auto &got = _mipsTargetLayout.getGOTSection(); + + this->_entries[_dt_symtabno].d_un.d_val = this->getSymbolTable()->size(); + this->_entries[_dt_gotsym].d_un.d_val = + this-> getSymbolTable()->size() - got.getGlobalCount(); + this->_entries[_dt_localgot].d_un.d_val = got.getLocalCount(); + this->_entries[_dt_pltgot].d_un.d_ptr = + _mipsTargetLayout.findOutputSection(".got")->virtualAddr(); + } + + int64_t getGotPltTag() override { return DT_MIPS_PLTGOT; } + +protected: + /// \brief Adjust the symbol's value for microMIPS code. + uint64_t getAtomVirtualAddress(const AtomLayout *al) const override { + if (const auto *da = dyn_cast<DefinedAtom>(al->_atom)) + if (da->codeModel() == DefinedAtom::codeMipsMicro || + da->codeModel() == DefinedAtom::codeMipsMicroPIC) + return al->_virtualAddr | 1; + return al->_virtualAddr; + } + +private: + std::size_t _dt_symtabno; + std::size_t _dt_localgot; + std::size_t _dt_gotsym; + std::size_t _dt_pltgot; + std::size_t _dt_baseaddr; + MipsTargetLayout<MipsELFType> &_mipsTargetLayout; +}; + +} // end namespace elf +} // end namespace lld + +#endif diff --git a/lib/ReaderWriter/ELF/Mips/MipsELFFile.h b/lib/ReaderWriter/ELF/Mips/MipsELFFile.h new file mode 100644 index 000000000000..7381c7e977bf --- /dev/null +++ b/lib/ReaderWriter/ELF/Mips/MipsELFFile.h @@ -0,0 +1,331 @@ +//===- lib/ReaderWriter/ELF/MipsELFFile.h ---------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +#ifndef LLD_READER_WRITER_ELF_MIPS_MIPS_ELF_FILE_H +#define LLD_READER_WRITER_ELF_MIPS_MIPS_ELF_FILE_H + +#include "ELFReader.h" +#include "MipsLinkingContext.h" +#include "MipsRelocationHandler.h" + +namespace llvm { +namespace object { + +template <class ELFT> +struct Elf_RegInfo; + +template <llvm::support::endianness TargetEndianness, std::size_t MaxAlign> +struct Elf_RegInfo<ELFType<TargetEndianness, MaxAlign, false>> { + LLVM_ELF_IMPORT_TYPES(TargetEndianness, MaxAlign, false) + Elf_Word ri_gprmask; // bit-mask of used general registers + Elf_Word ri_cprmask[4]; // bit-mask of used co-processor registers + Elf_Addr ri_gp_value; // gp register value +}; + +template <llvm::support::endianness TargetEndianness, std::size_t MaxAlign> +struct Elf_RegInfo<ELFType<TargetEndianness, MaxAlign, true>> { + LLVM_ELF_IMPORT_TYPES(TargetEndianness, MaxAlign, true) + Elf_Word ri_gprmask; // bit-mask of used general registers + Elf_Word ri_pad; // unused padding field + Elf_Word ri_cprmask[4]; // bit-mask of used co-processor registers + Elf_Addr ri_gp_value; // gp register value +}; + +template <class ELFT> struct Elf_Mips_Options { + LLVM_ELF_IMPORT_TYPES(ELFT::TargetEndianness, ELFT::MaxAlignment, + ELFT::Is64Bits) + uint8_t kind; // Determines interpretation of variable part of descriptor + uint8_t size; // Byte size of descriptor, including this header + Elf_Half section; // Section header index of section affected, + // or 0 for global options + Elf_Word info; // Kind-specific information +}; + +} // end namespace object. +} // end namespace llvm. 
+ +namespace lld { +namespace elf { + +template <class ELFT> class MipsELFFile; + +template <class ELFT> +class MipsELFDefinedAtom : public ELFDefinedAtom<ELFT> { + typedef llvm::object::Elf_Sym_Impl<ELFT> Elf_Sym; + typedef llvm::object::Elf_Shdr_Impl<ELFT> Elf_Shdr; + +public: + MipsELFDefinedAtom(const MipsELFFile<ELFT> &file, StringRef symbolName, + StringRef sectionName, const Elf_Sym *symbol, + const Elf_Shdr *section, ArrayRef<uint8_t> contentData, + unsigned int referenceStart, unsigned int referenceEnd, + std::vector<ELFReference<ELFT> *> &referenceList) + : ELFDefinedAtom<ELFT>(file, symbolName, sectionName, symbol, section, + contentData, referenceStart, referenceEnd, + referenceList) {} + + const MipsELFFile<ELFT>& file() const override { + return static_cast<const MipsELFFile<ELFT> &>(this->_owningFile); + } + + DefinedAtom::CodeModel codeModel() const override { + switch (this->_symbol->st_other & llvm::ELF::STO_MIPS_MIPS16) { + case llvm::ELF::STO_MIPS_MIPS16: + return DefinedAtom::codeMips16; + case llvm::ELF::STO_MIPS_PIC: + return DefinedAtom::codeMipsPIC; + case llvm::ELF::STO_MIPS_MICROMIPS: + return DefinedAtom::codeMipsMicro; + case llvm::ELF::STO_MIPS_MICROMIPS | llvm::ELF::STO_MIPS_PIC: + return DefinedAtom::codeMipsMicroPIC; + default: + return DefinedAtom::codeNA; + } + } +}; + +template <class ELFT> class MipsELFReference : public ELFReference<ELFT> { + typedef llvm::object::Elf_Rel_Impl<ELFT, false> Elf_Rel; + typedef llvm::object::Elf_Rel_Impl<ELFT, true> Elf_Rela; + + static const bool _isMips64EL = + ELFT::Is64Bits && ELFT::TargetEndianness == llvm::support::little; + +public: + MipsELFReference(uint64_t symValue, const Elf_Rela &rel) + : ELFReference<ELFT>( + &rel, rel.r_offset - symValue, Reference::KindArch::Mips, + rel.getType(_isMips64EL) & 0xff, rel.getSymbol(_isMips64EL)), + _tag(uint32_t(rel.getType(_isMips64EL)) >> 8) {} + + MipsELFReference(uint64_t symValue, const Elf_Rel &rel) + : ELFReference<ELFT>(rel.r_offset - 
symValue, Reference::KindArch::Mips, + rel.getType(_isMips64EL) & 0xff, + rel.getSymbol(_isMips64EL)), + _tag(uint32_t(rel.getType(_isMips64EL)) >> 8) {} + + uint32_t tag() const override { return _tag; } + void setTag(uint32_t tag) { _tag = tag; } + +private: + uint32_t _tag; +}; + +template <class ELFT> class MipsELFFile : public ELFFile<ELFT> { +public: + MipsELFFile(std::unique_ptr<MemoryBuffer> mb, MipsLinkingContext &ctx) + : ELFFile<ELFT>(std::move(mb), ctx) {} + + static ErrorOr<std::unique_ptr<MipsELFFile>> + create(std::unique_ptr<MemoryBuffer> mb, MipsLinkingContext &ctx) { + return std::unique_ptr<MipsELFFile<ELFT>>( + new MipsELFFile<ELFT>(std::move(mb), ctx)); + } + + bool isPIC() const { + return this->_objFile->getHeader()->e_flags & llvm::ELF::EF_MIPS_PIC; + } + + /// \brief gp register value stored in the .reginfo section. + int64_t getGP0() const { return _gp0 ? *_gp0 : 0; } + + /// \brief .tdata section address plus fixed offset. + uint64_t getTPOffset() const { return *_tpOff; } + uint64_t getDTPOffset() const { return *_dtpOff; } + +protected: + std::error_code doParse() override { + if (std::error_code ec = ELFFile<ELFT>::doParse()) + return ec; + // Retrieve some auxiliary data like GP value, TLS section address etc + // from the object file. 
+ return readAuxData(); + } + +private: + typedef llvm::object::Elf_Sym_Impl<ELFT> Elf_Sym; + typedef llvm::object::Elf_Shdr_Impl<ELFT> Elf_Shdr; + typedef llvm::object::Elf_Rel_Impl<ELFT, false> Elf_Rel; + typedef typename llvm::object::ELFFile<ELFT>::Elf_Rel_Iter Elf_Rel_Iter; + typedef typename llvm::object::ELFFile<ELFT>::Elf_Rela_Iter Elf_Rela_Iter; + + enum { TP_OFFSET = 0x7000, DTP_OFFSET = 0x8000 }; + + static const bool _isMips64EL = + ELFT::Is64Bits && ELFT::TargetEndianness == llvm::support::little; + + llvm::Optional<int64_t> _gp0; + llvm::Optional<uint64_t> _tpOff; + llvm::Optional<uint64_t> _dtpOff; + + ErrorOr<ELFDefinedAtom<ELFT> *> handleDefinedSymbol( + StringRef symName, StringRef sectionName, const Elf_Sym *sym, + const Elf_Shdr *sectionHdr, ArrayRef<uint8_t> contentData, + unsigned int referenceStart, unsigned int referenceEnd, + std::vector<ELFReference<ELFT> *> &referenceList) override { + return new (this->_readerStorage) MipsELFDefinedAtom<ELFT>( + *this, symName, sectionName, sym, sectionHdr, contentData, + referenceStart, referenceEnd, referenceList); + } + + const Elf_Shdr *findSectionByType(uint64_t type) { + for (const Elf_Shdr &section : this->_objFile->sections()) + if (section.sh_type == type) + return &section; + return nullptr; + } + + const Elf_Shdr *findSectionByFlags(uint64_t flags) { + for (const Elf_Shdr &section : this->_objFile->sections()) + if (section.sh_flags & flags) + return &section; + return nullptr; + } + + std::error_code readAuxData() { + using namespace llvm::ELF; + if (const Elf_Shdr *sec = findSectionByFlags(SHF_TLS)) { + _tpOff = sec->sh_addr + TP_OFFSET; + _dtpOff = sec->sh_addr + DTP_OFFSET; + } + + typedef llvm::object::Elf_RegInfo<ELFT> Elf_RegInfo; + typedef llvm::object::Elf_Mips_Options<ELFT> Elf_Mips_Options; + + if (const Elf_Shdr *sec = findSectionByType(SHT_MIPS_OPTIONS)) { + auto contents = this->getSectionContents(sec); + if (std::error_code ec = contents.getError()) + return ec; + + ArrayRef<uint8_t> raw = 
contents.get(); + while (!raw.empty()) { + if (raw.size() < sizeof(Elf_Mips_Options)) + return make_dynamic_error_code( + StringRef("Invalid size of MIPS_OPTIONS section")); + + const auto *opt = reinterpret_cast<const Elf_Mips_Options *>(raw.data()); + if (opt->kind == ODK_REGINFO) { + _gp0 = reinterpret_cast<const Elf_RegInfo *>(opt + 1)->ri_gp_value; + break; + } + raw = raw.slice(opt->size); + } + } else if (const Elf_Shdr *sec = findSectionByType(SHT_MIPS_REGINFO)) { + auto contents = this->getSectionContents(sec); + if (std::error_code ec = contents.getError()) + return ec; + + ArrayRef<uint8_t> raw = contents.get(); + if (raw.size() != sizeof(Elf_RegInfo)) + return make_dynamic_error_code( + StringRef("Invalid size of MIPS_REGINFO section")); + + _gp0 = reinterpret_cast<const Elf_RegInfo *>(raw.data())->ri_gp_value; + } + return std::error_code(); + } + + void createRelocationReferences(const Elf_Sym *symbol, + ArrayRef<uint8_t> content, + range<Elf_Rela_Iter> rels) override { + const auto value = this->getSymbolValue(symbol); + for (const auto &rel : rels) { + if (rel.r_offset < value || value + content.size() <= rel.r_offset) + continue; + auto r = new (this->_readerStorage) MipsELFReference<ELFT>(value, rel); + this->addReferenceToSymbol(r, symbol); + this->_references.push_back(r); + } + } + + void createRelocationReferences(const Elf_Sym *symbol, + ArrayRef<uint8_t> symContent, + ArrayRef<uint8_t> secContent, + range<Elf_Rel_Iter> rels) override { + const auto value = this->getSymbolValue(symbol); + for (Elf_Rel_Iter rit = rels.begin(), eit = rels.end(); rit != eit; ++rit) { + if (rit->r_offset < value || value + symContent.size() <= rit->r_offset) + continue; + + auto r = new (this->_readerStorage) MipsELFReference<ELFT>(value, *rit); + this->addReferenceToSymbol(r, symbol); + this->_references.push_back(r); + + auto addend = readAddend(*rit, secContent); + auto pairRelType = getPairRelocation(*rit); + if (pairRelType != llvm::ELF::R_MIPS_NONE) { + 
addend <<= 16; + auto mit = findMatchingRelocation(pairRelType, rit, eit); + if (mit != eit) + addend += int16_t(readAddend(*mit, secContent)); + else + // FIXME (simon): Show detailed warning. + llvm::errs() << "lld warning: cannot matching LO16 relocation\n"; + } + this->_references.back()->setAddend(addend); + } + } + + Reference::Addend readAddend(const Elf_Rel &ri, + const ArrayRef<uint8_t> content) const { + const auto &rh = + this->_ctx.template getTargetHandler<ELFT>().getRelocationHandler(); + return static_cast<const MipsRelocationHandler &>(rh) + .readAddend(getPrimaryType(ri), content.data() + ri.r_offset); + } + + uint32_t getPairRelocation(const Elf_Rel &rel) const { + switch (getPrimaryType(rel)) { + case llvm::ELF::R_MIPS_HI16: + return llvm::ELF::R_MIPS_LO16; + case llvm::ELF::R_MIPS_PCHI16: + return llvm::ELF::R_MIPS_PCLO16; + case llvm::ELF::R_MIPS_GOT16: + if (isLocalBinding(rel)) + return llvm::ELF::R_MIPS_LO16; + break; + case llvm::ELF::R_MICROMIPS_HI16: + return llvm::ELF::R_MICROMIPS_LO16; + case llvm::ELF::R_MICROMIPS_GOT16: + if (isLocalBinding(rel)) + return llvm::ELF::R_MICROMIPS_LO16; + break; + default: + // Nothing to do. 
+ break; + } + return llvm::ELF::R_MIPS_NONE; + } + + Elf_Rel_Iter findMatchingRelocation(uint32_t pairRelType, Elf_Rel_Iter rit, + Elf_Rel_Iter eit) const { + return std::find_if(rit, eit, [&](const Elf_Rel &rel) { + return getPrimaryType(rel) == pairRelType && + rel.getSymbol(_isMips64EL) == rit->getSymbol(_isMips64EL); + }); + } + + static uint8_t getPrimaryType(const Elf_Rel &rel) { + return rel.getType(_isMips64EL) & 0xff; + } + bool isLocalBinding(const Elf_Rel &rel) const { + return this->_objFile->getSymbol(rel.getSymbol(_isMips64EL)) + ->getBinding() == llvm::ELF::STB_LOCAL; + } +}; + +template <class ELFT> class MipsDynamicFile : public DynamicFile<ELFT> { +public: + MipsDynamicFile(const MipsLinkingContext &context, StringRef name) + : DynamicFile<ELFT>(context, name) {} +}; + +} // elf +} // lld + +#endif diff --git a/lib/ReaderWriter/ELF/Mips/MipsELFFlagsMerger.cpp b/lib/ReaderWriter/ELF/Mips/MipsELFFlagsMerger.cpp new file mode 100644 index 000000000000..0ef2c70b8156 --- /dev/null +++ b/lib/ReaderWriter/ELF/Mips/MipsELFFlagsMerger.cpp @@ -0,0 +1,149 @@ +//===- lib/ReaderWriter/ELF/MipsELFFlagsMerger.cpp ------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "MipsELFFlagsMerger.h" +#include "lld/Core/Error.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/raw_ostream.h" + +using namespace lld; +using namespace lld::elf; +using namespace llvm::ELF; + +struct MipsISATreeEdge { + unsigned child; + unsigned parent; +}; + +static MipsISATreeEdge isaTree[] = { + // MIPS32R6 and MIPS64R6 are not compatible with other extensions + + // MIPS64 extensions. + {EF_MIPS_ARCH_64R2, EF_MIPS_ARCH_64}, + // MIPS V extensions. + {EF_MIPS_ARCH_64, EF_MIPS_ARCH_5}, + // MIPS IV extensions. 
+ {EF_MIPS_ARCH_5, EF_MIPS_ARCH_4}, + // MIPS III extensions. + {EF_MIPS_ARCH_4, EF_MIPS_ARCH_3}, + // MIPS32 extensions. + {EF_MIPS_ARCH_32R2, EF_MIPS_ARCH_32}, + // MIPS II extensions. + {EF_MIPS_ARCH_3, EF_MIPS_ARCH_2}, + {EF_MIPS_ARCH_32, EF_MIPS_ARCH_2}, + // MIPS I extensions. + {EF_MIPS_ARCH_2, EF_MIPS_ARCH_1}, +}; + +static bool matchMipsISA(unsigned base, unsigned ext) { + if (base == ext) + return true; + if (base == EF_MIPS_ARCH_32 && matchMipsISA(EF_MIPS_ARCH_64, ext)) + return true; + if (base == EF_MIPS_ARCH_32R2 && matchMipsISA(EF_MIPS_ARCH_64R2, ext)) + return true; + for (const auto &edge : isaTree) { + if (ext == edge.child) { + ext = edge.parent; + if (ext == base) + return true; + } + } + return false; +} + +MipsELFFlagsMerger::MipsELFFlagsMerger(bool is64Bits) + : _is64Bit(is64Bits), _flags(0) {} + +uint32_t MipsELFFlagsMerger::getMergedELFFlags() const { return _flags; } + +std::error_code MipsELFFlagsMerger::merge(uint8_t newClass, uint32_t newFlags) { + // Check bitness. + if (_is64Bit != (newClass == ELFCLASS64)) + return make_dynamic_error_code( + Twine("Bitness is incompatible with that of the selected target")); + + // We support two ABI: O32 and N64. The last one does not have + // the corresponding ELF flag. + uint32_t inAbi = newFlags & EF_MIPS_ABI; + uint32_t supportedAbi = _is64Bit ? 0 : uint32_t(EF_MIPS_ABI_O32); + if (inAbi != supportedAbi) + return make_dynamic_error_code(Twine("Unsupported ABI")); + + // ... and reduced set of architectures ... + uint32_t newArch = newFlags & EF_MIPS_ARCH; + switch (newArch) { + case EF_MIPS_ARCH_1: + case EF_MIPS_ARCH_2: + case EF_MIPS_ARCH_3: + case EF_MIPS_ARCH_4: + case EF_MIPS_ARCH_5: + case EF_MIPS_ARCH_32: + case EF_MIPS_ARCH_64: + case EF_MIPS_ARCH_32R2: + case EF_MIPS_ARCH_64R2: + case EF_MIPS_ARCH_32R6: + case EF_MIPS_ARCH_64R6: + break; + default: + return make_dynamic_error_code(Twine("Unsupported instruction set")); + } + + // ... and still do not support MIPS-16 extension. 
+ if (newFlags & EF_MIPS_ARCH_ASE_M16) + return make_dynamic_error_code(Twine("Unsupported extension: MIPS16")); + + // PIC code is inherently CPIC and may not set CPIC flag explicitly. + // Ensure that this flag will exist in the linked file. + if (newFlags & EF_MIPS_PIC) + newFlags |= EF_MIPS_CPIC; + + std::lock_guard<std::mutex> lock(_mutex); + + // If the old set of flags is empty, use the new one as a result. + if (!_flags) { + _flags = newFlags; + return std::error_code(); + } + + // Check PIC / CPIC flags compatibility. + uint32_t newPic = newFlags & (EF_MIPS_PIC | EF_MIPS_CPIC); + uint32_t oldPic = _flags & (EF_MIPS_PIC | EF_MIPS_CPIC); + + if ((newPic != 0) != (oldPic != 0)) + llvm::errs() << "lld warning: linking abicalls and non-abicalls files\n"; + + if (!(newPic & EF_MIPS_PIC)) + _flags &= ~EF_MIPS_PIC; + if (newPic) + _flags |= EF_MIPS_CPIC; + + // Check mixing -mnan=2008 / -mnan=legacy modules. + if ((newFlags & EF_MIPS_NAN2008) != (_flags & EF_MIPS_NAN2008)) + return make_dynamic_error_code( + Twine("Linking -mnan=2008 and -mnan=legacy modules")); + + // Check ISA compatibility and update the extension flag. 
+ uint32_t oldArch = _flags & EF_MIPS_ARCH; + if (!matchMipsISA(newArch, oldArch)) { + if (!matchMipsISA(oldArch, newArch)) + return make_dynamic_error_code( + Twine("Linking modules with incompatible ISA")); + _flags &= ~EF_MIPS_ARCH; + _flags |= newArch; + } + + _flags |= newFlags & EF_MIPS_NOREORDER; + _flags |= newFlags & EF_MIPS_MICROMIPS; + _flags |= newFlags & EF_MIPS_NAN2008; + _flags |= newFlags & EF_MIPS_32BITMODE; + + return std::error_code(); +} diff --git a/lib/ReaderWriter/ELF/Mips/MipsELFFlagsMerger.h b/lib/ReaderWriter/ELF/Mips/MipsELFFlagsMerger.h new file mode 100644 index 000000000000..6ade86f0163c --- /dev/null +++ b/lib/ReaderWriter/ELF/Mips/MipsELFFlagsMerger.h @@ -0,0 +1,36 @@ +//===- lib/ReaderWriter/ELF/MipsELFFlagsMerger.h --------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +#ifndef LLD_READER_WRITER_ELF_MIPS_MIPS_ELF_FLAGS_MERGER_H +#define LLD_READER_WRITER_ELF_MIPS_MIPS_ELF_FLAGS_MERGER_H + +#include <mutex> +#include <system_error> + +namespace lld { +namespace elf { + +class MipsELFFlagsMerger { +public: + MipsELFFlagsMerger(bool is64Bits); + + uint32_t getMergedELFFlags() const; + + /// \brief Merge saved ELF header flags and the new set of flags. 
+ std::error_code merge(uint8_t newClass, uint32_t newFlags); + +private: + const bool _is64Bit; + std::mutex _mutex; + uint32_t _flags; +}; + +} // namespace elf +} // namespace lld + +#endif diff --git a/lib/ReaderWriter/ELF/Mips/MipsELFReader.h b/lib/ReaderWriter/ELF/Mips/MipsELFReader.h new file mode 100644 index 000000000000..8b325b38bb52 --- /dev/null +++ b/lib/ReaderWriter/ELF/Mips/MipsELFReader.h @@ -0,0 +1,93 @@ +//===- lib/ReaderWriter/ELF/MipsELFReader.h -------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +#ifndef LLD_READER_WRITER_ELF_MIPS_MIPS_ELF_READER_H +#define LLD_READER_WRITER_ELF_MIPS_MIPS_ELF_READER_H + +#include "ELFReader.h" +#include "MipsELFFile.h" +#include "MipsELFFlagsMerger.h" +#include "MipsLinkingContext.h" + +namespace lld { +namespace elf { + +struct MipsELFFileCreateTraits { + typedef llvm::ErrorOr<std::unique_ptr<lld::File>> result_type; + + template <class ELFT> + static result_type create(std::unique_ptr<llvm::MemoryBuffer> mb, + MipsLinkingContext &ctx) { + return lld::elf::MipsELFFile<ELFT>::create(std::move(mb), ctx); + } +}; + +struct MipsDynamicFileCreateELFTraits { + typedef llvm::ErrorOr<std::unique_ptr<lld::SharedLibraryFile>> result_type; + + template <class ELFT> + static result_type create(std::unique_ptr<llvm::MemoryBuffer> mb, + MipsLinkingContext &ctx) { + return lld::elf::MipsDynamicFile<ELFT>::create(std::move(mb), ctx); + } +}; + +template <class ELFT> +class MipsELFObjectReader + : public ELFObjectReader<ELFT, MipsELFFileCreateTraits, + MipsLinkingContext> { + typedef ELFObjectReader<ELFT, MipsELFFileCreateTraits, MipsLinkingContext> + BaseReaderType; + +public: + MipsELFObjectReader(MipsLinkingContext &ctx) + : BaseReaderType(ctx, llvm::ELF::EM_MIPS), + _flagMerger(ctx.getELFFlagsMerger()) {} + + 
std::error_code + loadFile(std::unique_ptr<MemoryBuffer> mb, const Registry &registry, + std::vector<std::unique_ptr<File>> &result) const override { + auto &hdr = *this->elfHeader(*mb); + if (std::error_code ec = _flagMerger.merge(hdr.getFileClass(), hdr.e_flags)) + return ec; + return BaseReaderType::loadFile(std::move(mb), registry, result); + } + +private: + MipsELFFlagsMerger &_flagMerger; +}; + +template <class ELFT> +class MipsELFDSOReader + : public ELFDSOReader<ELFT, MipsDynamicFileCreateELFTraits, + MipsLinkingContext> { + typedef ELFDSOReader<ELFT, MipsDynamicFileCreateELFTraits, MipsLinkingContext> + BaseReaderType; + +public: + MipsELFDSOReader(MipsLinkingContext &ctx) + : BaseReaderType(ctx, llvm::ELF::EM_MIPS), + _flagMerger(ctx.getELFFlagsMerger()) {} + + std::error_code + loadFile(std::unique_ptr<MemoryBuffer> mb, const Registry &registry, + std::vector<std::unique_ptr<File>> &result) const override { + auto &hdr = *this->elfHeader(*mb); + if (std::error_code ec = _flagMerger.merge(hdr.getFileClass(), hdr.e_flags)) + return ec; + return BaseReaderType::loadFile(std::move(mb), registry, result); + } + +private: + MipsELFFlagsMerger &_flagMerger; +}; + +} // namespace elf +} // namespace lld + +#endif diff --git a/lib/ReaderWriter/ELF/Mips/MipsELFWriters.h b/lib/ReaderWriter/ELF/Mips/MipsELFWriters.h new file mode 100644 index 000000000000..d94dd757a0f3 --- /dev/null +++ b/lib/ReaderWriter/ELF/Mips/MipsELFWriters.h @@ -0,0 +1,82 @@ +//===- lib/ReaderWriter/ELF/Mips/MipsELFWriters.h -------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +#ifndef LLD_READER_WRITER_ELF_MIPS_MIPS_ELF_WRITERS_H +#define LLD_READER_WRITER_ELF_MIPS_MIPS_ELF_WRITERS_H + +#include "MipsLinkingContext.h" +#include "OutputELFWriter.h" + +namespace lld { +namespace elf { + +template <class ELFT> class MipsRuntimeFile; + +template <class ELFT> class MipsTargetLayout; + +template <typename ELFT> class MipsELFWriter { +public: + MipsELFWriter(MipsLinkingContext &ctx, MipsTargetLayout<ELFT> &targetLayout) + : _ctx(ctx), _targetLayout(targetLayout) {} + + void setELFHeader(ELFHeader<ELFT> &elfHeader) { + elfHeader.e_version(1); + elfHeader.e_ident(llvm::ELF::EI_VERSION, llvm::ELF::EV_CURRENT); + elfHeader.e_ident(llvm::ELF::EI_OSABI, llvm::ELF::ELFOSABI_NONE); + if (_targetLayout.findOutputSection(".got.plt")) + elfHeader.e_ident(llvm::ELF::EI_ABIVERSION, 1); + else + elfHeader.e_ident(llvm::ELF::EI_ABIVERSION, 0); + + elfHeader.e_flags(_ctx.getMergedELFFlags()); + } + + void finalizeMipsRuntimeAtomValues() { + if (!_ctx.isDynamic()) + return; + + auto gotSection = _targetLayout.findOutputSection(".got"); + auto got = gotSection ? gotSection->virtualAddr() : 0; + auto gp = gotSection ? 
got + _targetLayout.getGPOffset() : 0; + + setAtomValue("_GLOBAL_OFFSET_TABLE_", got); + setAtomValue("_gp", gp); + setAtomValue("_gp_disp", gp); + setAtomValue("__gnu_local_gp", gp); + } + + bool hasGlobalGOTEntry(const Atom *a) const { + return _targetLayout.getGOTSection().hasGlobalGOTEntry(a); + } + + std::unique_ptr<MipsRuntimeFile<ELFT>> createRuntimeFile() { + auto file = llvm::make_unique<MipsRuntimeFile<ELFT>>(_ctx); + if (_ctx.isDynamic()) { + file->addAbsoluteAtom("_GLOBAL_OFFSET_TABLE_"); + file->addAbsoluteAtom("_gp"); + file->addAbsoluteAtom("_gp_disp"); + file->addAbsoluteAtom("__gnu_local_gp"); + } + return file; + } + +private: + MipsLinkingContext &_ctx; + MipsTargetLayout<ELFT> &_targetLayout; + + void setAtomValue(StringRef name, uint64_t value) { + auto atom = _targetLayout.findAbsoluteAtom(name); + assert(atom != _targetLayout.absoluteAtoms().end()); + (*atom)->_virtualAddr = value; + } +}; + +} // elf +} // lld + +#endif diff --git a/lib/ReaderWriter/ELF/Mips/MipsExecutableWriter.h b/lib/ReaderWriter/ELF/Mips/MipsExecutableWriter.h new file mode 100644 index 000000000000..1a85bba3bd0f --- /dev/null +++ b/lib/ReaderWriter/ELF/Mips/MipsExecutableWriter.h @@ -0,0 +1,154 @@ +//===- lib/ReaderWriter/ELF/Mips/MipsExecutableWriter.h -------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +#ifndef LLD_READER_WRITER_ELF_MIPS_MIPS_EXECUTABLE_WRITER_H +#define LLD_READER_WRITER_ELF_MIPS_MIPS_EXECUTABLE_WRITER_H + +#include "ExecutableWriter.h" +#include "MipsDynamicTable.h" +#include "MipsELFWriters.h" +#include "MipsLinkingContext.h" + +namespace lld { +namespace elf { + +template <typename ELFT> class MipsTargetLayout; + +template <class ELFT> +class MipsExecutableWriter : public ExecutableWriter<ELFT> { +public: + MipsExecutableWriter(MipsLinkingContext &ctx, MipsTargetLayout<ELFT> &layout); + +protected: + void buildDynamicSymbolTable(const File &file) override; + + // Add any runtime files and their atoms to the output + bool createImplicitFiles(std::vector<std::unique_ptr<File>> &) override; + + void finalizeDefaultAtomValues() override; + std::error_code setELFHeader() override; + + unique_bump_ptr<SymbolTable<ELFT>> createSymbolTable() override; + unique_bump_ptr<DynamicTable<ELFT>> createDynamicTable() override; + + unique_bump_ptr<DynamicSymbolTable<ELFT>> + createDynamicSymbolTable() override; + +private: + MipsELFWriter<ELFT> _writeHelper; + MipsTargetLayout<ELFT> &_mipsTargetLayout; +}; + +template <class ELFT> +MipsExecutableWriter<ELFT>::MipsExecutableWriter(MipsLinkingContext &ctx, + MipsTargetLayout<ELFT> &layout) + : ExecutableWriter<ELFT>(ctx, layout), _writeHelper(ctx, layout), + _mipsTargetLayout(layout) {} + +template <class ELFT> +std::error_code MipsExecutableWriter<ELFT>::setELFHeader() { + std::error_code ec = ExecutableWriter<ELFT>::setELFHeader(); + if (ec) + return ec; + + StringRef entryName = this->_context.entrySymbolName(); + if (const AtomLayout *al = this->_layout.findAtomLayoutByName(entryName)) { + const auto *ea = cast<DefinedAtom>(al->_atom); + if (ea->codeModel() == DefinedAtom::codeMipsMicro || + ea->codeModel() == DefinedAtom::codeMipsMicroPIC) + // Adjust entry symbol value if this symbol is microMIPS encoded. 
+ this->_elfHeader->e_entry(al->_virtualAddr | 1); + } + + _writeHelper.setELFHeader(*this->_elfHeader); + return std::error_code(); +} + +template <class ELFT> +void MipsExecutableWriter<ELFT>::buildDynamicSymbolTable(const File &file) { + // The MIPS ABI requires adding even undefined symbols to dynsym + // if they have corresponding entries in the global part of the GOT. + for (auto sec : this->_layout.sections()) + if (auto section = dyn_cast<AtomSection<ELFT>>(sec)) + for (const auto &atom : section->atoms()) { + if (_writeHelper.hasGlobalGOTEntry(atom->_atom)) { + this->_dynamicSymbolTable->addSymbol(atom->_atom, section->ordinal(), + atom->_virtualAddr, atom); + continue; + } + + const DefinedAtom *da = dyn_cast<const DefinedAtom>(atom->_atom); + if (!da) + continue; + + if (da->dynamicExport() != DefinedAtom::dynamicExportAlways && + !this->_context.isDynamicallyExportedSymbol(da->name()) && + !(this->_context.shouldExportDynamic() && + da->scope() == Atom::Scope::scopeGlobal)) + continue; + + this->_dynamicSymbolTable->addSymbol(atom->_atom, section->ordinal(), + atom->_virtualAddr, atom); + } + + for (const UndefinedAtom *a : file.undefined()) + // FIXME (simon): Consider moving this check into a + // MipsELFUndefinedAtom class method. That would allow + // handling more complex conditions in the future. + if (_writeHelper.hasGlobalGOTEntry(a)) + this->_dynamicSymbolTable->addSymbol(a, ELF::SHN_UNDEF); + + // Skip our immediate parent class method + // ExecutableWriter<ELFT>::buildDynamicSymbolTable because we replaced it + // with our own version. Call OutputELFWriter directly.
+ OutputELFWriter<ELFT>::buildDynamicSymbolTable(file); +} + +template <class ELFT> +bool MipsExecutableWriter<ELFT>::createImplicitFiles( + std::vector<std::unique_ptr<File>> &result) { + ExecutableWriter<ELFT>::createImplicitFiles(result); + result.push_back(std::move(_writeHelper.createRuntimeFile())); + return true; +} + +template <class ELFT> +void MipsExecutableWriter<ELFT>::finalizeDefaultAtomValues() { + // Finalize the atom values that are part of the parent. + ExecutableWriter<ELFT>::finalizeDefaultAtomValues(); + _writeHelper.finalizeMipsRuntimeAtomValues(); +} + +template <class ELFT> +unique_bump_ptr<SymbolTable<ELFT>> + MipsExecutableWriter<ELFT>::createSymbolTable() { + return unique_bump_ptr<SymbolTable<ELFT>>(new ( + this->_alloc) MipsSymbolTable<ELFT>(this->_context)); +} + +/// \brief create dynamic table +template <class ELFT> +unique_bump_ptr<DynamicTable<ELFT>> + MipsExecutableWriter<ELFT>::createDynamicTable() { + return unique_bump_ptr<DynamicTable<ELFT>>(new ( + this->_alloc) MipsDynamicTable<ELFT>(this->_context, _mipsTargetLayout)); +} + +/// \brief create dynamic symbol table +template <class ELFT> +unique_bump_ptr<DynamicSymbolTable<ELFT>> + MipsExecutableWriter<ELFT>::createDynamicSymbolTable() { + return unique_bump_ptr<DynamicSymbolTable<ELFT>>( + new (this->_alloc) MipsDynamicSymbolTable<ELFT>( + this->_context, _mipsTargetLayout)); +} + +} // namespace elf +} // namespace lld + +#endif diff --git a/lib/ReaderWriter/ELF/Mips/MipsLinkingContext.cpp b/lib/ReaderWriter/ELF/Mips/MipsLinkingContext.cpp new file mode 100644 index 000000000000..7bffcbeb5c08 --- /dev/null +++ b/lib/ReaderWriter/ELF/Mips/MipsLinkingContext.cpp @@ -0,0 +1,115 @@ +//===- lib/ReaderWriter/ELF/Mips/MipsLinkingContext.cpp -------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "Atoms.h" +#include "MipsCtorsOrderPass.h" +#include "MipsLinkingContext.h" +#include "MipsRelocationPass.h" +#include "MipsTargetHandler.h" + +using namespace lld; +using namespace lld::elf; + +std::unique_ptr<ELFLinkingContext> +MipsLinkingContext::create(llvm::Triple triple) { + if (triple.getArch() == llvm::Triple::mipsel || + triple.getArch() == llvm::Triple::mips64el) + return std::unique_ptr<ELFLinkingContext>(new MipsLinkingContext(triple)); + return nullptr; +} + +typedef std::unique_ptr<TargetHandlerBase> TargetHandlerBasePtr; + +static TargetHandlerBasePtr createTarget(llvm::Triple triple, + MipsLinkingContext &ctx) { + switch (triple.getArch()) { + case llvm::Triple::mipsel: + return TargetHandlerBasePtr(new MipsTargetHandler<Mips32ELType>(ctx)); + case llvm::Triple::mips64el: + return TargetHandlerBasePtr(new MipsTargetHandler<Mips64ELType>(ctx)); + default: + llvm_unreachable("Unhandled arch"); + } +} + +MipsLinkingContext::MipsLinkingContext(llvm::Triple triple) + : ELFLinkingContext(triple, createTarget(triple, *this)), + _flagsMerger(triple.isArch64Bit()) {} + +uint32_t MipsLinkingContext::getMergedELFFlags() const { + return _flagsMerger.getMergedELFFlags(); +} + +MipsELFFlagsMerger &MipsLinkingContext::getELFFlagsMerger() { + return _flagsMerger; +} + +uint64_t MipsLinkingContext::getBaseAddress() const { + if (_baseAddress == 0 && getOutputELFType() == llvm::ELF::ET_EXEC) + return getTriple().isArch64Bit() ? 0x120000000 : 0x400000; + return _baseAddress; +} + +StringRef MipsLinkingContext::entrySymbolName() const { + if (_outputELFType == elf::ET_EXEC && _entrySymbolName.empty()) + return "__start"; + return _entrySymbolName; +} + +StringRef MipsLinkingContext::getDefaultInterpreter() const { + return getTriple().isArch64Bit() ? 
"/lib64/ld.so.1" : "/lib/ld.so.1"; +} + +void MipsLinkingContext::addPasses(PassManager &pm) { + auto pass = createMipsRelocationPass(*this); + if (pass) + pm.add(std::move(pass)); + ELFLinkingContext::addPasses(pm); + pm.add(llvm::make_unique<elf::MipsCtorsOrderPass>()); +} + +bool MipsLinkingContext::isDynamicRelocation(const Reference &r) const { + if (r.kindNamespace() != Reference::KindNamespace::ELF) + return false; + assert(r.kindArch() == Reference::KindArch::Mips); + switch (r.kindValue()) { + case llvm::ELF::R_MIPS_COPY: + case llvm::ELF::R_MIPS_REL32: + case llvm::ELF::R_MIPS_TLS_DTPMOD32: + case llvm::ELF::R_MIPS_TLS_DTPREL32: + case llvm::ELF::R_MIPS_TLS_TPREL32: + case llvm::ELF::R_MIPS_TLS_DTPMOD64: + case llvm::ELF::R_MIPS_TLS_DTPREL64: + case llvm::ELF::R_MIPS_TLS_TPREL64: + return true; + default: + return false; + } +} + +bool MipsLinkingContext::isCopyRelocation(const Reference &r) const { + if (r.kindNamespace() != Reference::KindNamespace::ELF) + return false; + assert(r.kindArch() == Reference::KindArch::Mips); + if (r.kindValue() == llvm::ELF::R_MIPS_COPY) + return true; + return false; +} + +bool MipsLinkingContext::isPLTRelocation(const Reference &r) const { + if (r.kindNamespace() != Reference::KindNamespace::ELF) + return false; + assert(r.kindArch() == Reference::KindArch::Mips); + switch (r.kindValue()) { + case llvm::ELF::R_MIPS_JUMP_SLOT: + return true; + default: + return false; + } +} diff --git a/lib/ReaderWriter/ELF/Mips/MipsLinkingContext.h b/lib/ReaderWriter/ELF/Mips/MipsLinkingContext.h new file mode 100644 index 000000000000..824605f5fa7f --- /dev/null +++ b/lib/ReaderWriter/ELF/Mips/MipsLinkingContext.h @@ -0,0 +1,68 @@ +//===- lib/ReaderWriter/ELF/Mips/MipsLinkingContext.h ---------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +#ifndef LLD_READER_WRITER_ELF_MIPS_MIPS_LINKING_CONTEXT_H +#define LLD_READER_WRITER_ELF_MIPS_MIPS_LINKING_CONTEXT_H + +#include "MipsELFFlagsMerger.h" +#include "lld/ReaderWriter/ELFLinkingContext.h" + +namespace lld { +namespace elf { + +/// \brief Mips internal references. +enum { + /// \brief Do nothing but mark GOT entry as a global one. + LLD_R_MIPS_GLOBAL_GOT = 1024, + /// \brief Apply high 16 bits of symbol + addend. + LLD_R_MIPS_32_HI16 = 1025, + /// \brief The same as R_MIPS_26 but for global symbols. + LLD_R_MIPS_GLOBAL_26 = 1026, + /// \brief Setup hi 16 bits using the symbol this reference refers to. + LLD_R_MIPS_HI16 = 1027, + /// \brief Setup low 16 bits using the symbol this reference refers to. + LLD_R_MIPS_LO16 = 1028, + /// \brief Represents a reference between PLT and dynamic symbol. + LLD_R_MIPS_STO_PLT = 1029, + /// \brief The same as R_MICROMIPS_26_S1 but for global symbols. + LLD_R_MICROMIPS_GLOBAL_26_S1 = 1030, + /// \brief Apply high 32+16 bits of symbol + addend. 
+ LLD_R_MIPS_64_HI16 = 1031, +}; + +typedef llvm::object::ELFType<llvm::support::little, 2, false> Mips32ELType; +typedef llvm::object::ELFType<llvm::support::little, 2, true> Mips64ELType; +typedef llvm::object::ELFType<llvm::support::big, 2, false> Mips32BEType; +typedef llvm::object::ELFType<llvm::support::big, 2, true> Mips64BEType; + +class MipsLinkingContext final : public ELFLinkingContext { +public: + static std::unique_ptr<ELFLinkingContext> create(llvm::Triple); + MipsLinkingContext(llvm::Triple triple); + + uint32_t getMergedELFFlags() const; + MipsELFFlagsMerger &getELFFlagsMerger(); + + // ELFLinkingContext + uint64_t getBaseAddress() const override; + StringRef entrySymbolName() const override; + StringRef getDefaultInterpreter() const override; + void addPasses(PassManager &pm) override; + bool isRelaOutputFormat() const override { return false; } + bool isDynamicRelocation(const Reference &r) const override; + bool isCopyRelocation(const Reference &r) const override; + bool isPLTRelocation(const Reference &r) const override; + +private: + MipsELFFlagsMerger _flagsMerger; +}; + +} // elf +} // lld + +#endif diff --git a/lib/ReaderWriter/ELF/Mips/MipsRelocationHandler.cpp b/lib/ReaderWriter/ELF/Mips/MipsRelocationHandler.cpp new file mode 100644 index 000000000000..173ce0e6b1a8 --- /dev/null +++ b/lib/ReaderWriter/ELF/Mips/MipsRelocationHandler.cpp @@ -0,0 +1,606 @@ +//===- lib/ReaderWriter/ELF/Mips/MipsRelocationHandler.cpp ----------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "MipsTargetHandler.h" +#include "MipsLinkingContext.h" +#include "MipsRelocationHandler.h" + +using namespace lld; +using namespace elf; +using namespace llvm::ELF; +using namespace llvm::support; + +namespace { +enum class CrossJumpMode { + None, // Not a jump or non-isa-cross jump + ToRegular, // cross isa jump to regular symbol + ToMicro // cross isa jump to microMips symbol +}; + +struct MipsRelocationParams { + uint8_t _size; // Relocations's size in bytes + uint64_t _mask; // Read/write mask of relocation + uint8_t _shift; // Relocation's addendum left shift size + bool _shuffle; // Relocation's addendum/result needs to be shuffled +}; + +template <class ELFT> class RelocationHandler : public MipsRelocationHandler { +public: + RelocationHandler(MipsLinkingContext &ctx) : _ctx(ctx) {} + + std::error_code applyRelocation(ELFWriter &writer, + llvm::FileOutputBuffer &buf, + const lld::AtomLayout &atom, + const Reference &ref) const override; + + Reference::Addend readAddend(Reference::KindValue kind, + const uint8_t *content) const override; + +private: + MipsLinkingContext &_ctx; +}; +} + +static MipsRelocationParams getRelocationParams(uint32_t rType) { + switch (rType) { + case R_MIPS_NONE: + return {4, 0x0, 0, false}; + case R_MIPS_64: + case R_MIPS_SUB: + return {8, 0xffffffffffffffffull, 0, false}; + case R_MIPS_32: + case R_MIPS_GPREL32: + case R_MIPS_PC32: + return {4, 0xffffffff, 0, false}; + case LLD_R_MIPS_32_HI16: + return {4, 0xffff0000, 0, false}; + case LLD_R_MIPS_64_HI16: + return {8, 0xffffffffffff0000ull, 0, false}; + case R_MIPS_26: + case LLD_R_MIPS_GLOBAL_26: + return {4, 0x3ffffff, 2, false}; + case R_MIPS_PC18_S3: + return {4, 0x3ffff, 3, false}; + case R_MIPS_PC19_S2: + return {4, 0x7ffff, 2, false}; + case R_MIPS_PC21_S2: + return {4, 0x1fffff, 2, false}; + case R_MIPS_PC26_S2: + return {4, 0x3ffffff, 2, false}; + case R_MIPS_HI16: + case 
R_MIPS_LO16: + case R_MIPS_PCHI16: + case R_MIPS_PCLO16: + case R_MIPS_GPREL16: + case R_MIPS_GOT16: + case R_MIPS_GOT_DISP: + case R_MIPS_GOT_PAGE: + case R_MIPS_GOT_OFST: + case R_MIPS_TLS_DTPREL_HI16: + case R_MIPS_TLS_DTPREL_LO16: + case R_MIPS_TLS_TPREL_HI16: + case R_MIPS_TLS_TPREL_LO16: + case LLD_R_MIPS_HI16: + case LLD_R_MIPS_LO16: + return {4, 0xffff, 0, false}; + case R_MICROMIPS_TLS_DTPREL_HI16: + case R_MICROMIPS_TLS_DTPREL_LO16: + case R_MICROMIPS_TLS_TPREL_HI16: + case R_MICROMIPS_TLS_TPREL_LO16: + return {4, 0xffff, 0, true}; + case R_MICROMIPS_26_S1: + case LLD_R_MICROMIPS_GLOBAL_26_S1: + return {4, 0x3ffffff, 1, true}; + case R_MICROMIPS_HI16: + case R_MICROMIPS_LO16: + case R_MICROMIPS_GOT16: + return {4, 0xffff, 0, true}; + case R_MICROMIPS_PC16_S1: + return {4, 0xffff, 1, true}; + case R_MICROMIPS_PC7_S1: + return {4, 0x7f, 1, false}; + case R_MICROMIPS_PC10_S1: + return {4, 0x3ff, 1, false}; + case R_MICROMIPS_PC23_S2: + return {4, 0x7fffff, 2, true}; + case R_MIPS_CALL16: + case R_MIPS_TLS_GD: + case R_MIPS_TLS_LDM: + case R_MIPS_TLS_GOTTPREL: + return {4, 0xffff, 0, false}; + case R_MICROMIPS_CALL16: + case R_MICROMIPS_TLS_GD: + case R_MICROMIPS_TLS_LDM: + case R_MICROMIPS_TLS_GOTTPREL: + return {4, 0xffff, 0, true}; + case R_MIPS_JALR: + return {4, 0x0, 0, false}; + case R_MICROMIPS_JALR: + return {4, 0x0, 0, true}; + case R_MIPS_REL32: + case R_MIPS_JUMP_SLOT: + case R_MIPS_COPY: + case R_MIPS_TLS_DTPMOD32: + case R_MIPS_TLS_DTPREL32: + case R_MIPS_TLS_TPREL32: + // Ignore runtime relocations. + return {4, 0x0, 0, false}; + case R_MIPS_TLS_DTPMOD64: + case R_MIPS_TLS_DTPREL64: + case R_MIPS_TLS_TPREL64: + return {8, 0x0, 0, false}; + case LLD_R_MIPS_GLOBAL_GOT: + case LLD_R_MIPS_STO_PLT: + // Do nothing. 
+ return {4, 0x0, 0, false}; + default: + llvm_unreachable("Unknown relocation"); + } +} + +/// \brief R_MIPS_32 +/// local/external: word32 S + A (truncate) +static uint32_t reloc32(uint64_t S, int64_t A) { return S + A; } + +/// \brief R_MIPS_64 +/// local/external: word64 S + A (truncate) +static uint64_t reloc64(uint64_t S, int64_t A) { return S + A; } + +/// \brief R_MIPS_SUB +/// local/external: word64 S - A (truncate) +static uint64_t relocSub(uint64_t S, int64_t A) { return S - A; } + +/// \brief R_MIPS_PC32 +/// local/external: word32 S + A - P (truncate) +static uint32_t relocpc32(uint64_t P, uint64_t S, int64_t A) { + return S + A - P; +} + +/// \brief R_MIPS_26, R_MICROMIPS_26_S1 +/// local : ((A | ((P + 4) & 0xF0000000)) + S) >> 2 +static uint32_t reloc26loc(uint64_t P, uint64_t S, int32_t A, uint32_t shift) { + uint32_t result = (A | ((P + 4) & (0xfc000000 << shift))) + S; + return result >> shift; +} + +/// \brief LLD_R_MIPS_GLOBAL_26, LLD_R_MICROMIPS_GLOBAL_26_S1 +/// external: (sign-extend(A) + S) >> 2 +static uint32_t reloc26ext(uint64_t S, int32_t A, uint32_t shift) { + int32_t result = + shift == 1 ? llvm::SignExtend32<27>(A) : llvm::SignExtend32<28>(A); + return (result + S) >> shift; +} + +/// \brief R_MIPS_HI16, R_MIPS_TLS_DTPREL_HI16, R_MIPS_TLS_TPREL_HI16, +/// R_MICROMIPS_HI16, R_MICROMIPS_TLS_DTPREL_HI16, R_MICROMIPS_TLS_TPREL_HI16, +/// LLD_R_MIPS_HI16 +/// local/external: hi16 (AHL + S) - (short)(AHL + S) (truncate) +/// _gp_disp : hi16 (AHL + GP - P) - (short)(AHL + GP - P) (verify) +static uint32_t relocHi16(uint64_t P, uint64_t S, int64_t AHL, bool isGPDisp) { + int32_t result = isGPDisp ?
AHL + S - P : AHL + S; + return (result + 0x8000) >> 16; +} + +/// \brief R_MIPS_PCHI16 +/// local/external: hi16 (S + AHL - P) +static uint32_t relocPcHi16(uint64_t P, uint64_t S, int64_t AHL) { + int32_t result = S + AHL - P; + return (result + 0x8000) >> 16; +} + +/// \brief R_MIPS_LO16, R_MIPS_TLS_DTPREL_LO16, R_MIPS_TLS_TPREL_LO16, +/// R_MICROMIPS_LO16, R_MICROMIPS_TLS_DTPREL_LO16, R_MICROMIPS_TLS_TPREL_LO16, +/// LLD_R_MIPS_LO16 +/// local/external: lo16 AHL + S (truncate) +/// _gp_disp : lo16 AHL + GP - P + 4 (verify) +static uint32_t relocLo16(uint64_t P, uint64_t S, int64_t AHL, bool isGPDisp, + bool micro) { + int32_t result = isGPDisp ? AHL + S - P + (micro ? 3 : 4) : AHL + S; + return result; +} + +/// \brief R_MIPS_PCLO16 +/// local/external: lo16 (S + AHL - P) +static uint32_t relocPcLo16(uint64_t P, uint64_t S, int64_t AHL) { + AHL = llvm::SignExtend32<16>(AHL); + int32_t result = S + AHL - P; + return result; +} + +/// \brief R_MIPS_GOT16, R_MIPS_CALL16, R_MICROMIPS_GOT16, R_MICROMIPS_CALL16 +/// rel16 G (verify) +static uint64_t relocGOT(uint64_t S, uint64_t GP) { + int64_t G = (int64_t)(S - GP); + return G; +} + +/// R_MIPS_GOT_OFST +/// rel16 offset of (S+A) from the page pointer (verify) +static uint32_t relocGOTOfst(uint64_t S, int64_t A) { + uint64_t page = (S + A + 0x8000) & ~0xffff; + return S + A - page; +} + +/// \brief R_MIPS_GPREL16 +/// local: sign-extend(A) + S + GP0 - GP +/// external: sign-extend(A) + S - GP +static uint64_t relocGPRel16(uint64_t S, int64_t A, uint64_t GP) { + // We added GP0 to addendum for a local symbol during a Relocation pass. + return llvm::SignExtend32<16>(A) + S - GP; +} + +/// \brief R_MIPS_GPREL32 +/// local: rel32 A + S + GP0 - GP (truncate) +static uint64_t relocGPRel32(uint64_t S, int64_t A, uint64_t GP) { + // We added GP0 to addendum for a local symbol during a Relocation pass. 
+ return A + S - GP; +} + +/// \brief R_MIPS_PC18_S3 +/// local/external: (S + A - P) >> 3 (P with cleared 3 less significant bits) +static uint32_t relocPc18(uint64_t P, uint64_t S, int64_t A) { + A = llvm::SignExtend32<21>(A); + // FIXME (simon): Check that S + A has 8-byte alignment + int32_t result = S + A - ((P | 7) ^ 7); + return result >> 3; +} + +/// \brief R_MIPS_PC19_S2 +/// local/external: (S + A - P) >> 2 +static uint32_t relocPc19(uint64_t P, uint64_t S, int64_t A) { + A = llvm::SignExtend32<21>(A); + // FIXME (simon): Check that S + A has 4-byte alignment + int32_t result = S + A - P; + return result >> 2; +} + +/// \brief R_MIPS_PC21_S2 +/// local/external: (S + A - P) >> 2 +static uint32_t relocPc21(uint64_t P, uint64_t S, int64_t A) { + A = llvm::SignExtend32<23>(A); + // FIXME (simon): Check that S + A has 4-byte alignment + int32_t result = S + A - P; + return result >> 2; +} + +/// \brief R_MIPS_PC26_S2 +/// local/external: (S + A - P) >> 2 +static uint32_t relocPc26(uint64_t P, uint64_t S, int64_t A) { + A = llvm::SignExtend32<28>(A); + // FIXME (simon): Check that S + A has 4-byte alignment + int32_t result = S + A - P; + return result >> 2; +} + +/// \brief R_MICROMIPS_PC7_S1 +static uint32_t relocPc7(uint64_t P, uint64_t S, int64_t A) { + A = llvm::SignExtend32<8>(A); + int32_t result = S + A - P; + return result >> 1; +} + +/// \brief R_MICROMIPS_PC10_S1 +static uint32_t relocPc10(uint64_t P, uint64_t S, int64_t A) { + A = llvm::SignExtend32<11>(A); + int32_t result = S + A - P; + return result >> 1; +} + +/// \brief R_MICROMIPS_PC16_S1 +static uint32_t relocPc16(uint64_t P, uint64_t S, int64_t A) { + A = llvm::SignExtend32<17>(A); + int32_t result = S + A - P; + return result >> 1; +} + +/// \brief R_MICROMIPS_PC23_S2 +static uint32_t relocPc23(uint64_t P, uint64_t S, int64_t A) { + A = llvm::SignExtend32<25>(A); + int32_t result = S + A - P; + + // Check addiupc 16MB range. 
+ if (static_cast<uint32_t>(result) + 0x1000000u >= 0x2000000u) // unsigned compare: also flags offsets below -16MB + llvm::errs() << "The addiupc instruction immediate " + << llvm::format_hex(result, 10) << " is out of range.\n"; + + return result >> 2; +} + +/// \brief LLD_R_MIPS_32_HI16, LLD_R_MIPS_64_HI16 +static uint64_t relocMaskLow16(uint64_t S, int64_t A) { + return S + A + 0x8000; +} + +static std::error_code adjustJumpOpCode(uint64_t &ins, uint64_t tgt, + CrossJumpMode mode) { + if (mode == CrossJumpMode::None) + return std::error_code(); + + bool toMicro = mode == CrossJumpMode::ToMicro; + uint32_t opNative = toMicro ? 0x03 : 0x3d; + uint32_t opCross = toMicro ? 0x1d : 0x3c; + + if ((tgt & 1) != toMicro) + return make_dynamic_error_code( + Twine("Incorrect bit 0 for the jalx target")); + + if (tgt & 2) + return make_dynamic_error_code(Twine("The jalx target 0x") + + Twine::utohexstr(tgt) + + " is not word-aligned"); + uint8_t op = ins >> 26; + if (op != opNative && op != opCross) + return make_dynamic_error_code(Twine("Unsupported jump opcode (0x") + + Twine::utohexstr(op) + + ") for ISA modes cross call"); + + ins = (ins & ~(0x3f << 26)) | (opCross << 26); + return std::error_code(); +} + +static bool isMicroMipsAtom(const Atom *a) { + if (const auto *da = dyn_cast<DefinedAtom>(a)) + return da->codeModel() == DefinedAtom::codeMipsMicro || + da->codeModel() == DefinedAtom::codeMipsMicroPIC; + return false; +} + +static CrossJumpMode getCrossJumpMode(const Reference &ref) { + if (!isa<DefinedAtom>(ref.target())) + return CrossJumpMode::None; + bool isTgtMicro = isMicroMipsAtom(ref.target()); + switch (ref.kindValue()) { + case R_MIPS_26: + case LLD_R_MIPS_GLOBAL_26: + return isTgtMicro ? CrossJumpMode::ToMicro : CrossJumpMode::None; + case R_MICROMIPS_26_S1: + case LLD_R_MICROMIPS_GLOBAL_26_S1: + return isTgtMicro ?
CrossJumpMode::None : CrossJumpMode::ToRegular; + default: + return CrossJumpMode::None; + } +} + +static uint32_t microShuffle(uint32_t ins) { + return ((ins & 0xffff) << 16) | ((ins & 0xffff0000) >> 16); +} + +static ErrorOr<uint64_t> calculateRelocation(Reference::KindValue kind, + Reference::Addend addend, + uint64_t tgtAddr, uint64_t relAddr, + uint64_t gpAddr, bool isGP, + CrossJumpMode jumpMode) { + bool isCrossJump = jumpMode != CrossJumpMode::None; + switch (kind) { + case R_MIPS_NONE: + return 0; + case R_MIPS_32: + return reloc32(tgtAddr, addend); + case R_MIPS_64: + return reloc64(tgtAddr, addend); + case R_MIPS_SUB: + return relocSub(tgtAddr, addend); + case R_MIPS_26: + return reloc26loc(relAddr, tgtAddr, addend, 2); + case R_MICROMIPS_26_S1: + return reloc26loc(relAddr, tgtAddr, addend, isCrossJump ? 2 : 1); + case R_MIPS_HI16: + case R_MICROMIPS_HI16: + return relocHi16(relAddr, tgtAddr, addend, isGP); + case R_MIPS_PCHI16: + return relocPcHi16(relAddr, tgtAddr, addend); + case R_MIPS_LO16: + return relocLo16(relAddr, tgtAddr, addend, isGP, false); + case R_MIPS_PCLO16: + return relocPcLo16(relAddr, tgtAddr, addend); + case R_MICROMIPS_LO16: + return relocLo16(relAddr, tgtAddr, addend, isGP, true); + case R_MIPS_GOT16: + case R_MIPS_CALL16: + case R_MIPS_GOT_DISP: + case R_MIPS_GOT_PAGE: + case R_MICROMIPS_GOT16: + case R_MICROMIPS_CALL16: + case R_MIPS_TLS_GD: + case R_MIPS_TLS_LDM: + case R_MIPS_TLS_GOTTPREL: + case R_MICROMIPS_TLS_GD: + case R_MICROMIPS_TLS_LDM: + case R_MICROMIPS_TLS_GOTTPREL: + return relocGOT(tgtAddr, gpAddr); + case R_MIPS_GOT_OFST: + return relocGOTOfst(tgtAddr, addend); + case R_MIPS_PC18_S3: + return relocPc18(relAddr, tgtAddr, addend); + case R_MIPS_PC19_S2: + return relocPc19(relAddr, tgtAddr, addend); + case R_MIPS_PC21_S2: + return relocPc21(relAddr, tgtAddr, addend); + case R_MIPS_PC26_S2: + return relocPc26(relAddr, tgtAddr, addend); + case R_MICROMIPS_PC7_S1: + return relocPc7(relAddr, tgtAddr, addend); + case 
R_MICROMIPS_PC10_S1: + return relocPc10(relAddr, tgtAddr, addend); + case R_MICROMIPS_PC16_S1: + return relocPc16(relAddr, tgtAddr, addend); + case R_MICROMIPS_PC23_S2: + return relocPc23(relAddr, tgtAddr, addend); + case R_MIPS_TLS_DTPREL_HI16: + case R_MIPS_TLS_TPREL_HI16: + case R_MICROMIPS_TLS_DTPREL_HI16: + case R_MICROMIPS_TLS_TPREL_HI16: + return relocHi16(0, tgtAddr, addend, false); + case R_MIPS_TLS_DTPREL_LO16: + case R_MIPS_TLS_TPREL_LO16: + return relocLo16(0, tgtAddr, addend, false, false); + case R_MICROMIPS_TLS_DTPREL_LO16: + case R_MICROMIPS_TLS_TPREL_LO16: + return relocLo16(0, tgtAddr, addend, false, true); + case R_MIPS_GPREL16: + return relocGPRel16(tgtAddr, addend, gpAddr); + case R_MIPS_GPREL32: + return relocGPRel32(tgtAddr, addend, gpAddr); + case R_MIPS_JALR: + case R_MICROMIPS_JALR: + // We do not do JALR optimization now. + return 0; + case R_MIPS_REL32: + case R_MIPS_JUMP_SLOT: + case R_MIPS_COPY: + case R_MIPS_TLS_DTPMOD32: + case R_MIPS_TLS_DTPREL32: + case R_MIPS_TLS_TPREL32: + case R_MIPS_TLS_DTPMOD64: + case R_MIPS_TLS_DTPREL64: + case R_MIPS_TLS_TPREL64: + // Ignore runtime relocations. + return 0; + case R_MIPS_PC32: + return relocpc32(relAddr, tgtAddr, addend); + case LLD_R_MIPS_GLOBAL_GOT: + // Do nothing. + case LLD_R_MIPS_32_HI16: + case LLD_R_MIPS_64_HI16: + return relocMaskLow16(tgtAddr, addend); + case LLD_R_MIPS_GLOBAL_26: + return reloc26ext(tgtAddr, addend, 2); + case LLD_R_MICROMIPS_GLOBAL_26_S1: + return reloc26ext(tgtAddr, addend, isCrossJump ? 2 : 1); + case LLD_R_MIPS_HI16: + return relocHi16(0, tgtAddr, 0, false); + case LLD_R_MIPS_LO16: + return relocLo16(0, tgtAddr, 0, false, false); + case LLD_R_MIPS_STO_PLT: + // Do nothing. 
+ return 0; + default: + return make_unhandled_reloc_error(); + } +} + +template <class ELFT> +static uint64_t relocRead(const MipsRelocationParams ¶ms, + const uint8_t *loc) { + uint64_t data; + switch (params._size) { + case 4: + data = endian::read<uint32_t, ELFT::TargetEndianness, unaligned>(loc); + break; + case 8: + data = endian::read<uint64_t, ELFT::TargetEndianness, unaligned>(loc); + break; + default: + llvm_unreachable("Unexpected size"); + } + if (params._shuffle) + data = microShuffle(data); + return data; +} + +template <class ELFT> +static void relocWrite(uint64_t data, const MipsRelocationParams ¶ms, + uint8_t *loc) { + if (params._shuffle) + data = microShuffle(data); + switch (params._size) { + case 4: + endian::write<uint32_t, ELFT::TargetEndianness, unaligned>(loc, data); + break; + case 8: + endian::write<uint64_t, ELFT::TargetEndianness, unaligned>(loc, data); + break; + default: + llvm_unreachable("Unexpected size"); + } +} + +template <class ELFT> +std::error_code RelocationHandler<ELFT>::applyRelocation( + ELFWriter &writer, llvm::FileOutputBuffer &buf, const lld::AtomLayout &atom, + const Reference &ref) const { + if (ref.kindNamespace() != lld::Reference::KindNamespace::ELF) + return std::error_code(); + assert(ref.kindArch() == Reference::KindArch::Mips); + + auto &targetLayout = static_cast<MipsTargetLayout<ELFT> &>( + _ctx.getTargetHandler<ELFT>().getTargetLayout()); + + AtomLayout *gpAtom = targetLayout.getGP(); + uint64_t gpAddr = gpAtom ? 
gpAtom->_virtualAddr : 0; + + AtomLayout *gpDispAtom = targetLayout.getGPDisp(); + bool isGpDisp = gpDispAtom && ref.target() == gpDispAtom->_atom; + + uint8_t *atomContent = buf.getBufferStart() + atom._fileOffset; + uint8_t *location = atomContent + ref.offsetInAtom(); + uint64_t tgtAddr = writer.addressOfAtom(ref.target()); + uint64_t relAddr = atom._virtualAddr + ref.offsetInAtom(); + + if (isMicroMipsAtom(ref.target())) + tgtAddr |= 1; + + CrossJumpMode jumpMode = getCrossJumpMode(ref); + + ErrorOr<uint64_t> res = + calculateRelocation(ref.kindValue(), ref.addend(), tgtAddr, relAddr, + gpAddr, isGpDisp, jumpMode); + if (auto ec = res.getError()) + return ec; + + Reference::KindValue op = ref.kindValue(); + + // FIXME (simon): Handle r_ssym value. + for (auto tag = (ref.tag() & 0xffff); tag & 0xff; tag >>= 8) { + op = tag & 0xff; + res = calculateRelocation(op, *res, 0, relAddr, gpAddr, isGpDisp, jumpMode); + if (auto ec = res.getError()) + return ec; + } + + auto params = getRelocationParams(op); + uint64_t ins = relocRead<ELFT>(params, location); + + if (auto ec = adjustJumpOpCode(ins, tgtAddr, jumpMode)) + return ec; + + ins = (ins & ~params._mask) | (*res & params._mask); + relocWrite<ELFT>(ins, params, location); + + return std::error_code(); +} + +template <class ELFT> +Reference::Addend +RelocationHandler<ELFT>::readAddend(Reference::KindValue kind, + const uint8_t *content) const { + auto params = getRelocationParams(kind); + uint64_t ins = relocRead<ELFT>(params, content); + return (ins & params._mask) << params._shift; +} + +namespace lld { +namespace elf { + +template <> +std::unique_ptr<TargetRelocationHandler> +createMipsRelocationHandler<Mips32ELType>(MipsLinkingContext &ctx) { + return std::unique_ptr<TargetRelocationHandler>( + new RelocationHandler<Mips32ELType>(ctx)); +} + +template <> +std::unique_ptr<TargetRelocationHandler> +createMipsRelocationHandler<Mips64ELType>(MipsLinkingContext &ctx) { + return 
std::unique_ptr<TargetRelocationHandler>( + new RelocationHandler<Mips64ELType>(ctx)); +} + +} // elf +} // lld diff --git a/lib/ReaderWriter/ELF/Mips/MipsRelocationHandler.h b/lib/ReaderWriter/ELF/Mips/MipsRelocationHandler.h new file mode 100644 index 000000000000..87066b2b5c10 --- /dev/null +++ b/lib/ReaderWriter/ELF/Mips/MipsRelocationHandler.h @@ -0,0 +1,31 @@ +//===- lld/ReaderWriter/ELF/Mips/MipsRelocationHandler.h ------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +#ifndef LLD_READER_WRITER_ELF_MIPS_MIPS_RELOCATION_HANDLER_H +#define LLD_READER_WRITER_ELF_MIPS_MIPS_RELOCATION_HANDLER_H + +#include "TargetHandler.h" +#include "lld/Core/Reference.h" + +namespace lld { +namespace elf { + +class MipsRelocationHandler : public TargetRelocationHandler { +public: + virtual Reference::Addend readAddend(Reference::KindValue kind, + const uint8_t *content) const = 0; +}; + +template <class ELFT> +std::unique_ptr<TargetRelocationHandler> +createMipsRelocationHandler(MipsLinkingContext &ctx); + +} // elf +} // lld + +#endif diff --git a/lib/ReaderWriter/ELF/Mips/MipsRelocationPass.cpp b/lib/ReaderWriter/ELF/Mips/MipsRelocationPass.cpp new file mode 100644 index 000000000000..a1b3530dfcdf --- /dev/null +++ b/lib/ReaderWriter/ELF/Mips/MipsRelocationPass.cpp @@ -0,0 +1,1070 @@ +//===- lib/ReaderWriter/ELF/Mips/MipsRelocationPass.cpp -------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "MipsELFFile.h" +#include "MipsLinkingContext.h" +#include "MipsRelocationPass.h" +#include "MipsTargetHandler.h" +#include "llvm/ADT/DenseSet.h" + +using namespace lld; +using namespace lld::elf; +using namespace llvm::ELF; + +// Lazy resolver +static const uint8_t mipsGot0AtomContent[] = { + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00 +}; + +// Module pointer +static const uint8_t mipsGotModulePointerAtomContent[] = { + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x80 +}; + +// TLS GD Entry +static const uint8_t mipsGotTlsGdAtomContent[] = { + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00 +}; + +// Regular PLT0 entry +static const uint8_t mipsPlt0AtomContent[] = { + 0x00, 0x00, 0x1c, 0x3c, // lui $28, %hi(&GOTPLT[0]) + 0x00, 0x00, 0x99, 0x8f, // lw $25, %lo(&GOTPLT[0])($28) + 0x00, 0x00, 0x9c, 0x27, // addiu $28, $28, %lo(&GOTPLT[0]) + 0x23, 0xc0, 0x1c, 0x03, // subu $24, $24, $28 + 0x21, 0x78, 0xe0, 0x03, // move $15, $31 + 0x82, 0xc0, 0x18, 0x00, // srl $24, $24, 2 + 0x09, 0xf8, 0x20, 0x03, // jalr $25 + 0xfe, 0xff, 0x18, 0x27 // subu $24, $24, 2 +}; + +// microMIPS PLT0 entry +static const uint8_t micromipsPlt0AtomContent[] = { + 0x80, 0x79, 0x00, 0x00, // addiupc $3, (&GOTPLT[0]) - . 
+ 0x23, 0xff, 0x00, 0x00, // lw $25, 0($3) + 0x35, 0x05, // subu $2, $2, $3 + 0x25, 0x25, // srl $2, $2, 2 + 0x02, 0x33, 0xfe, 0xff, // subu $24, $2, 2 + 0xff, 0x0d, // move $15, $31 + 0xf9, 0x45, // jalrs $25 + 0x83, 0x0f, // move $28, $3 + 0x00, 0x0c // nop +}; + +// Regular PLT entry +static const uint8_t mipsPltAAtomContent[] = { + 0x00, 0x00, 0x0f, 0x3c, // lui $15, %hi(.got.plt entry) + 0x00, 0x00, 0xf9, 0x8d, // l[wd] $25, %lo(.got.plt entry)($15) + 0x08, 0x00, 0x20, 0x03, // jr $25 + 0x00, 0x00, 0xf8, 0x25 // addiu $24, $15, %lo(.got.plt entry) +}; + +// microMIPS PLT entry +static const uint8_t micromipsPltAtomContent[] = { + 0x00, 0x79, 0x00, 0x00, // addiupc $2, (.got.plt entry) - . + 0x22, 0xff, 0x00, 0x00, // lw $25, 0($2) + 0x99, 0x45, // jr $25 + 0x02, 0x0f // move $24, $2 +}; + +// R6 PLT entry +static const uint8_t mipsR6PltAAtomContent[] = { + 0x00, 0x00, 0x0f, 0x3c, // lui $15, %hi(.got.plt entry) + 0x00, 0x00, 0xf9, 0x8d, // l[wd] $25, %lo(.got.plt entry)($15) + 0x09, 0x00, 0x20, 0x03, // jr $25 + 0x00, 0x00, 0xf8, 0x25 // addiu $24, $15, %lo(.got.plt entry) +}; + +// LA25 stub entry +static const uint8_t mipsLA25AtomContent[] = { + 0x00, 0x00, 0x19, 0x3c, // lui $25, %hi(func) + 0x00, 0x00, 0x00, 0x08, // j func + 0x00, 0x00, 0x39, 0x27, // addiu $25, $25, %lo(func) + 0x00, 0x00, 0x00, 0x00 // nop +}; + +// microMIPS LA25 stub entry +static const uint8_t micromipsLA25AtomContent[] = { + 0xb9, 0x41, 0x00, 0x00, // lui $25, %hi(func) + 0x00, 0xd4, 0x00, 0x00, // j func + 0x39, 0x33, 0x00, 0x00, // addiu $25, $25, %lo(func) + 0x00, 0x00, 0x00, 0x00 // nop +}; + +namespace { + +/// \brief Abstract base class represent MIPS GOT entries. +class MipsGOTAtom : public GOTAtom { +public: + MipsGOTAtom(const File &f) : GOTAtom(f, ".got") {} + + Alignment alignment() const override { return Alignment(2); } +}; + +/// \brief MIPS GOT entry initialized by zero. 
template <typename ELFT> class GOT0Atom : public MipsGOTAtom {
public:
  GOT0Atom(const File &f) : MipsGOTAtom(f) {}

  ArrayRef<uint8_t> rawContent() const override;
};

// 32-bit entries use a slice of the shared zero-filled table, 64-bit entries
// use the whole table.
template <> ArrayRef<uint8_t> GOT0Atom<Mips32ELType>::rawContent() const {
  return llvm::makeArrayRef(mipsGot0AtomContent).slice(4);
}
template <> ArrayRef<uint8_t> GOT0Atom<Mips64ELType>::rawContent() const {
  return llvm::makeArrayRef(mipsGot0AtomContent);
}

/// \brief MIPS GOT entry holding the module pointer. Its content comes from
/// mipsGotModulePointerAtomContent (not all-zero: the last byte is 0x80).
template <typename ELFT> class GOTModulePointerAtom : public MipsGOTAtom {
public:
  GOTModulePointerAtom(const File &f) : MipsGOTAtom(f) {}

  ArrayRef<uint8_t> rawContent() const override;
};

template <>
ArrayRef<uint8_t> GOTModulePointerAtom<Mips32ELType>::rawContent() const {
  return llvm::makeArrayRef(mipsGotModulePointerAtomContent).slice(4);
}
template <>
ArrayRef<uint8_t> GOTModulePointerAtom<Mips64ELType>::rawContent() const {
  return llvm::makeArrayRef(mipsGotModulePointerAtomContent);
}

/// \brief MIPS GOT TLS GD entry.
template <typename ELFT> class GOTTLSGdAtom : public MipsGOTAtom {
public:
  GOTTLSGdAtom(const File &f) : MipsGOTAtom(f) {}

  ArrayRef<uint8_t> rawContent() const override;
};

template <> ArrayRef<uint8_t> GOTTLSGdAtom<Mips32ELType>::rawContent() const {
  return llvm::makeArrayRef(mipsGotTlsGdAtomContent).slice(8);
}

template <> ArrayRef<uint8_t> GOTTLSGdAtom<Mips64ELType>::rawContent() const {
  return llvm::makeArrayRef(mipsGotTlsGdAtomContent);
}

/// \brief Entry of the ".got.plt" section.
///
/// The single-argument constructor creates an entry without any reference
/// (used for the two header entries made by createPLTHeader()); the
/// two-argument constructor ties the entry to the atom \p a.
class GOTPLTAtom : public GOTAtom {
public:
  GOTPLTAtom(const File &f) : GOTAtom(f, ".got.plt") {}
  GOTPLTAtom(const Atom *a, const File &f) : GOTAtom(f, ".got.plt") {
    // Create dynamic relocation to adjust the .got.plt entry at runtime.
    addReferenceELF_Mips(R_MIPS_JUMP_SLOT, 0, a, 0);
  }

  /// Setup reference to assign initial value to the .got.plt entry.
  void setPLT0(const PLTAtom *plt0) {
    addReferenceELF_Mips(R_MIPS_32, 0, plt0, 0);
  }

  Alignment alignment() const override { return Alignment(2); }

  // NOTE(review): always returns the 4-byte slice, independent of ELFT --
  // confirm this is intended for 64-bit targets.
  ArrayRef<uint8_t> rawContent() const override {
    return llvm::makeArrayRef(mipsGot0AtomContent).slice(4);
  }
};

/// \brief Regular PLT0 (header) entry; \p got is the atom the three
/// HI16/LO16 fixups of the header point to.
class PLT0Atom : public PLTAtom {
public:
  PLT0Atom(const Atom *got, const File &f) : PLTAtom(f, ".plt") {
    // Setup reference to fixup the PLT0 entry.
    addReferenceELF_Mips(LLD_R_MIPS_HI16, 0, got, 0);
    addReferenceELF_Mips(LLD_R_MIPS_LO16, 4, got, 0);
    addReferenceELF_Mips(LLD_R_MIPS_LO16, 8, got, 0);
  }

  ArrayRef<uint8_t> rawContent() const override {
    return llvm::makeArrayRef(mipsPlt0AtomContent);
  }
};

/// \brief microMIPS PLT0 (header) entry.
class PLT0MicroAtom : public PLTAtom {
public:
  PLT0MicroAtom(const Atom *got, const File &f) : PLTAtom(f, ".plt") {
    // Setup reference to fixup the PLT0 entry.
    addReferenceELF_Mips(R_MICROMIPS_PC23_S2, 0, got, 0);
  }

  CodeModel codeModel() const override { return codeMipsMicro; }

  ArrayRef<uint8_t> rawContent() const override {
    return llvm::makeArrayRef(micromipsPlt0AtomContent);
  }
};

/// \brief Regular PLT entry referring to its .got.plt entry \p got.
class PLTAAtom : public PLTAtom {
public:
  PLTAAtom(const GOTPLTAtom *got, const File &f) : PLTAtom(f, ".plt") {
    // Setup reference to fixup the PLT entry.
    // Offsets 0, 4 and 12 are the lui, l[wd] and addiu instructions of
    // mipsPltAAtomContent; the jr at offset 8 needs no fixup.
    addReferenceELF_Mips(LLD_R_MIPS_HI16, 0, got, 0);
    addReferenceELF_Mips(LLD_R_MIPS_LO16, 4, got, 0);
    addReferenceELF_Mips(LLD_R_MIPS_LO16, 12, got, 0);
  }

  ArrayRef<uint8_t> rawContent() const override {
    return llvm::makeArrayRef(mipsPltAAtomContent);
  }
};

/// \brief R6 PLT entry: same references as PLTAAtom, different content.
class PLTR6Atom : public PLTAAtom {
public:
  PLTR6Atom(const GOTPLTAtom *got, const File &f) : PLTAAtom(got, f) {}

  ArrayRef<uint8_t> rawContent() const override {
    return llvm::makeArrayRef(mipsR6PltAAtomContent);
  }
};

/// \brief microMIPS PLT entry referring to its .got.plt entry \p got.
class PLTMicroAtom : public PLTAtom {
public:
  PLTMicroAtom(const GOTPLTAtom *got, const File &f) : PLTAtom(f, ".plt") {
    // Setup reference to fixup the microMIPS PLT entry.
    addReferenceELF_Mips(R_MICROMIPS_PC23_S2, 0, got, 0);
  }

  Alignment alignment() const override { return Alignment(1); }
  CodeModel codeModel() const override { return codeMipsMicro; }

  ArrayRef<uint8_t> rawContent() const override {
    return llvm::makeArrayRef(micromipsPltAtomContent);
  }
};

/// \brief Common base of the LA25 stubs; they are placed into ".text".
class LA25Atom : public PLTAtom {
public:
  LA25Atom(const File &f) : PLTAtom(f, ".text") {}
};

/// \brief Regular LA25 stub for the target atom \p a.
class LA25RegAtom : public LA25Atom {
public:
  LA25RegAtom(const Atom *a, const File &f) : LA25Atom(f) {
    // Setup reference to fixup the LA25 stub entry.
    addReferenceELF_Mips(R_MIPS_HI16, 0, a, 0);
    addReferenceELF_Mips(R_MIPS_26, 4, a, 0);
    addReferenceELF_Mips(R_MIPS_LO16, 8, a, 0);
  }

  ArrayRef<uint8_t> rawContent() const override {
    return llvm::makeArrayRef(mipsLA25AtomContent);
  }
};

/// \brief microMIPS LA25 stub for the target atom \p a.
class LA25MicroAtom : public LA25Atom {
public:
  LA25MicroAtom(const Atom *a, const File &f) : LA25Atom(f) {
    // Setup reference to fixup the microMIPS LA25 stub entry.
    addReferenceELF_Mips(R_MICROMIPS_HI16, 0, a, 0);
    addReferenceELF_Mips(R_MICROMIPS_26_S1, 4, a, 0);
    addReferenceELF_Mips(R_MICROMIPS_LO16, 8, a, 0);
  }

  CodeModel codeModel() const override { return codeMipsMicro; }

  ArrayRef<uint8_t> rawContent() const override {
    return llvm::makeArrayRef(micromipsLA25AtomContent);
  }
};

/// \brief File object that owns the allocator used for every atom the pass
/// creates (all atoms are placement-new'ed into _alloc).
class RelocationPassFile : public SimpleFile {
public:
  RelocationPassFile(const ELFLinkingContext &ctx)
      : SimpleFile("RelocationPassFile") {
    setOrdinal(ctx.getNextOrdinalAndIncrement());
  }

  llvm::BumpPtrAllocator _alloc;
};

/// \brief Pass that walks all references of the defined atoms and creates the
/// GOT, GOTPLT, PLT, LA25 and object (copy) atoms they require.
template <typename ELFT> class RelocationPass : public Pass {
public:
  RelocationPass(MipsLinkingContext &ctx);

  void perform(std::unique_ptr<MutableFile> &mf) override;

private:
  /// \brief Reference to the linking context.
  const MipsLinkingContext &_ctx;

  /// \brief Owner of all the Atoms created by this pass.
  RelocationPassFile _file;

  /// \brief Map Atoms and addend to local GOT entries.
  typedef std::pair<const Atom *, int64_t> LocalGotMapKeyT;
  llvm::DenseMap<LocalGotMapKeyT, GOTAtom *> _gotLocalMap;
  llvm::DenseMap<LocalGotMapKeyT, GOTAtom *> _gotLocalPageMap;

  /// \brief Map Atoms to global GOT entries.
  llvm::DenseMap<const Atom *, GOTAtom *> _gotGlobalMap;

  /// \brief Map Atoms to TLS GOT entries.
  llvm::DenseMap<const Atom *, GOTAtom *> _gotTLSMap;

  /// \brief Map Atoms to TLS GD GOT entries.
  llvm::DenseMap<const Atom *, GOTAtom *> _gotTLSGdMap;

  /// \brief GOT entry for the R_xxxMIPS_TLS_LDM relocations.
  GOTTLSGdAtom<ELFT> *_gotLDMEntry;

  /// \brief the list of local GOT atoms.
  std::vector<GOTAtom *> _localGotVector;

  /// \brief the list of global GOT atoms.
  std::vector<GOTAtom *> _globalGotVector;

  /// \brief the list of TLS GOT atoms.
  std::vector<GOTAtom *> _tlsGotVector;

  /// \brief Map Atoms to their GOTPLT entries.
  llvm::DenseMap<const Atom *, GOTPLTAtom *> _gotpltMap;

  /// \brief Map Atoms to their PLT entries.
  llvm::DenseMap<const Atom *, PLTAAtom *> _pltRegMap;
  llvm::DenseMap<const Atom *, PLTMicroAtom *> _pltMicroMap;

  /// \brief Map Atoms to their Object entries.
  llvm::DenseMap<const Atom *, ObjectAtom *> _objectMap;

  /// \brief Map Atoms to their LA25 entries.
  llvm::DenseMap<const Atom *, LA25RegAtom *> _la25RegMap;
  llvm::DenseMap<const Atom *, LA25MicroAtom *> _la25MicroMap;

  /// \brief Atoms referenced by static relocations.
  llvm::DenseSet<const Atom *> _hasStaticRelocations;

  /// \brief Atoms that require pointer equality.
  llvm::DenseSet<const Atom *> _requiresPtrEquality;

  /// \brief References which are candidates for converting
  /// to the R_MIPS_REL32 relocation.
  std::vector<Reference *> _rel32Candidates;

  /// \brief the list of PLT atoms.
  std::vector<PLTAtom *> _pltRegVector;
  std::vector<PLTAtom *> _pltMicroVector;

  /// \brief the list of GOTPLT atoms.
  std::vector<GOTPLTAtom *> _gotpltVector;

  /// \brief the list of Object entries.
  std::vector<ObjectAtom *> _objectVector;

  /// \brief the list of LA25 entries.
  std::vector<LA25Atom *> _la25Vector;

  /// \brief Handle a specific reference.
  void handleReference(const MipsELFDefinedAtom<ELFT> &atom, Reference &ref);

  /// \brief Collect information about the reference to use it
  /// later in the handleReference() routine.
  void collectReferenceInfo(const MipsELFDefinedAtom<ELFT> &atom,
                            Reference &ref);

  void handlePlain(const MipsELFDefinedAtom<ELFT> &atom, Reference &ref);
  void handle26(const MipsELFDefinedAtom<ELFT> &atom, Reference &ref);
  void handleGOT(Reference &ref);

  // The get*Entry() helpers return the cached atom for the key if one
  // exists, otherwise they create, cache and return a new one.
  const GOTAtom *getLocalGOTEntry(const Reference &ref);
  const GOTAtom *getLocalGOTPageEntry(const Reference &ref);
  const GOTAtom *getGlobalGOTEntry(const Atom *a);
  const GOTAtom *getTLSGOTEntry(const Atom *a);
  const GOTAtom *getTLSGdGOTEntry(const Atom *a);
  const GOTAtom *getTLSLdmGOTEntry(const Atom *a);
  const GOTPLTAtom *getGOTPLTEntry(const Atom *a);
  const PLTAtom *getPLTEntry(const Atom *a);
  const PLTAtom *getPLTRegEntry(const Atom *a);
  const PLTAtom *getPLTMicroEntry(const Atom *a);
  const LA25Atom *getLA25Entry(const Atom *target, bool isMicroMips);
  const LA25Atom *getLA25RegEntry(const Atom *a);
  const LA25Atom *getLA25MicroEntry(const Atom *a);
  const ObjectAtom *getObjectEntry(const SharedLibraryAtom *a);

  PLTAtom *createPLTHeader(bool isMicroMips);

  bool isLocal(const Atom *a) const;
  bool isLocalCall(const Atom *a) const;
  bool isDynamic(const Atom *atom) const;
  bool requireLA25Stub(const Atom *a) const;
  bool requirePLTEntry(const Atom *a) const;
  bool requireCopy(const Atom *a) const;
  bool mightBeDynamic(const MipsELFDefinedAtom<ELFT> &atom,
                      Reference::KindValue refKind) const;
  bool hasPLTEntry(const Atom *atom) const;

  bool isR6Target() const;
};

/// The local GOT always starts with two entries: a zero-initialized GOT0Atom
/// followed by the module pointer entry.
template <typename ELFT>
RelocationPass<ELFT>::RelocationPass(MipsLinkingContext &ctx)
    : _ctx(ctx), _file(ctx), _gotLDMEntry(nullptr) {
  _localGotVector.push_back(new (_file._alloc) GOT0Atom<ELFT>(_file));
  _localGotVector.push_back(new (_file._alloc)
                                GOTModulePointerAtom<ELFT>(_file));
}

/// Run the pass over \p mf: collect per-reference information, rewrite the
/// references, convert the remaining candidates to R_MIPS_REL32, and finally
/// add all created atoms to \p mf with increasing ordinals.
template <typename ELFT>
void RelocationPass<ELFT>::perform(std::unique_ptr<MutableFile> &mf) {
  // First sweep only gathers information; handleReference() relies on it.
  for (const auto &atom : mf->defined())
    for (const auto &ref : *atom)
      collectReferenceInfo(*cast<MipsELFDefinedAtom<ELFT>>(atom),
                           const_cast<Reference &>(*ref));

  // Process all references.
  for (const auto &atom : mf->defined())
    for (const auto &ref : *atom)
      handleReference(*cast<MipsELFDefinedAtom<ELFT>>(atom),
                      const_cast<Reference &>(*ref));

  // Create R_MIPS_REL32 relocations.
  for (auto *ref : _rel32Candidates) {
    // Skip targets that are not dynamic or already got a PLT entry.
    if (!isDynamic(ref->target()) || hasPLTEntry(ref->target()))
      continue;
    ref->setKindValue(R_MIPS_REL32);
    if (ELFT::Is64Bits)
      static_cast<MipsELFReference<ELFT> *>(ref)->setTag(R_MIPS_64);
    if (!isLocalCall(ref->target()))
      getGlobalGOTEntry(ref->target());
  }

  // Emission order below: local GOT, global GOT, TLS GOT, PLT0, regular
  // PLT, microMIPS PLT, GOTPLT, object (copy) atoms, LA25 stubs.
  uint64_t ordinal = 0;

  for (auto &got : _localGotVector) {
    got->setOrdinal(ordinal++);
    mf->addAtom(*got);
  }

  for (auto &got : _globalGotVector) {
    got->setOrdinal(ordinal++);
    mf->addAtom(*got);
  }

  for (auto &got : _tlsGotVector) {
    got->setOrdinal(ordinal++);
    mf->addAtom(*got);
  }

  // Create and emit PLT0 entry.
  // A regular header is used whenever at least one regular PLT entry exists;
  // the microMIPS header only when all PLT entries are microMIPS.
  PLTAtom *plt0Atom = nullptr;
  if (!_pltRegVector.empty())
    plt0Atom = createPLTHeader(false);
  else if (!_pltMicroVector.empty())
    plt0Atom = createPLTHeader(true);

  if (plt0Atom) {
    plt0Atom->setOrdinal(ordinal++);
    mf->addAtom(*plt0Atom);
  }

  // Emit regular PLT entries first.
  for (auto &plt : _pltRegVector) {
    plt->setOrdinal(ordinal++);
    mf->addAtom(*plt);
  }

  // microMIPS PLT entries come after regular ones.
  for (auto &plt : _pltMicroVector) {
    plt->setOrdinal(ordinal++);
    mf->addAtom(*plt);
  }

  // Assign PLT0 to GOTPLT entries.
  assert(_gotpltMap.empty() || plt0Atom);
  for (auto &a : _gotpltMap)
    a.second->setPLT0(plt0Atom);

  for (auto &gotplt : _gotpltVector) {
    gotplt->setOrdinal(ordinal++);
    mf->addAtom(*gotplt);
  }

  for (auto obj : _objectVector) {
    obj->setOrdinal(ordinal++);
    mf->addAtom(*obj);
  }

  for (auto la25 : _la25Vector) {
    la25->setOrdinal(ordinal++);
    mf->addAtom(*la25);
  }
}

/// Dispatch on the relocation kind of \p ref and either retarget the
/// reference to a GOT/PLT/stub atom or adjust its addend. References without
/// a target or outside the ELF namespace are ignored; kinds not listed in
/// the switch are left untouched.
template <typename ELFT>
void RelocationPass<ELFT>::handleReference(const MipsELFDefinedAtom<ELFT> &atom,
                                           Reference &ref) {
  if (!ref.target())
    return;
  if (ref.kindNamespace() != lld::Reference::KindNamespace::ELF)
    return;
  assert(ref.kindArch() == Reference::KindArch::Mips);
  switch (ref.kindValue()) {
  case R_MIPS_32:
  case R_MIPS_PC32:
  case R_MIPS_HI16:
  case R_MIPS_LO16:
  case R_MIPS_PCHI16:
  case R_MIPS_PCLO16:
  case R_MICROMIPS_HI16:
  case R_MICROMIPS_LO16:
    // FIXME (simon): Handle dynamic/static linking differently.
    handlePlain(atom, ref);
    break;
  case R_MIPS_26:
  case R_MICROMIPS_26_S1:
    handle26(atom, ref);
    break;
  case R_MIPS_GOT16:
  case R_MIPS_CALL16:
  case R_MICROMIPS_GOT16:
  case R_MICROMIPS_CALL16:
  case R_MIPS_GOT_DISP:
  case R_MIPS_GOT_PAGE:
    handleGOT(ref);
    break;
  case R_MIPS_GOT_OFST:
    // Nothing to do. We create GOT page entry in the R_MIPS_GOT_PAGE handler.
    break;
  case R_MIPS_GPREL16:
    // Only references to local targets get the GP0 adjustment here.
    if (isLocal(ref.target()))
      ref.setAddend(ref.addend() + atom.file().getGP0());
    break;
  case R_MIPS_GPREL32:
    ref.setAddend(ref.addend() + atom.file().getGP0());
    break;
  case R_MIPS_TLS_DTPREL_HI16:
  case R_MIPS_TLS_DTPREL_LO16:
  case R_MICROMIPS_TLS_DTPREL_HI16:
  case R_MICROMIPS_TLS_DTPREL_LO16:
    ref.setAddend(ref.addend() - atom.file().getDTPOffset());
    break;
  case R_MIPS_TLS_TPREL_HI16:
  case R_MIPS_TLS_TPREL_LO16:
  case R_MICROMIPS_TLS_TPREL_HI16:
  case R_MICROMIPS_TLS_TPREL_LO16:
    ref.setAddend(ref.addend() - atom.file().getTPOffset());
    break;
  case R_MIPS_TLS_GD:
  case R_MICROMIPS_TLS_GD:
    ref.setTarget(getTLSGdGOTEntry(ref.target()));
    break;
  case R_MIPS_TLS_LDM:
  case R_MICROMIPS_TLS_LDM:
    ref.setTarget(getTLSLdmGOTEntry(ref.target()));
    break;
  case R_MIPS_TLS_GOTTPREL:
  case R_MICROMIPS_TLS_GOTTPREL:
    ref.setTarget(getTLSGOTEntry(ref.target()));
    break;
  }
}

/// Return true if a reference of kind \p refKind coming from \p atom should
/// be taken into account by collectReferenceInfo(): the atom's section must
/// be SHF_ALLOC and the kind must not be one of the excluded relocations.
template <typename ELFT>
static bool isConstrainSym(const MipsELFDefinedAtom<ELFT> &atom,
                           Reference::KindValue refKind) {
  if ((atom.section()->sh_flags & SHF_ALLOC) == 0)
    return false;
  switch (refKind) {
  case R_MIPS_NONE:
  case R_MIPS_JALR:
  case R_MICROMIPS_JALR:
  case R_MIPS_GPREL16:
  case R_MIPS_GPREL32:
    return false;
  default:
    return true;
  }
}

template <typename ELFT>
void RelocationPass<ELFT>::collectReferenceInfo(
    const MipsELFDefinedAtom<ELFT> &atom, Reference &ref) {
  if (!ref.target())
    return;
  if (ref.kindNamespace() != lld::Reference::KindNamespace::ELF)
    return;

  auto refKind = ref.kindValue();
  if (!isConstrainSym(atom, refKind))
    return;

  // A reference is either a candidate for R_MIPS_REL32 or it marks its
  // target as referenced by a static relocation.
  if (mightBeDynamic(atom, refKind))
    _rel32Candidates.push_back(&ref);
  else
    _hasStaticRelocations.insert(ref.target());

  // Every kind except the call/jump relocations below makes the target
  // require pointer equality.
  if (refKind != R_MIPS_CALL16 && refKind != R_MICROMIPS_CALL16 &&
      refKind != R_MIPS_26 && refKind != R_MICROMIPS_26_S1)
    _requiresPtrEquality.insert(ref.target());
}

/// Return true if \p a is a defined atom with translation-unit scope.
template <typename ELFT>
bool RelocationPass<ELFT>::isLocal(const Atom *a) const {
  if (auto *da = dyn_cast<DefinedAtom>(a))
    return da->scope() == Atom::scopeTranslationUnit;
  return false;
}

/// Return true if the section of \p atom is allocated, is not SHT_NOBITS and
/// is not writable.
template <typename ELFT>
static bool isMipsReadonly(const MipsELFDefinedAtom<ELFT> &atom) {
  auto secFlags = atom.section()->sh_flags;
  auto secType = atom.section()->sh_type;

  if ((secFlags & SHF_ALLOC) == 0)
    return false;
  if (secType == SHT_NOBITS)
    return false;
  if ((secFlags & SHF_WRITE) != 0)
    return false;
  return true;
}

/// Decide whether a reference of kind \p refKind located in \p atom is a
/// candidate for a dynamic (R_MIPS_REL32) relocation.
template <typename ELFT>
bool RelocationPass<ELFT>::mightBeDynamic(const MipsELFDefinedAtom<ELFT> &atom,
                                          Reference::KindValue refKind) const {
  if (refKind == R_MIPS_CALL16 || refKind == R_MIPS_GOT16 ||
      refKind == R_MICROMIPS_CALL16 || refKind == R_MICROMIPS_GOT16)
    return true;

  if (refKind != R_MIPS_32 && refKind != R_MIPS_64)
    return false;
  if ((atom.section()->sh_flags & SHF_ALLOC) == 0)
    return false;

  if (_ctx.getOutputELFType() == ET_DYN)
    return true;
  if (!isMipsReadonly(atom))
    return true;
  if (atom.file().isPIC())
    return true;

  return false;
}

template <typename ELFT>
bool RelocationPass<ELFT>::hasPLTEntry(const Atom *atom) const {
  return _pltRegMap.count(atom) || _pltMicroMap.count(atom);
}

/// Return true if the merged ELF flags select a 32R6 or 64R6 architecture.
template <typename ELFT> bool RelocationPass<ELFT>::isR6Target() const {
  switch (_ctx.getMergedELFFlags() & EF_MIPS_ARCH) {
  case EF_MIPS_ARCH_32R6:
  case EF_MIPS_ARCH_64R6:
    return true;
  default:
    return false;
  }
}

/// A PLT entry is required for an atom referenced by static relocations
/// unless it is a non-code shared library atom, a non-code defined atom, or
/// a target resolved locally.
template <typename ELFT>
bool RelocationPass<ELFT>::requirePLTEntry(const Atom *a) const {
  if (!_hasStaticRelocations.count(a))
    return false;
  const auto *sa = dyn_cast<ELFDynamicAtom<ELFT>>(a);
  if (sa && sa->type() != SharedLibraryAtom::Type::Code)
    return false;
  const auto *da = dyn_cast<ELFDefinedAtom<ELFT>>(a);
  if (da && da->contentType() != DefinedAtom::typeCode)
    return false;
  if (isLocalCall(a))
    return false;
  return true;
}

/// A copy (object entry) is required for a shared library data atom that is
/// referenced by static relocations.
template <typename ELFT>
bool RelocationPass<ELFT>::requireCopy(const Atom *a) const {
  if (!_hasStaticRelocations.count(a))
    return false;
  const auto *sa = dyn_cast<ELFDynamicAtom<ELFT>>(a);
  return sa && sa->type() == SharedLibraryAtom::Type::Data;
}

template <typename ELFT>
bool RelocationPass<ELFT>::isDynamic(const Atom *atom) const {
  const auto *da = dyn_cast<const DefinedAtom>(atom);
  if (da && da->dynamicExport() == DefinedAtom::dynamicExportAlways)
    return true;

  const auto *sa = dyn_cast<SharedLibraryAtom>(atom);
  if (sa)
    return true;

  // For a shared object output, non-local defined atoms and undefined atoms
  // are dynamic as well.
  if (_ctx.getOutputELFType() == ET_DYN) {
    if (da && da->scope() != DefinedAtom::scopeTranslationUnit)
      return true;

    const auto *ua = dyn_cast<UndefinedAtom>(atom);
    if (ua)
      return true;
  }

  return false;
}

template <typename ELFT>
static bool isMicroMips(const MipsELFDefinedAtom<ELFT> &atom) {
  return atom.codeModel() == DefinedAtom::codeMipsMicro ||
         atom.codeModel() == DefinedAtom::codeMipsMicroPIC;
}

template <typename ELFT>
const LA25Atom *RelocationPass<ELFT>::getLA25Entry(const Atom *target,
                                                   bool isMicroMips) {
  return isMicroMips ? getLA25MicroEntry(target) : getLA25RegEntry(target);
}

template <typename ELFT>
const PLTAtom *RelocationPass<ELFT>::getPLTEntry(const Atom *a) {
  bool hasMicroCode = _ctx.getMergedELFFlags() & EF_MIPS_MICROMIPS;

  // If file contains microMIPS code try to reuse compressed PLT entry...
  if (hasMicroCode) {
    auto microPLT = _pltMicroMap.find(a);
    if (microPLT != _pltMicroMap.end())
      return microPLT->second;
  }

  // ... then try to reuse a regular PLT entry ...
  auto regPLT = _pltRegMap.find(a);
  if (regPLT != _pltRegMap.end())
    return regPLT->second;

  // ... and finally prefer to create new compressed PLT entry.
  return hasMicroCode ? getPLTMicroEntry(a) : getPLTRegEntry(a);
}

/// Retarget a "plain" reference to a PLT entry or to an object (copy) entry
/// when its dynamic target requires one; otherwise leave it unchanged.
template <typename ELFT>
void RelocationPass<ELFT>::handlePlain(const MipsELFDefinedAtom<ELFT> &atom,
                                       Reference &ref) {
  if (!isDynamic(ref.target()))
    return;

  if (requirePLTEntry(ref.target()))
    ref.setTarget(getPLTEntry(ref.target()));
  else if (requireCopy(ref.target()))
    ref.setTarget(getObjectEntry(cast<SharedLibraryAtom>(ref.target())));
}

/// Handle R_MIPS_26 / R_MICROMIPS_26_S1 references.
template <typename ELFT>
void RelocationPass<ELFT>::handle26(const MipsELFDefinedAtom<ELFT> &atom,
                                    Reference &ref) {
  bool isMicro = ref.kindValue() == R_MICROMIPS_26_S1;
  assert((isMicro || ref.kindValue() == R_MIPS_26) && "Unexpected relocation");

  // Jumps to shared library code go through a PLT entry.
  const auto *sla = dyn_cast<SharedLibraryAtom>(ref.target());
  if (sla && sla->type() == SharedLibraryAtom::Type::Code)
    ref.setTarget(isMicro ? getPLTMicroEntry(sla) : getPLTRegEntry(sla));

  // Note: the checks below see the (possibly just replaced) target.
  if (requireLA25Stub(ref.target()))
    ref.setTarget(getLA25Entry(ref.target(), isMicro));

  if (!isLocal(ref.target())) {
    if (isMicro)
      ref.setKindValue(LLD_R_MICROMIPS_GLOBAL_26_S1);
    else
      ref.setKindValue(LLD_R_MIPS_GLOBAL_26);
  }
}

/// Retarget a GOT-related reference to the proper GOT entry: a global entry
/// for targets not resolved locally, otherwise a local or local-page entry
/// depending on the relocation kind and on whether the target is local.
template <typename ELFT> void RelocationPass<ELFT>::handleGOT(Reference &ref) {
  if (!isLocalCall(ref.target())) {
    ref.setTarget(getGlobalGOTEntry(ref.target()));
    return;
  }

  if (ref.kindValue() == R_MIPS_GOT_PAGE)
    ref.setTarget(getLocalGOTPageEntry(ref));
  else if (ref.kindValue() == R_MIPS_GOT_DISP)
    ref.setTarget(getLocalGOTEntry(ref));
  else if (isLocal(ref.target()))
    ref.setTarget(getLocalGOTPageEntry(ref));
  else
    ref.setTarget(getLocalGOTEntry(ref));
}

/// Return true if a reference to \p a is resolved inside the output file.
/// Only defined and absolute atoms can qualify.
template <typename ELFT>
bool RelocationPass<ELFT>::isLocalCall(const Atom *a) const {
  Atom::Scope scope;
  if (auto *da = dyn_cast<DefinedAtom>(a))
    scope = da->scope();
  else if (auto *aa = dyn_cast<AbsoluteAtom>(a))
    scope = aa->scope();
  else
    return false;

  // Local and hidden symbols must be local.
  if (scope == Atom::scopeTranslationUnit || scope == Atom::scopeLinkageUnit)
    return true;

  // Calls to external symbols defined in an executable file resolved locally.
  if (_ctx.getOutputELFType() == ET_EXEC)
    return true;

  return false;
}

/// An LA25 stub is required for a non-local defined atom whose file is PIC.
template <typename ELFT>
bool RelocationPass<ELFT>::requireLA25Stub(const Atom *a) const {
  if (isLocal(a))
    return false;
  if (auto *da = dyn_cast<DefinedAtom>(a))
    return static_cast<const MipsELFDefinedAtom<ELFT> *>(da)->file().isPIC();
  return false;
}

template <typename ELFT>
const GOTAtom *RelocationPass<ELFT>::getLocalGOTEntry(const Reference &ref) {
  const Atom *a = ref.target();
  LocalGotMapKeyT key(a, ref.addend());

  auto got = _gotLocalMap.find(key);
  if (got != _gotLocalMap.end())
    return got->second;

  auto ga = new (_file._alloc) GOT0Atom<ELFT>(_file);
  _gotLocalMap[key] = ga;

  _localGotVector.push_back(ga);

  // NOTE(review): the cache key includes ref.addend() but the new reference
  // is created with addend 0 (getLocalGOTPageEntry passes ref.addend()) --
  // confirm this is intended.
  Reference::KindValue relKind = ELFT::Is64Bits ? R_MIPS_64 : R_MIPS_32;
  ga->addReferenceELF_Mips(relKind, 0, a, 0);

  return ga;
}

template <typename ELFT>
const GOTAtom *
RelocationPass<ELFT>::getLocalGOTPageEntry(const Reference &ref) {
  const Atom *a = ref.target();
  LocalGotMapKeyT key(a, ref.addend());

  auto got = _gotLocalPageMap.find(key);
  if (got != _gotLocalPageMap.end())
    return got->second;

  auto ga = new (_file._alloc) GOT0Atom<ELFT>(_file);
  _gotLocalPageMap[key] = ga;

  _localGotVector.push_back(ga);

  Reference::KindValue relKind =
      ELFT::Is64Bits ? LLD_R_MIPS_64_HI16 : LLD_R_MIPS_32_HI16;
  ga->addReferenceELF_Mips(relKind, 0, a, ref.addend());

  return ga;
}

template <typename ELFT>
const GOTAtom *RelocationPass<ELFT>::getGlobalGOTEntry(const Atom *a) {
  auto got = _gotGlobalMap.find(a);
  if (got != _gotGlobalMap.end())
    return got->second;

  auto ga = new (_file._alloc) GOT0Atom<ELFT>(_file);
  _gotGlobalMap[a] = ga;

  _globalGotVector.push_back(ga);
  ga->addReferenceELF_Mips(LLD_R_MIPS_GLOBAL_GOT, 0, a, 0);

  // NOTE(review): R_MIPS_32 is used here regardless of ELFT::Is64Bits, unlike
  // getLocalGOTEntry() -- confirm for 64-bit targets.
  if (const DefinedAtom *da = dyn_cast<DefinedAtom>(a))
    ga->addReferenceELF_Mips(R_MIPS_32, 0, da, 0);

  return ga;
}

template <typename ELFT>
const GOTAtom *RelocationPass<ELFT>::getTLSGOTEntry(const Atom *a) {
  auto got = _gotTLSMap.find(a);
  if (got != _gotTLSMap.end())
    return got->second;

  auto ga = new (_file._alloc) GOT0Atom<ELFT>(_file);
  _gotTLSMap[a] = ga;

  _tlsGotVector.push_back(ga);
  Reference::KindValue relKind =
      ELFT::Is64Bits ? R_MIPS_TLS_TPREL64 : R_MIPS_TLS_TPREL32;
  ga->addReferenceELF_Mips(relKind, 0, a, 0);

  return ga;
}

template <typename ELFT>
const GOTAtom *RelocationPass<ELFT>::getTLSGdGOTEntry(const Atom *a) {
  auto got = _gotTLSGdMap.find(a);
  if (got != _gotTLSGdMap.end())
    return got->second;

  auto ga = new (_file._alloc) GOTTLSGdAtom<ELFT>(_file);
  _gotTLSGdMap[a] = ga;

  _tlsGotVector.push_back(ga);
  // The entry carries two relocations: DTPMOD at offset 0 and DTPREL at the
  // second word (offset 8 for 64-bit, 4 for 32-bit).
  if (ELFT::Is64Bits) {
    ga->addReferenceELF_Mips(R_MIPS_TLS_DTPMOD64, 0, a, 0);
    ga->addReferenceELF_Mips(R_MIPS_TLS_DTPREL64, 8, a, 0);
  } else {
    ga->addReferenceELF_Mips(R_MIPS_TLS_DTPMOD32, 0, a, 0);
    ga->addReferenceELF_Mips(R_MIPS_TLS_DTPREL32, 4, a, 0);
  }

  return ga;
}

/// Return the single GOT entry shared by all TLS LDM relocations, creating
/// it on first use. The parameter \p a is not used: the DTPMOD reference
/// targets the entry itself.
template <typename ELFT>
const GOTAtom *RelocationPass<ELFT>::getTLSLdmGOTEntry(const Atom *a) {
  if (_gotLDMEntry)
    return _gotLDMEntry;

  _gotLDMEntry = new (_file._alloc) GOTTLSGdAtom<ELFT>(_file);
  _tlsGotVector.push_back(_gotLDMEntry);
  if (ELFT::Is64Bits)
    _gotLDMEntry->addReferenceELF_Mips(R_MIPS_TLS_DTPMOD64, 0, _gotLDMEntry, 0);
  else
    _gotLDMEntry->addReferenceELF_Mips(R_MIPS_TLS_DTPMOD32, 0, _gotLDMEntry, 0);

  return _gotLDMEntry;
}

/// Create the PLT0 atom together with the two leading .got.plt entries.
/// Both entries are inserted at the front of _gotpltVector (ga1 first, then
/// ga0 in front of it), so the resulting order is ga0, ga1, <other entries>.
template <typename ELFT>
PLTAtom *RelocationPass<ELFT>::createPLTHeader(bool isMicroMips) {
  auto ga1 = new (_file._alloc) GOTPLTAtom(_file);
  _gotpltVector.insert(_gotpltVector.begin(), ga1);
  auto ga0 = new (_file._alloc) GOTPLTAtom(_file);
  _gotpltVector.insert(_gotpltVector.begin(), ga0);

  if (isMicroMips)
    return new (_file._alloc) PLT0MicroAtom(ga0, _file);
  else
    return new (_file._alloc) PLT0Atom(ga0, _file);
}

template <typename ELFT>
const GOTPLTAtom *RelocationPass<ELFT>::getGOTPLTEntry(const Atom *a) {
  auto it = _gotpltMap.find(a);
  if (it != _gotpltMap.end())
    return it->second;

  auto ga = new (_file._alloc) GOTPLTAtom(a, _file);
  _gotpltMap[a] = ga;
  _gotpltVector.push_back(ga);
  return ga;
}

template <typename ELFT>
const PLTAtom *RelocationPass<ELFT>::getPLTRegEntry(const Atom *a) {
  auto plt = _pltRegMap.find(a);
  if (plt != _pltRegMap.end())
    return plt->second;

  PLTAAtom *pa = isR6Target()
                     ? new (_file._alloc) PLTR6Atom(getGOTPLTEntry(a), _file)
                     : new (_file._alloc) PLTAAtom(getGOTPLTEntry(a), _file);
  _pltRegMap[a] = pa;
  _pltRegVector.push_back(pa);

  // Check that 'a' dynamic symbol table record should point to the PLT.
  if (_hasStaticRelocations.count(a) && _requiresPtrEquality.count(a))
    pa->addReferenceELF_Mips(LLD_R_MIPS_STO_PLT, 0, a, 0);

  return pa;
}

template <typename ELFT>
const PLTAtom *RelocationPass<ELFT>::getPLTMicroEntry(const Atom *a) {
  auto plt = _pltMicroMap.find(a);
  if (plt != _pltMicroMap.end())
    return plt->second;

  auto pa = new (_file._alloc) PLTMicroAtom(getGOTPLTEntry(a), _file);
  _pltMicroMap[a] = pa;
  _pltMicroVector.push_back(pa);

  // Check that 'a' dynamic symbol table record should point to the PLT.
  if (_hasStaticRelocations.count(a) && _requiresPtrEquality.count(a))
    pa->addReferenceELF_Mips(LLD_R_MIPS_STO_PLT, 0, a, 0);

  return pa;
}

template <typename ELFT>
const LA25Atom *RelocationPass<ELFT>::getLA25RegEntry(const Atom *a) {
  auto la25 = _la25RegMap.find(a);
  if (la25 != _la25RegMap.end())
    return la25->second;

  auto sa = new (_file._alloc) LA25RegAtom(a, _file);
  _la25RegMap[a] = sa;
  _la25Vector.push_back(sa);

  return sa;
}

template <typename ELFT>
const LA25Atom *RelocationPass<ELFT>::getLA25MicroEntry(const Atom *a) {
  auto la25 = _la25MicroMap.find(a);
  if (la25 != _la25MicroMap.end())
    return la25->second;

  auto sa = new (_file._alloc) LA25MicroAtom(a, _file);
  _la25MicroMap[a] = sa;
  _la25Vector.push_back(sa);

  return sa;
}

/// Return the object atom that stands in for the shared library atom \p a,
/// creating it on first use. The new atom takes the name and size of \p a and
/// carries an R_MIPS_COPY reference to itself.
template <typename ELFT>
const ObjectAtom *
RelocationPass<ELFT>::getObjectEntry(const SharedLibraryAtom *a) {
  auto obj = _objectMap.find(a);
  if (obj != _objectMap.end())
    return obj->second;

  auto oa = new (_file._alloc) ObjectAtom(_file);
  oa->addReferenceELF_Mips(R_MIPS_COPY, 0, oa, 0);
  oa->_name = a->name();
  oa->_size = a->size();

  _objectMap[a] = oa;
  _objectVector.push_back(oa);

  return oa;
}

} // end anon namespace

/// Instantiate the pass for the target architecture of \p ctx. Only mipsel
/// and mips64el are handled.
static std::unique_ptr<Pass> createPass(MipsLinkingContext &ctx) {
  switch (ctx.getTriple().getArch()) {
  case llvm::Triple::mipsel:
    return llvm::make_unique<RelocationPass<Mips32ELType>>(ctx);
  case llvm::Triple::mips64el:
    return llvm::make_unique<RelocationPass<Mips64ELType>>(ctx);
  default:
    llvm_unreachable("Unhandled arch");
  }
}

/// Create the relocation pass for executable and shared object outputs;
/// returns nullptr for relocatable (ET_REL) output.
std::unique_ptr<Pass>
lld::elf::createMipsRelocationPass(MipsLinkingContext &ctx) {
  switch (ctx.getOutputELFType()) {
  case ET_EXEC:
  case ET_DYN:
    return createPass(ctx);
  case ET_REL:
    return nullptr;
  default:
    llvm_unreachable("Unhandled output file type");
  }
}
diff --git a/lib/ReaderWriter/ELF/Mips/MipsRelocationPass.h b/lib/ReaderWriter/ELF/Mips/MipsRelocationPass.h new file 
mode 100644 index 000000000000..af343de5f027 --- /dev/null +++ b/lib/ReaderWriter/ELF/Mips/MipsRelocationPass.h @@ -0,0 +1,25 @@
//===- lib/ReaderWriter/ELF/Mips/MipsRelocationPass.h ---------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#ifndef LLD_READER_WRITER_ELF_MIPS_MIPS_RELOCATION_PASS_H
#define LLD_READER_WRITER_ELF_MIPS_MIPS_RELOCATION_PASS_H

#include <memory>

namespace lld {
class Pass;

namespace elf {
class MipsLinkingContext;

/// \brief Create the MIPS relocation pass for the linking context \p ctx.
///
/// The definition in MipsRelocationPass.cpp returns a pass for ET_EXEC and
/// ET_DYN outputs and nullptr for ET_REL output, so callers must be prepared
/// for a null result.
std::unique_ptr<Pass> createMipsRelocationPass(MipsLinkingContext &ctx);

} // end namespace elf
} // end namespace lld

#endif
diff --git a/lib/ReaderWriter/ELF/Mips/MipsSectionChunks.h b/lib/ReaderWriter/ELF/Mips/MipsSectionChunks.h new file mode 100644 index 000000000000..de9390f2b307 --- /dev/null +++ b/lib/ReaderWriter/ELF/Mips/MipsSectionChunks.h @@ -0,0 +1,170 @@
//===- lib/ReaderWriter/ELF/Mips/MipsSectionChunks.h ----------------------===//
//
// The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +#ifndef LLD_READER_WRITER_ELF_MIPS_MIPS_SECTION_CHUNKS_H +#define LLD_READER_WRITER_ELF_MIPS_MIPS_SECTION_CHUNKS_H + +namespace lld { +namespace elf { + +template <typename ELFT> class MipsTargetLayout; +class MipsLinkingContext; + +/// \brief Handle Mips GOT section +template <class ELFType> class MipsGOTSection : public AtomSection<ELFType> { +public: + MipsGOTSection(const MipsLinkingContext &ctx) + : AtomSection<ELFType>(ctx, ".got", DefinedAtom::typeGOT, + DefinedAtom::permRW_, + MipsTargetLayout<ELFType>::ORDER_GOT), + _hasNonLocal(false), _localCount(0) { + this->_flags |= SHF_MIPS_GPREL; + this->_alignment = 4; + } + + /// \brief Number of local GOT entries. + std::size_t getLocalCount() const { return _localCount; } + + /// \brief Number of global GOT entries. + std::size_t getGlobalCount() const { return _posMap.size(); } + + /// \brief Does the atom have a global GOT entry? + bool hasGlobalGOTEntry(const Atom *a) const { + return _posMap.count(a) || _tlsMap.count(a); + } + + /// \brief Compare two atoms accordingly theirs positions in the GOT. 
+ bool compare(const Atom *a, const Atom *b) const { + auto ia = _posMap.find(a); + auto ib = _posMap.find(b); + + if (ia != _posMap.end() && ib != _posMap.end()) + return ia->second < ib->second; + + return ia == _posMap.end() && ib != _posMap.end(); + } + + const lld::AtomLayout *appendAtom(const Atom *atom) override { + const DefinedAtom *da = dyn_cast<DefinedAtom>(atom); + + for (const auto &r : *da) { + if (r->kindNamespace() != lld::Reference::KindNamespace::ELF) + continue; + assert(r->kindArch() == Reference::KindArch::Mips); + switch (r->kindValue()) { + case LLD_R_MIPS_GLOBAL_GOT: + _hasNonLocal = true; + _posMap[r->target()] = _posMap.size(); + return AtomSection<ELFType>::appendAtom(atom); + case R_MIPS_TLS_TPREL32: + case R_MIPS_TLS_DTPREL32: + case R_MIPS_TLS_TPREL64: + case R_MIPS_TLS_DTPREL64: + _hasNonLocal = true; + _tlsMap[r->target()] = _tlsMap.size(); + return AtomSection<ELFType>::appendAtom(atom); + case R_MIPS_TLS_DTPMOD32: + case R_MIPS_TLS_DTPMOD64: + _hasNonLocal = true; + break; + } + } + + if (!_hasNonLocal) + ++_localCount; + + return AtomSection<ELFType>::appendAtom(atom); + } + +private: + /// \brief True if the GOT contains non-local entries. + bool _hasNonLocal; + + /// \brief Number of local GOT entries. + std::size_t _localCount; + + /// \brief Map TLS Atoms to their GOT entry index. + llvm::DenseMap<const Atom *, std::size_t> _tlsMap; + + /// \brief Map Atoms to their GOT entry index. + llvm::DenseMap<const Atom *, std::size_t> _posMap; +}; + +/// \brief Handle Mips PLT section +template <class ELFType> class MipsPLTSection : public AtomSection<ELFType> { +public: + MipsPLTSection(const MipsLinkingContext &ctx) + : AtomSection<ELFType>(ctx, ".plt", DefinedAtom::typeGOT, + DefinedAtom::permR_X, + MipsTargetLayout<ELFType>::ORDER_PLT) {} + + const AtomLayout *findPLTLayout(const Atom *plt) const { + auto it = _pltLayoutMap.find(plt); + return it != _pltLayoutMap.end() ? 
it->second : nullptr; + } + + const lld::AtomLayout *appendAtom(const Atom *atom) override { + const auto *layout = AtomSection<ELFType>::appendAtom(atom); + + const DefinedAtom *da = cast<DefinedAtom>(atom); + + for (const auto &r : *da) { + if (r->kindNamespace() != lld::Reference::KindNamespace::ELF) + continue; + assert(r->kindArch() == Reference::KindArch::Mips); + if (r->kindValue() == LLD_R_MIPS_STO_PLT) { + _pltLayoutMap[r->target()] = layout; + break; + } + } + + return layout; + } + +private: + /// \brief Map PLT Atoms to their layouts. + std::unordered_map<const Atom *, const AtomLayout *> _pltLayoutMap; +}; + +template <class ELFT> class MipsRelocationTable : public RelocationTable<ELFT> { + typedef llvm::object::Elf_Rel_Impl<ELFT, false> Elf_Rel; + typedef llvm::object::Elf_Rel_Impl<ELFT, true> Elf_Rela; + + static const bool _isMips64EL = + ELFT::Is64Bits && ELFT::TargetEndianness == llvm::support::little; + +public: + MipsRelocationTable(const ELFLinkingContext &context, StringRef str, + int32_t order) + : RelocationTable<ELFT>(context, str, order) {} + +protected: + void writeRela(ELFWriter *writer, Elf_Rela &r, const DefinedAtom &atom, + const Reference &ref) override { + uint32_t rType = ref.kindValue() | (ref.tag() << 8); + r.setSymbolAndType(this->getSymbolIndex(ref.target()), rType, _isMips64EL); + r.r_offset = writer->addressOfAtom(&atom) + ref.offsetInAtom(); + // The addend is used only by relative relocations + if (this->_context.isRelativeReloc(ref)) + r.r_addend = writer->addressOfAtom(ref.target()) + ref.addend(); + else + r.r_addend = 0; + } + + void writeRel(ELFWriter *writer, Elf_Rel &r, const DefinedAtom &atom, + const Reference &ref) override { + uint32_t rType = ref.kindValue() | (ref.tag() << 8); + r.setSymbolAndType(this->getSymbolIndex(ref.target()), rType, _isMips64EL); + r.r_offset = writer->addressOfAtom(&atom) + ref.offsetInAtom(); + } +}; + +} // elf +} // lld + +#endif diff --git 
a/lib/ReaderWriter/ELF/Mips/MipsTargetHandler.cpp b/lib/ReaderWriter/ELF/Mips/MipsTargetHandler.cpp new file mode 100644 index 000000000000..f60ab63c6af7 --- /dev/null +++ b/lib/ReaderWriter/ELF/Mips/MipsTargetHandler.cpp @@ -0,0 +1,35 @@ +//===- lib/ReaderWriter/ELF/Mips/MipsTargetHandler.cpp --------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "MipsTargetHandler.h" + +using namespace lld; +using namespace elf; + +void MipsRelocationStringTable::registerTable(Registry ®istry) { + registry.addKindTable(Reference::KindNamespace::ELF, + Reference::KindArch::Mips, kindStrings); +} + +#define ELF_RELOC(name, value) LLD_KIND_STRING_ENTRY(name), + +const Registry::KindStrings MipsRelocationStringTable::kindStrings[] = { +#include "llvm/Support/ELFRelocs/Mips.def" + LLD_KIND_STRING_ENTRY(LLD_R_MIPS_GLOBAL_GOT), + LLD_KIND_STRING_ENTRY(LLD_R_MIPS_32_HI16), + LLD_KIND_STRING_ENTRY(LLD_R_MIPS_64_HI16), + LLD_KIND_STRING_ENTRY(LLD_R_MIPS_GLOBAL_26), + LLD_KIND_STRING_ENTRY(LLD_R_MIPS_HI16), + LLD_KIND_STRING_ENTRY(LLD_R_MIPS_LO16), + LLD_KIND_STRING_ENTRY(LLD_R_MIPS_STO_PLT), + LLD_KIND_STRING_ENTRY(LLD_R_MICROMIPS_GLOBAL_26_S1), + LLD_KIND_STRING_END +}; + +#undef ELF_RELOC diff --git a/lib/ReaderWriter/ELF/Mips/MipsTargetHandler.h b/lib/ReaderWriter/ELF/Mips/MipsTargetHandler.h new file mode 100644 index 000000000000..79509addf40b --- /dev/null +++ b/lib/ReaderWriter/ELF/Mips/MipsTargetHandler.h @@ -0,0 +1,257 @@ +//===- lib/ReaderWriter/ELF/Mips/MipsTargetHandler.h ----------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +#ifndef LLD_READER_WRITER_ELF_MIPS_MIPS_TARGET_HANDLER_H +#define LLD_READER_WRITER_ELF_MIPS_MIPS_TARGET_HANDLER_H + +#include "DefaultTargetHandler.h" +#include "MipsDynamicLibraryWriter.h" +#include "MipsELFReader.h" +#include "MipsExecutableWriter.h" +#include "MipsLinkingContext.h" +#include "MipsRelocationHandler.h" +#include "MipsSectionChunks.h" +#include "TargetLayout.h" +#include "llvm/ADT/DenseSet.h" + +namespace lld { +namespace elf { + +/// \brief TargetLayout for Mips +template <class ELFT> class MipsTargetLayout final : public TargetLayout<ELFT> { +public: + MipsTargetLayout(MipsLinkingContext &ctx) + : TargetLayout<ELFT>(ctx), + _gotSection(new (this->_allocator) MipsGOTSection<ELFT>(ctx)), + _pltSection(new (this->_allocator) MipsPLTSection<ELFT>(ctx)) {} + + const MipsGOTSection<ELFT> &getGOTSection() const { return *_gotSection; } + const MipsPLTSection<ELFT> &getPLTSection() const { return *_pltSection; } + + AtomSection<ELFT> *createSection(StringRef name, int32_t type, + DefinedAtom::ContentPermissions permissions, + Layout::SectionOrder order) override { + if (type == DefinedAtom::typeGOT && name == ".got") + return _gotSection; + if (type == DefinedAtom::typeStub && name == ".plt") + return _pltSection; + return DefaultLayout<ELFT>::createSection(name, type, permissions, order); + } + + /// \brief GP offset relative to .got section. + uint64_t getGPOffset() const { return 0x7FF0; } + + /// \brief Get '_gp' symbol atom layout. + AtomLayout *getGP() { + if (!_gpAtom.hasValue()) { + auto atom = this->findAbsoluteAtom("_gp"); + _gpAtom = atom != this->absoluteAtoms().end() ? *atom : nullptr; + } + return *_gpAtom; + } + + /// \brief Get '_gp_disp' symbol atom layout. + AtomLayout *getGPDisp() { + if (!_gpDispAtom.hasValue()) { + auto atom = this->findAbsoluteAtom("_gp_disp"); + _gpDispAtom = atom != this->absoluteAtoms().end() ? 
*atom : nullptr; + } + return *_gpDispAtom; + } + + /// \brief Return the section order for a input section + Layout::SectionOrder getSectionOrder(StringRef name, int32_t contentType, + int32_t contentPermissions) override { + if ((contentType == DefinedAtom::typeStub) && (name.startswith(".text"))) + return DefaultLayout<ELFT>::ORDER_TEXT; + + return DefaultLayout<ELFT>::getSectionOrder(name, contentType, + contentPermissions); + } + +protected: + unique_bump_ptr<RelocationTable<ELFT>> + createRelocationTable(StringRef name, int32_t order) override { + return unique_bump_ptr<RelocationTable<ELFT>>( + new (this->_allocator) + MipsRelocationTable<ELFT>(this->_context, name, order)); + } + +private: + MipsGOTSection<ELFT> *_gotSection; + MipsPLTSection<ELFT> *_pltSection; + llvm::Optional<AtomLayout *> _gpAtom; + llvm::Optional<AtomLayout *> _gpDispAtom; +}; + +/// \brief Mips Runtime file. +template <class ELFT> class MipsRuntimeFile final : public RuntimeFile<ELFT> { +public: + MipsRuntimeFile(MipsLinkingContext &ctx) + : RuntimeFile<ELFT>(ctx, "Mips runtime file") {} +}; + +/// \brief Auxiliary class holds relocation's names table. 
+class MipsRelocationStringTable { + static const Registry::KindStrings kindStrings[]; + +public: + static void registerTable(Registry ®istry); +}; + +/// \brief TargetHandler for Mips +template <class ELFT> +class MipsTargetHandler final : public DefaultTargetHandler<ELFT> { +public: + MipsTargetHandler(MipsLinkingContext &ctx) + : _ctx(ctx), _runtimeFile(new MipsRuntimeFile<ELFT>(ctx)), + _targetLayout(new MipsTargetLayout<ELFT>(ctx)), + _relocationHandler(createMipsRelocationHandler<ELFT>(ctx)) {} + + MipsTargetLayout<ELFT> &getTargetLayout() override { return *_targetLayout; } + + std::unique_ptr<Reader> getObjReader() override { + return std::unique_ptr<Reader>(new MipsELFObjectReader<ELFT>(_ctx)); + } + + std::unique_ptr<Reader> getDSOReader() override { + return std::unique_ptr<Reader>(new MipsELFDSOReader<ELFT>(_ctx)); + } + + const TargetRelocationHandler &getRelocationHandler() const override { + return *_relocationHandler; + } + + std::unique_ptr<Writer> getWriter() override { + switch (_ctx.getOutputELFType()) { + case llvm::ELF::ET_EXEC: + return std::unique_ptr<Writer>( + new MipsExecutableWriter<ELFT>(_ctx, *_targetLayout)); + case llvm::ELF::ET_DYN: + return std::unique_ptr<Writer>( + new MipsDynamicLibraryWriter<ELFT>(_ctx, *_targetLayout)); + case llvm::ELF::ET_REL: + llvm_unreachable("TODO: support -r mode"); + default: + llvm_unreachable("unsupported output type"); + } + } + + void registerRelocationNames(Registry ®istry) override { + MipsRelocationStringTable::registerTable(registry); + } + +private: + MipsLinkingContext &_ctx; + std::unique_ptr<MipsRuntimeFile<ELFT>> _runtimeFile; + std::unique_ptr<MipsTargetLayout<ELFT>> _targetLayout; + std::unique_ptr<TargetRelocationHandler> _relocationHandler; +}; + +template <class ELFT> class MipsSymbolTable : public SymbolTable<ELFT> { +public: + typedef llvm::object::Elf_Sym_Impl<ELFT> Elf_Sym; + + MipsSymbolTable(const ELFLinkingContext &ctx) + : SymbolTable<ELFT>(ctx, ".symtab", + 
DefaultLayout<ELFT>::ORDER_SYMBOL_TABLE) {} + + void addDefinedAtom(Elf_Sym &sym, const DefinedAtom *da, + int64_t addr) override { + SymbolTable<ELFT>::addDefinedAtom(sym, da, addr); + + switch (da->codeModel()) { + case DefinedAtom::codeMipsMicro: + sym.st_other |= llvm::ELF::STO_MIPS_MICROMIPS; + break; + case DefinedAtom::codeMipsMicroPIC: + sym.st_other |= llvm::ELF::STO_MIPS_MICROMIPS | llvm::ELF::STO_MIPS_PIC; + break; + default: + break; + } + } + + void finalize(bool sort) override { + SymbolTable<ELFT>::finalize(sort); + + for (auto &ste : this->_symbolTable) { + if (!ste._atom) + continue; + if (const auto *da = dyn_cast<DefinedAtom>(ste._atom)) { + if (da->codeModel() == DefinedAtom::codeMipsMicro || + da->codeModel() == DefinedAtom::codeMipsMicroPIC) { + // Adjust dynamic microMIPS symbol value. That allows a dynamic + // linker to recognize and handle this symbol correctly. + ste._symbol.st_value = ste._symbol.st_value | 1; + } + } + } + } +}; + +template <class ELFT> +class MipsDynamicSymbolTable : public DynamicSymbolTable<ELFT> { +public: + MipsDynamicSymbolTable(const ELFLinkingContext &ctx, + MipsTargetLayout<ELFT> &layout) + : DynamicSymbolTable<ELFT>(ctx, layout, ".dynsym", + DefaultLayout<ELFT>::ORDER_DYNAMIC_SYMBOLS), + _targetLayout(layout) {} + + void sortSymbols() override { + typedef typename DynamicSymbolTable<ELFT>::SymbolEntry SymbolEntry; + std::stable_sort(this->_symbolTable.begin(), this->_symbolTable.end(), + [this](const SymbolEntry &A, const SymbolEntry &B) { + if (A._symbol.getBinding() != STB_GLOBAL && + B._symbol.getBinding() != STB_GLOBAL) + return A._symbol.getBinding() < B._symbol.getBinding(); + + return _targetLayout.getGOTSection().compare(A._atom, B._atom); + }); + } + + void finalize() override { + DynamicSymbolTable<ELFT>::finalize(); + + const auto &pltSection = _targetLayout.getPLTSection(); + + for (auto &ste : this->_symbolTable) { + const Atom *a = ste._atom; + if (!a) + continue; + if (auto *layout = 
pltSection.findPLTLayout(a)) { + a = layout->_atom; + // Under some conditions a dynamic symbol table record should hold + // a symbol value of the corresponding PLT entry. For details look + // at the PLT entry creation code in the class MipsRelocationPass. + // Let's update atomLayout fields for such symbols. + assert(!ste._atomLayout); + ste._symbol.st_value = layout->_virtualAddr; + ste._symbol.st_other |= ELF::STO_MIPS_PLT; + } + + if (const auto *da = dyn_cast<DefinedAtom>(a)) { + if (da->codeModel() == DefinedAtom::codeMipsMicro || + da->codeModel() == DefinedAtom::codeMipsMicroPIC) { + // Adjust dynamic microMIPS symbol value. That allows a dynamic + // linker to recognize and handle this symbol correctly. + ste._symbol.st_value = ste._symbol.st_value | 1; + } + } + } + } + +private: + MipsTargetLayout<ELFT> &_targetLayout; +}; + +} // end namespace elf +} // end namespace lld + +#endif diff --git a/lib/ReaderWriter/ELF/OrderPass.h b/lib/ReaderWriter/ELF/OrderPass.h new file mode 100644 index 000000000000..d126b830db96 --- /dev/null +++ b/lib/ReaderWriter/ELF/OrderPass.h @@ -0,0 +1,30 @@ +//===- lib/ReaderWriter/ELF/OrderPass.h -----------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_ELF_ORDER_PASS_H +#define LLD_READER_WRITER_ELF_ORDER_PASS_H + +#include "lld/Core/Parallel.h" +#include <limits> + +namespace lld { +namespace elf { + +/// \brief This pass sorts atoms by file and atom ordinals. 
+class OrderPass : public Pass { +public: + void perform(std::unique_ptr<MutableFile> &file) override { + parallel_sort(file->definedAtoms().begin(), file->definedAtoms().end(), + DefinedAtom::compareByPosition); + } +}; +} +} + +#endif diff --git a/lib/ReaderWriter/ELF/OutputELFWriter.h b/lib/ReaderWriter/ELF/OutputELFWriter.h new file mode 100644 index 000000000000..c137905b936b --- /dev/null +++ b/lib/ReaderWriter/ELF/OutputELFWriter.h @@ -0,0 +1,615 @@ +//===- lib/ReaderWriter/ELF/OutputELFWriter.h ----------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +#ifndef LLD_READER_WRITER_ELF_OUTPUT_WRITER_H +#define LLD_READER_WRITER_ELF_OUTPUT_WRITER_H + +#include "DefaultLayout.h" +#include "ELFFile.h" +#include "TargetLayout.h" +#include "lld/Core/Instrumentation.h" +#include "lld/Core/Parallel.h" +#include "lld/Core/SharedLibraryFile.h" +#include "lld/ReaderWriter/ELFLinkingContext.h" +#include "lld/Core/Simple.h" +#include "lld/Core/Writer.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/Support/Path.h" + +namespace lld { +namespace elf { +using namespace llvm; +using namespace llvm::object; + +template <class ELFT> class OutputELFWriter; +template <class ELFT> class TargetLayout; + +namespace { + +template<class ELFT> +class SymbolFile : public RuntimeFile<ELFT> { +public: + SymbolFile(ELFLinkingContext &context) + : RuntimeFile<ELFT>(context, "Dynamic absolute symbols"), + _atomsAdded(false) {} + + Atom *addAbsoluteAtom(StringRef symbolName) override { + auto *a = RuntimeFile<ELFT>::addAbsoluteAtom(symbolName); + if (a) _atomsAdded = true; + return a; + } + + Atom *addUndefinedAtom(StringRef) override { + llvm_unreachable("Cannot add undefined atoms to resolve undefined symbols"); + } + + bool hasAtoms() const { return 
_atomsAdded; } + +private: + bool _atomsAdded; +}; + +template<class ELFT> +class DynamicSymbolFile : public SimpleArchiveLibraryFile { + typedef std::function<void(StringRef, RuntimeFile<ELFT> &)> Resolver; +public: + DynamicSymbolFile(ELFLinkingContext &context, Resolver resolver) + : SimpleArchiveLibraryFile("Dynamically added runtime symbols"), + _context(context), _resolver(resolver) {} + + File *find(StringRef sym, bool dataSymbolOnly) override { + if (!_file) + _file.reset(new (_alloc) SymbolFile<ELFT>(_context)); + + assert(!_file->hasAtoms() && "The file shouldn't have atoms yet"); + _resolver(sym, *_file); + // If atoms were added - release the file to the caller. + return _file->hasAtoms() ? _file.release() : nullptr; + } + +private: + ELFLinkingContext &_context; + Resolver _resolver; + + // The allocator should go before bump pointers because of + // reversed destruction order. + llvm::BumpPtrAllocator _alloc; + unique_bump_ptr<SymbolFile<ELFT>> _file; +}; + +} // end anon namespace + +//===----------------------------------------------------------------------===// +// OutputELFWriter Class +//===----------------------------------------------------------------------===// +/// \brief This acts as the base class for all the ELF writers that are output +/// for emitting an ELF output file. This class also acts as a common class for +/// creating static and dynamic executables. 
All the function in this class +/// can be overridden and an appropriate writer be created +template<class ELFT> +class OutputELFWriter : public ELFWriter { +public: + typedef Elf_Shdr_Impl<ELFT> Elf_Shdr; + typedef Elf_Sym_Impl<ELFT> Elf_Sym; + typedef Elf_Dyn_Impl<ELFT> Elf_Dyn; + + OutputELFWriter(ELFLinkingContext &context, TargetLayout<ELFT> &layout); + +protected: + // build the sections that need to be created + virtual void createDefaultSections(); + + // Build all the output sections + void buildChunks(const File &file) override; + + // Build the output file + virtual std::error_code buildOutput(const File &file); + + // Setup the ELF header. + virtual std::error_code setELFHeader(); + + // Write the file to the path specified + std::error_code writeFile(const File &File, StringRef path) override; + + // Write to the output file. + virtual std::error_code writeOutput(const File &file, StringRef path); + + // Get the size of the output file that the linker would emit. + virtual uint64_t outputFileSize() const; + + // Build the atom to address map, this has to be called + // before applying relocations + virtual void buildAtomToAddressMap(const File &file); + + // Build the symbol table for static linking + virtual void buildStaticSymbolTable(const File &file); + + // Build the dynamic symbol table for dynamic linking + virtual void buildDynamicSymbolTable(const File &file); + + // Build the section header table + virtual void buildSectionHeaderTable(); + + // Assign sections that have no segments such as the symbol table, + // section header table, string table etc + virtual void assignSectionsWithNoSegments(); + + // Add default atoms that need to be present in the output file + virtual void addDefaultAtoms(); + + // Add any runtime files and their atoms to the output + bool createImplicitFiles(std::vector<std::unique_ptr<File>> &) override; + + // Finalize the default atom values + virtual void finalizeDefaultAtomValues(); + + // This is called by the 
write section to apply relocations + uint64_t addressOfAtom(const Atom *atom) override { + auto addr = _atomToAddressMap.find(atom); + return addr == _atomToAddressMap.end() ? 0 : addr->second; + } + + // This is a hook for creating default dynamic entries + virtual void createDefaultDynamicEntries() {} + + /// \brief Create symbol table. + virtual unique_bump_ptr<SymbolTable<ELFT>> createSymbolTable(); + + /// \brief create dynamic table. + virtual unique_bump_ptr<DynamicTable<ELFT>> createDynamicTable(); + + /// \brief create dynamic symbol table. + virtual unique_bump_ptr<DynamicSymbolTable<ELFT>> + createDynamicSymbolTable(); + + /// \brief Create entry in the dynamic symbols table for this atom. + virtual bool isDynSymEntryRequired(const SharedLibraryAtom *sla) const { + return _layout.isReferencedByDefinedAtom(sla); + } + + /// \brief Create DT_NEEDED dynamic tage for the shared library. + virtual bool isNeededTagRequired(const SharedLibraryAtom *sla) const { + return false; + } + + /// \brief Process undefined symbols that left after resolution step. + virtual void processUndefinedSymbol(StringRef symName, + RuntimeFile<ELFT> &file) const {} + + llvm::BumpPtrAllocator _alloc; + + ELFLinkingContext &_context; + TargetHandler<ELFT> &_targetHandler; + + typedef llvm::DenseMap<const Atom *, uint64_t> AtomToAddress; + AtomToAddress _atomToAddressMap; + TargetLayout<ELFT> &_layout; + unique_bump_ptr<ELFHeader<ELFT>> _elfHeader; + unique_bump_ptr<ProgramHeader<ELFT>> _programHeader; + unique_bump_ptr<SymbolTable<ELFT>> _symtab; + unique_bump_ptr<StringTable<ELFT>> _strtab; + unique_bump_ptr<StringTable<ELFT>> _shstrtab; + unique_bump_ptr<SectionHeader<ELFT>> _shdrtab; + unique_bump_ptr<EHFrameHeader<ELFT>> _ehFrameHeader; + /// \name Dynamic sections. 
+ /// @{ + unique_bump_ptr<DynamicTable<ELFT>> _dynamicTable; + unique_bump_ptr<DynamicSymbolTable<ELFT>> _dynamicSymbolTable; + unique_bump_ptr<StringTable<ELFT>> _dynamicStringTable; + unique_bump_ptr<HashSection<ELFT>> _hashTable; + llvm::StringSet<> _soNeeded; + /// @} + std::unique_ptr<RuntimeFile<ELFT>> _scriptFile; + +private: + static StringRef maybeGetSOName(Node *node); +}; + +//===----------------------------------------------------------------------===// +// OutputELFWriter +//===----------------------------------------------------------------------===// +template <class ELFT> +OutputELFWriter<ELFT>::OutputELFWriter(ELFLinkingContext &context, + TargetLayout<ELFT> &layout) + : _context(context), _targetHandler(context.getTargetHandler<ELFT>()), + _layout(layout), + _scriptFile(new RuntimeFile<ELFT>(context, "Linker script runtime")) {} + +template <class ELFT> +void OutputELFWriter<ELFT>::buildChunks(const File &file) { + ScopedTask task(getDefaultDomain(), "buildChunks"); + for (const DefinedAtom *definedAtom : file.defined()) { + DefinedAtom::ContentType contentType = definedAtom->contentType(); + // Dont add COMDAT group atoms and GNU linkonce atoms, as they are used for + // symbol resolution. + // TODO: handle partial linking. 
+ if (contentType == DefinedAtom::typeGroupComdat || + contentType == DefinedAtom::typeGnuLinkOnce) + continue; + _layout.addAtom(definedAtom); + } + for (const AbsoluteAtom *absoluteAtom : file.absolute()) + _layout.addAtom(absoluteAtom); +} + +template <class ELFT> +void OutputELFWriter<ELFT>::buildStaticSymbolTable(const File &file) { + ScopedTask task(getDefaultDomain(), "buildStaticSymbolTable"); + for (auto sec : _layout.sections()) + if (auto section = dyn_cast<AtomSection<ELFT>>(sec)) + for (const auto &atom : section->atoms()) + _symtab->addSymbol(atom->_atom, section->ordinal(), atom->_virtualAddr); + for (auto &atom : _layout.absoluteAtoms()) + _symtab->addSymbol(atom->_atom, ELF::SHN_ABS, atom->_virtualAddr); + for (const UndefinedAtom *a : file.undefined()) + _symtab->addSymbol(a, ELF::SHN_UNDEF); +} + +// Returns the DSO name for a given input file if it's a shared library +// file and not marked as --as-needed. +template <class ELFT> +StringRef OutputELFWriter<ELFT>::maybeGetSOName(Node *node) { + if (auto *fnode = dyn_cast<FileNode>(node)) + if (!fnode->asNeeded()) + if (auto *file = dyn_cast<SharedLibraryFile>(fnode->getFile())) + return file->getDSOName(); + return ""; +} + +template <class ELFT> +void OutputELFWriter<ELFT>::buildDynamicSymbolTable(const File &file) { + ScopedTask task(getDefaultDomain(), "buildDynamicSymbolTable"); + for (const auto &sla : file.sharedLibrary()) { + if (isDynSymEntryRequired(sla)) { + _dynamicSymbolTable->addSymbol(sla, ELF::SHN_UNDEF); + _soNeeded.insert(sla->loadName()); + continue; + } + if (isNeededTagRequired(sla)) + _soNeeded.insert(sla->loadName()); + } + for (const std::unique_ptr<Node> &node : _context.getNodes()) { + StringRef soname = maybeGetSOName(node.get()); + if (!soname.empty()) + _soNeeded.insert(soname); + } + // Never mark the dynamic linker as DT_NEEDED + _soNeeded.erase(sys::path::filename(_context.getInterpreter())); + for (const auto &loadName : _soNeeded) { + Elf_Dyn dyn; + dyn.d_tag = 
DT_NEEDED; + dyn.d_un.d_val = _dynamicStringTable->addString(loadName.getKey()); + _dynamicTable->addEntry(dyn); + } + const auto &rpathList = _context.getRpathList(); + if (!rpathList.empty()) { + auto rpath = new (_alloc) std::string(join(rpathList.begin(), + rpathList.end(), ":")); + Elf_Dyn dyn; + dyn.d_tag = DT_RPATH; + dyn.d_un.d_val = _dynamicStringTable->addString(*rpath); + _dynamicTable->addEntry(dyn); + } + StringRef soname = _context.sharedObjectName(); + if (!soname.empty() && _context.getOutputELFType() == llvm::ELF::ET_DYN) { + Elf_Dyn dyn; + dyn.d_tag = DT_SONAME; + dyn.d_un.d_val = _dynamicStringTable->addString(soname); + _dynamicTable->addEntry(dyn); + } + // The dynamic symbol table need to be sorted earlier because the hash + // table needs to be built using the dynamic symbol table. It would be + // late to sort the symbols due to that in finalize. In the dynamic symbol + // table finalize, we call the symbol table finalize and we don't want to + // sort again + _dynamicSymbolTable->sortSymbols(); + + // Add the dynamic symbols into the hash table + _dynamicSymbolTable->addSymbolsToHashTable(); +} + +template <class ELFT> +void OutputELFWriter<ELFT>::buildAtomToAddressMap(const File &file) { + ScopedTask task(getDefaultDomain(), "buildAtomToAddressMap"); + int64_t totalAbsAtoms = _layout.absoluteAtoms().size(); + int64_t totalUndefinedAtoms = file.undefined().size(); + int64_t totalDefinedAtoms = 0; + for (auto sec : _layout.sections()) + if (auto section = dyn_cast<AtomSection<ELFT> >(sec)) { + totalDefinedAtoms += section->atoms().size(); + for (const auto &atom : section->atoms()) + _atomToAddressMap[atom->_atom] = atom->_virtualAddr; + } + // build the atomToAddressMap that contains absolute symbols too + for (auto &atom : _layout.absoluteAtoms()) + _atomToAddressMap[atom->_atom] = atom->_virtualAddr; + + // Set the total number of atoms in the symbol table, so that appropriate + // resizing of the string table can be done + 
_symtab->setNumEntries(totalDefinedAtoms + totalAbsAtoms + + totalUndefinedAtoms); +} + +template<class ELFT> +void OutputELFWriter<ELFT>::buildSectionHeaderTable() { + ScopedTask task(getDefaultDomain(), "buildSectionHeaderTable"); + for (auto outputSection : _layout.outputSections()) { + if (outputSection->kind() != Chunk<ELFT>::Kind::ELFSection && + outputSection->kind() != Chunk<ELFT>::Kind::AtomSection) + continue; + if (outputSection->hasSegment()) + _shdrtab->appendSection(outputSection); + } +} + +template<class ELFT> +void OutputELFWriter<ELFT>::assignSectionsWithNoSegments() { + ScopedTask task(getDefaultDomain(), "assignSectionsWithNoSegments"); + for (auto outputSection : _layout.outputSections()) { + if (outputSection->kind() != Chunk<ELFT>::Kind::ELFSection && + outputSection->kind() != Chunk<ELFT>::Kind::AtomSection) + continue; + if (!outputSection->hasSegment()) + _shdrtab->appendSection(outputSection); + } + _layout.assignFileOffsetsForMiscSections(); + for (auto sec : _layout.sections()) + if (auto section = dyn_cast<Section<ELFT>>(sec)) + if (!DefaultLayout<ELFT>::hasOutputSegment(section)) + _shdrtab->updateSection(section); +} + +template <class ELFT> void OutputELFWriter<ELFT>::addDefaultAtoms() { + const llvm::StringSet<> &symbols = + _context.linkerScriptSema().getScriptDefinedSymbols(); + for (auto &sym : symbols) + _scriptFile->addAbsoluteAtom(sym.getKey()); +} + +template <class ELFT> +bool OutputELFWriter<ELFT>::createImplicitFiles( + std::vector<std::unique_ptr<File>> &result) { + // Add the virtual archive to resolve undefined symbols. + // The file will be added later in the linking context. 
+ auto callback = [this](StringRef sym, RuntimeFile<ELFT> &file) { + processUndefinedSymbol(sym, file); + }; + auto &ctx = const_cast<ELFLinkingContext &>(_context); + ctx.setUndefinesResolver( + llvm::make_unique<DynamicSymbolFile<ELFT>>(ctx, std::move(callback))); + // Add script defined symbols + result.push_back(std::move(_scriptFile)); + return true; +} + +template <class ELFT> +void OutputELFWriter<ELFT>::finalizeDefaultAtomValues() { + const llvm::StringSet<> &symbols = + _context.linkerScriptSema().getScriptDefinedSymbols(); + for (auto &sym : symbols) { + uint64_t res = + _context.linkerScriptSema().getLinkerScriptExprValue(sym.getKey()); + auto a = _layout.findAbsoluteAtom(sym.getKey()); + (*a)->_virtualAddr = res; + } +} + +template <class ELFT> void OutputELFWriter<ELFT>::createDefaultSections() { + _elfHeader.reset(new (_alloc) ELFHeader<ELFT>(_context)); + _programHeader.reset(new (_alloc) ProgramHeader<ELFT>(_context)); + _layout.setHeader(_elfHeader.get()); + _layout.setProgramHeader(_programHeader.get()); + + _symtab = std::move(this->createSymbolTable()); + _strtab.reset(new (_alloc) StringTable<ELFT>( + _context, ".strtab", DefaultLayout<ELFT>::ORDER_STRING_TABLE)); + _shstrtab.reset(new (_alloc) StringTable<ELFT>( + _context, ".shstrtab", DefaultLayout<ELFT>::ORDER_SECTION_STRINGS)); + _shdrtab.reset(new (_alloc) SectionHeader<ELFT>( + _context, DefaultLayout<ELFT>::ORDER_SECTION_HEADERS)); + _layout.addSection(_symtab.get()); + _layout.addSection(_strtab.get()); + _layout.addSection(_shstrtab.get()); + _shdrtab->setStringSection(_shstrtab.get()); + _symtab->setStringSection(_strtab.get()); + _layout.addSection(_shdrtab.get()); + + for (auto sec : _layout.sections()) { + // TODO: use findOutputSection + auto section = dyn_cast<Section<ELFT>>(sec); + if (!section || section->outputSectionName() != ".eh_frame") + continue; + _ehFrameHeader.reset(new (_alloc) EHFrameHeader<ELFT>( + _context, ".eh_frame_hdr", _layout, + 
DefaultLayout<ELFT>::ORDER_EH_FRAMEHDR)); + _layout.addSection(_ehFrameHeader.get()); + break; + } + + if (_context.isDynamic()) { + _dynamicTable = std::move(createDynamicTable()); + _dynamicStringTable.reset(new (_alloc) StringTable<ELFT>( + _context, ".dynstr", DefaultLayout<ELFT>::ORDER_DYNAMIC_STRINGS, true)); + _dynamicSymbolTable = std::move(createDynamicSymbolTable()); + _hashTable.reset(new (_alloc) HashSection<ELFT>( + _context, ".hash", DefaultLayout<ELFT>::ORDER_HASH)); + // Set the hash table in the dynamic symbol table so that the entries in the + // hash table can be created + _dynamicSymbolTable->setHashTable(_hashTable.get()); + _hashTable->setSymbolTable(_dynamicSymbolTable.get()); + _layout.addSection(_dynamicTable.get()); + _layout.addSection(_dynamicStringTable.get()); + _layout.addSection(_dynamicSymbolTable.get()); + _layout.addSection(_hashTable.get()); + _dynamicSymbolTable->setStringSection(_dynamicStringTable.get()); + _dynamicTable->setSymbolTable(_dynamicSymbolTable.get()); + _dynamicTable->setHashTable(_hashTable.get()); + if (_layout.hasDynamicRelocationTable()) + _layout.getDynamicRelocationTable()->setSymbolTable( + _dynamicSymbolTable.get()); + if (_layout.hasPLTRelocationTable()) + _layout.getPLTRelocationTable()->setSymbolTable( + _dynamicSymbolTable.get()); + } +} + +template <class ELFT> +unique_bump_ptr<SymbolTable<ELFT>> + OutputELFWriter<ELFT>::createSymbolTable() { + return unique_bump_ptr<SymbolTable<ELFT>>(new (_alloc) SymbolTable<ELFT>( + this->_context, ".symtab", DefaultLayout<ELFT>::ORDER_SYMBOL_TABLE)); +} + +/// \brief create dynamic table +template <class ELFT> +unique_bump_ptr<DynamicTable<ELFT>> + OutputELFWriter<ELFT>::createDynamicTable() { + return unique_bump_ptr<DynamicTable<ELFT>>( + new (_alloc) DynamicTable<ELFT>( + this->_context, _layout, ".dynamic", DefaultLayout<ELFT>::ORDER_DYNAMIC)); +} + +/// \brief create dynamic symbol table +template <class ELFT> +unique_bump_ptr<DynamicSymbolTable<ELFT>> + 
OutputELFWriter<ELFT>::createDynamicSymbolTable() { + return unique_bump_ptr<DynamicSymbolTable<ELFT>>( + new (_alloc) DynamicSymbolTable<ELFT>( + this->_context, _layout, ".dynsym", + DefaultLayout<ELFT>::ORDER_DYNAMIC_SYMBOLS)); +} + +template <class ELFT> +std::error_code OutputELFWriter<ELFT>::buildOutput(const File &file) { + ScopedTask buildTask(getDefaultDomain(), "ELF Writer buildOutput"); + buildChunks(file); + + // Create the default sections like the symbol table, string table, and the + // section string table + createDefaultSections(); + + // Set the Layout + _layout.assignSectionsToSegments(); + + // Create the dynamic table entries + if (_context.isDynamic()) { + _dynamicTable->createDefaultEntries(); + buildDynamicSymbolTable(file); + } + + // Call the preFlight callbacks to modify the sections and the atoms + // contained in them, in anyway the targets may want + _layout.doPreFlight(); + + _layout.assignVirtualAddress(); + + // Finalize the default value of symbols that the linker adds + finalizeDefaultAtomValues(); + + // Build the Atom To Address map for applying relocations + buildAtomToAddressMap(file); + + // Create symbol table and section string table + // Do it only if -s is not specified. 
+ if (!_context.stripSymbols()) + buildStaticSymbolTable(file); + + // Finalize the layout by calling the finalize() functions + _layout.finalize(); + + // build Section Header table + buildSectionHeaderTable(); + + // assign Offsets and virtual addresses + // for sections with no segments + assignSectionsWithNoSegments(); + + if (_context.isDynamic()) + _dynamicTable->updateDynamicTable(); + + return std::error_code(); +} + +template <class ELFT> std::error_code OutputELFWriter<ELFT>::setELFHeader() { + _elfHeader->e_type(_context.getOutputELFType()); + _elfHeader->e_machine(_context.getOutputMachine()); + _elfHeader->e_ident(ELF::EI_VERSION, 1); + _elfHeader->e_ident(ELF::EI_OSABI, 0); + _elfHeader->e_version(1); + _elfHeader->e_phoff(_programHeader->fileOffset()); + _elfHeader->e_shoff(_shdrtab->fileOffset()); + _elfHeader->e_phentsize(_programHeader->entsize()); + _elfHeader->e_phnum(_programHeader->numHeaders()); + _elfHeader->e_shentsize(_shdrtab->entsize()); + _elfHeader->e_shnum(_shdrtab->numHeaders()); + _elfHeader->e_shstrndx(_shstrtab->ordinal()); + if (const auto *al = _layout.findAtomLayoutByName(_context.entrySymbolName())) + _elfHeader->e_entry(al->_virtualAddr); + else + _elfHeader->e_entry(0); + + return std::error_code(); +} + +template <class ELFT> uint64_t OutputELFWriter<ELFT>::outputFileSize() const { + return _shdrtab->fileOffset() + _shdrtab->fileSize(); +} + +template <class ELFT> +std::error_code OutputELFWriter<ELFT>::writeOutput(const File &file, + StringRef path) { + std::unique_ptr<FileOutputBuffer> buffer; + ScopedTask createOutputTask(getDefaultDomain(), "ELF Writer Create Output"); + std::error_code ec = FileOutputBuffer::create(path, outputFileSize(), buffer, + FileOutputBuffer::F_executable); + createOutputTask.end(); + + if (ec) + return ec; + + ScopedTask writeTask(getDefaultDomain(), "ELF Writer write to memory"); + + // HACK: We have to write out the header and program header here even though + // they are a member of a 
segment because only sections are written in the + // following loop. + + // Finalize ELF Header / Program Headers. + _elfHeader->finalize(); + _programHeader->finalize(); + + _elfHeader->write(this, _layout, *buffer); + _programHeader->write(this, _layout, *buffer); + + auto sections = _layout.sections(); + parallel_for_each( + sections.begin(), sections.end(), + [&](Chunk<ELFT> *section) { section->write(this, _layout, *buffer); }); + writeTask.end(); + + ScopedTask commitTask(getDefaultDomain(), "ELF Writer commit to disk"); + return buffer->commit(); +} + +template <class ELFT> +std::error_code OutputELFWriter<ELFT>::writeFile(const File &file, + StringRef path) { + std::error_code ec = buildOutput(file); + if (ec) + return ec; + + ec = setELFHeader(); + if (ec) + return ec; + + return writeOutput(file, path); +} +} // namespace elf +} // namespace lld + +#endif // LLD_READER_WRITER_ELF_OUTPUT_WRITER_H diff --git a/lib/ReaderWriter/ELF/Reader.cpp b/lib/ReaderWriter/ELF/Reader.cpp new file mode 100644 index 000000000000..fc113d478913 --- /dev/null +++ b/lib/ReaderWriter/ELF/Reader.cpp @@ -0,0 +1,43 @@ +//===- lib/ReaderWriter/ELF/Reader.cpp ------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief Defines the ELF Reader and all helper sub classes to consume an ELF +/// file and produces atoms out of it. +/// +//===----------------------------------------------------------------------===// + +#include "ELFReader.h" +#include <map> +#include <vector> + +using llvm::support::endianness; +using namespace llvm::object; + +namespace lld { + +// This dynamic registration of a handler causes support for all ELF +// architectures to be pulled into the linker. 
If we want to support making a +// linker that only supports one ELF architecture, we'd need to change this +// to have a different registration method for each architecture. +void Registry::addSupportELFObjects(ELFLinkingContext &ctx) { + + // Tell registry about the ELF object file parser. + add(std::move(ctx.targetHandler()->getObjReader())); + + // Tell registry about the relocation name to number mapping for this arch. + ctx.targetHandler()->registerRelocationNames(*this); +} + +void Registry::addSupportELFDynamicSharedObjects(ELFLinkingContext &ctx) { + // Tell registry about the ELF dynamic shared library file parser. + add(ctx.targetHandler()->getDSOReader()); +} + +} // end namespace lld diff --git a/lib/ReaderWriter/ELF/SectionChunks.h b/lib/ReaderWriter/ELF/SectionChunks.h new file mode 100644 index 000000000000..03bdb59e6568 --- /dev/null +++ b/lib/ReaderWriter/ELF/SectionChunks.h @@ -0,0 +1,1498 @@ +//===- lib/ReaderWriter/ELF/SectionChunks.h -------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_ELF_SECTION_CHUNKS_H +#define LLD_READER_WRITER_ELF_SECTION_CHUNKS_H + +#include "Chunk.h" +#include "Layout.h" +#include "TargetHandler.h" +#include "Writer.h" +#include "lld/Core/DefinedAtom.h" +#include "lld/Core/Parallel.h" +#include "lld/Core/range.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Object/ELF.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FileOutputBuffer.h" +#include <memory> +#include <mutex> + +namespace lld { +namespace elf { +template <class> class OutputSection; +using namespace llvm::ELF; +template <class ELFT> class Segment; + +/// \brief An ELF section. +template <class ELFT> class Section : public Chunk<ELFT> { +public: + Section(const ELFLinkingContext &context, StringRef sectionName, + StringRef chunkName, + typename Chunk<ELFT>::Kind k = Chunk<ELFT>::Kind::ELFSection) + : Chunk<ELFT>(chunkName, k, context), _outputSection(nullptr), _flags(0), + _entSize(0), _type(0), _link(0), _info(0), + _isFirstSectionInOutputSection(false), _segmentType(SHT_NULL), + _inputSectionName(sectionName), _outputSectionName(sectionName) {} + + /// \brief Modify the section contents before assigning virtual addresses + // or assigning file offsets + void doPreFlight() override {} + + /// \brief Finalize the section contents before writing + void finalize() override {} + + /// \brief Does this section have an output segment. + virtual bool hasOutputSegment() { + return false; + } + + /// Return if the section is a loadable section that occupies memory + virtual bool isLoadableSection() const { return false; } + + /// \brief Assign file offsets starting at offset. 
+ virtual void assignFileOffsets(uint64_t offset) {} + + /// \brief Assign virtual addresses starting at addr. + virtual void assignVirtualAddress(uint64_t addr) {} + + uint64_t getFlags() const { return _flags; } + uint64_t getEntSize() const { return _entSize; } + uint32_t getType() const { return _type; } + uint32_t getLink() const { return _link; } + uint32_t getInfo() const { return _info; } + Layout::SegmentType getSegmentType() const { return _segmentType; } + + /// \brief Return the type of content that the section contains + virtual int getContentType() const override { + if (_flags & llvm::ELF::SHF_EXECINSTR) + return Chunk<ELFT>::ContentType::Code; + else if (_flags & llvm::ELF::SHF_WRITE) + return Chunk<ELFT>::ContentType::Data; + else if (_flags & llvm::ELF::SHF_ALLOC) + return Chunk<ELFT>::ContentType::Code; + else + return Chunk<ELFT>::ContentType::Unknown; + } + + /// \brief convert the segment type to a String for diagnostics and printing + /// purposes + StringRef segmentKindToStr() const; + + /// \brief Records the segmentType, that this section belongs to + void setSegmentType(const Layout::SegmentType segmentType) { + this->_segmentType = segmentType; + } + + virtual const AtomLayout *findAtomLayoutByName(StringRef) const { + return nullptr; + } + + void setOutputSection(OutputSection<ELFT> *os, bool isFirst = false) { + _outputSection = os; + _isFirstSectionInOutputSection = isFirst; + } + + static bool classof(const Chunk<ELFT> *c) { + return c->kind() == Chunk<ELFT>::Kind::ELFSection || + c->kind() == Chunk<ELFT>::Kind::AtomSection; + } + + uint64_t alignment() const override { + return _isFirstSectionInOutputSection ? 
_outputSection->alignment() + : this->_alignment; + } + + virtual StringRef inputSectionName() const { return _inputSectionName; } + + virtual StringRef outputSectionName() const { return _outputSectionName; } + + virtual void setOutputSectionName(StringRef outputSectionName) { + _outputSectionName = outputSectionName; + } + + void setArchiveNameOrPath(StringRef name) { _archivePath = name; } + + void setMemberNameOrPath(StringRef name) { _memberPath = name; } + + StringRef archivePath() { return _archivePath; } + + StringRef memberPath() { return _memberPath; } + +protected: + /// \brief OutputSection this Section is a member of, or nullptr. + OutputSection<ELFT> *_outputSection; + /// \brief ELF SHF_* flags. + uint64_t _flags; + /// \brief The size of each entity. + uint64_t _entSize; + /// \brief ELF SHT_* type. + uint32_t _type; + /// \brief sh_link field. + uint32_t _link; + /// \brief the sh_info field. + uint32_t _info; + /// \brief Is this the first section in the output section. + bool _isFirstSectionInOutputSection; + /// \brief the output ELF segment type of this section. + Layout::SegmentType _segmentType; + /// \brief Input section name. + StringRef _inputSectionName; + /// \brief Output section name. + StringRef _outputSectionName; + StringRef _archivePath; + StringRef _memberPath; +}; + +/// \brief A section containing atoms. 
+template <class ELFT> class AtomSection : public Section<ELFT> { +public: + AtomSection(const ELFLinkingContext &context, StringRef sectionName, + int32_t contentType, int32_t permissions, int32_t order) + : Section<ELFT>(context, sectionName, "AtomSection", + Chunk<ELFT>::Kind::AtomSection), + _contentType(contentType), _contentPermissions(permissions), + _isLoadedInMemory(true) { + this->setOrder(order); + + switch (contentType) { + case DefinedAtom::typeCode: + case DefinedAtom::typeDataFast: + case DefinedAtom::typeData: + case DefinedAtom::typeConstant: + case DefinedAtom::typeGOT: + case DefinedAtom::typeStub: + case DefinedAtom::typeResolver: + case DefinedAtom::typeThreadData: + this->_type = SHT_PROGBITS; + break; + + case DefinedAtom::typeThreadZeroFill: + case DefinedAtom::typeZeroFillFast: + case DefinedAtom::typeZeroFill: + this->_type = SHT_NOBITS; + break; + + case DefinedAtom::typeRONote: + case DefinedAtom::typeRWNote: + this->_type = SHT_NOTE; + break; + + case DefinedAtom::typeNoAlloc: + this->_type = SHT_PROGBITS; + this->_isLoadedInMemory = false; + break; + } + + switch (permissions) { + case DefinedAtom::permR__: + this->_flags = SHF_ALLOC; + break; + case DefinedAtom::permR_X: + this->_flags = SHF_ALLOC | SHF_EXECINSTR; + break; + case DefinedAtom::permRW_: + case DefinedAtom::permRW_L: + this->_flags = SHF_ALLOC | SHF_WRITE; + if (_contentType == DefinedAtom::typeThreadData || + _contentType == DefinedAtom::typeThreadZeroFill) + this->_flags |= SHF_TLS; + break; + case DefinedAtom::permRWX: + this->_flags = SHF_ALLOC | SHF_WRITE | SHF_EXECINSTR; + break; + case DefinedAtom::perm___: + this->_flags = 0; + break; + } + } + + /// Align the offset to the required modulus defined by the atom alignment + uint64_t alignOffset(uint64_t offset, DefinedAtom::Alignment &atomAlign); + + /// Return if the section is a loadable section that occupies memory + bool isLoadableSection() const override { return _isLoadedInMemory; } + + // \brief Append an 
atom to a Section. The atom gets pushed into a vector + // contains the atom, the atom file offset, the atom virtual address + // the atom file offset is aligned appropriately as set by the Reader + virtual const lld::AtomLayout *appendAtom(const Atom *atom); + + /// \brief Set the virtual address of each Atom in the Section. This + /// routine gets called after the linker fixes up the virtual address + /// of the section + virtual void assignVirtualAddress(uint64_t addr) override { + parallel_for_each(_atoms.begin(), _atoms.end(), [&](AtomLayout *ai) { + ai->_virtualAddr = addr + ai->_fileOffset; + }); + } + + /// \brief Set the file offset of each Atom in the section. This routine + /// gets called after the linker fixes up the section offset + void assignFileOffsets(uint64_t offset) override { + parallel_for_each(_atoms.begin(), _atoms.end(), [&](AtomLayout *ai) { + ai->_fileOffset = offset + ai->_fileOffset; + }); + } + + /// \brief Find the Atom address given a name, this is needed to properly + /// apply relocation. 
The section class calls this to find the atom address + /// to fix the relocation + const AtomLayout *findAtomLayoutByName(StringRef name) const override { + for (auto ai : _atoms) + if (ai->_atom->name() == name) + return ai; + return nullptr; + } + + /// \brief Return the raw flags, we need this to sort segments + int64_t atomflags() const { return _contentPermissions; } + + /// Atom Iterators + typedef typename std::vector<lld::AtomLayout *>::iterator atom_iter; + + range<atom_iter> atoms() { return _atoms; } + + void write(ELFWriter *writer, TargetLayout<ELFT> &layout, + llvm::FileOutputBuffer &buffer) override; + + static bool classof(const Chunk<ELFT> *c) { + return c->kind() == Chunk<ELFT>::Kind::AtomSection; + } + +protected: + llvm::BumpPtrAllocator _alloc; + int32_t _contentType; + int32_t _contentPermissions; + bool _isLoadedInMemory; + std::vector<lld::AtomLayout *> _atoms; + mutable std::mutex _outputMutex; + + void printError(const std::string &errorStr, const AtomLayout &atom, + const Reference &ref) const { + StringRef kindValStr; + if (!this->_context.registry().referenceKindToString(ref.kindNamespace(), + ref.kindArch(), + ref.kindValue(), + kindValStr)) { + kindValStr = "unknown"; + } + + std::string errStr = (Twine(errorStr) + " in file " + + atom._atom->file().path() + + ": reference from " + atom._atom->name() + + "+" + Twine(ref.offsetInAtom()) + + " to " + ref.target()->name() + + "+" + Twine(ref.addend()) + + " of type " + Twine(ref.kindValue()) + + " (" + kindValStr + ")\n").str(); + + // Take the lock to prevent output getting interleaved between threads + std::lock_guard<std::mutex> lock(_outputMutex); + llvm::errs() << errStr; + } +}; + +/// Align the offset to the required modulus defined by the atom alignment +template <class ELFT> +uint64_t AtomSection<ELFT>::alignOffset(uint64_t offset, + DefinedAtom::Alignment &atomAlign) { + uint64_t requiredModulus = atomAlign.modulus; + uint64_t alignment = 1u << atomAlign.powerOf2; + uint64_t 
currentModulus = (offset % alignment); + uint64_t retOffset = offset; + if (currentModulus != requiredModulus) { + if (requiredModulus > currentModulus) + retOffset += requiredModulus - currentModulus; + else + retOffset += alignment + requiredModulus - currentModulus; + } + return retOffset; +} + +// \brief Append an atom to a Section. The atom gets pushed into a vector +// contains the atom, the atom file offset, the atom virtual address +// the atom file offset is aligned appropriately as set by the Reader +template <class ELFT> +const lld::AtomLayout *AtomSection<ELFT>::appendAtom(const Atom *atom) { + const DefinedAtom *definedAtom = cast<DefinedAtom>(atom); + + DefinedAtom::Alignment atomAlign = definedAtom->alignment(); + uint64_t alignment = 1u << atomAlign.powerOf2; + // Align the atom to the required modulus/ align the file offset and the + // memory offset separately this is required so that BSS symbols are handled + // properly as the BSS symbols only occupy memory size and not file size + uint64_t fOffset = alignOffset(this->fileSize(), atomAlign); + uint64_t mOffset = alignOffset(this->memSize(), atomAlign); + switch(definedAtom->contentType()) { + case DefinedAtom::typeCode: + case DefinedAtom::typeConstant: + case DefinedAtom::typeData: + case DefinedAtom::typeDataFast: + case DefinedAtom::typeZeroFillFast: + case DefinedAtom::typeGOT: + case DefinedAtom::typeStub: + case DefinedAtom::typeResolver: + case DefinedAtom::typeThreadData: + case DefinedAtom::typeRONote: + case DefinedAtom::typeRWNote: + _atoms.push_back(new (_alloc) lld::AtomLayout(atom, fOffset, 0)); + this->_fsize = fOffset + definedAtom->size(); + this->_msize = mOffset + definedAtom->size(); + DEBUG_WITH_TYPE("Section", + llvm::dbgs() << "[" << this->name() << " " << this << "] " + << "Adding atom: " << atom->name() << "@" + << fOffset << "\n"); + break; + case DefinedAtom::typeNoAlloc: + _atoms.push_back(new (_alloc) lld::AtomLayout(atom, fOffset, 0)); + this->_fsize = fOffset + 
definedAtom->size(); + DEBUG_WITH_TYPE("Section", llvm::dbgs() << "[" << this->name() << " " + << this << "] " + << "Adding atom: " << atom->name() + << "@" << fOffset << "\n"); + break; + case DefinedAtom::typeThreadZeroFill: + case DefinedAtom::typeZeroFill: + _atoms.push_back(new (_alloc) lld::AtomLayout(atom, mOffset, 0)); + this->_msize = mOffset + definedAtom->size(); + break; + default: + llvm::dbgs() << definedAtom->contentType() << "\n"; + llvm_unreachable("Uexpected content type."); + } + // Set the section alignment to the largest alignment + // std::max doesn't support uint64_t + if (this->_alignment < alignment) + this->_alignment = alignment; + + if (_atoms.size()) + return _atoms.back(); + return nullptr; +} + +/// \brief convert the segment type to a String for diagnostics +/// and printing purposes +template <class ELFT> StringRef Section<ELFT>::segmentKindToStr() const { + switch(_segmentType) { + case llvm::ELF::PT_DYNAMIC: + return "DYNAMIC"; + case llvm::ELF::PT_INTERP: + return "INTERP"; + case llvm::ELF::PT_LOAD: + return "LOAD"; + case llvm::ELF::PT_GNU_EH_FRAME: + return "EH_FRAME"; + case llvm::ELF::PT_GNU_RELRO: + return "GNU_RELRO"; + case llvm::ELF::PT_NOTE: + return "NOTE"; + case llvm::ELF::PT_NULL: + return "NULL"; + case llvm::ELF::PT_TLS: + return "TLS"; + default: + return "UNKNOWN"; + } +} + +/// \brief Write the section and the atom contents to the buffer +template <class ELFT> +void AtomSection<ELFT>::write(ELFWriter *writer, TargetLayout<ELFT> &layout, + llvm::FileOutputBuffer &buffer) { + uint8_t *chunkBuffer = buffer.getBufferStart(); + bool success = true; + parallel_for_each(_atoms.begin(), _atoms.end(), [&](lld::AtomLayout * ai) { + DEBUG_WITH_TYPE("Section", + llvm::dbgs() << "Writing atom: " << ai->_atom->name() + << " | " << ai->_fileOffset << "\n"); + const DefinedAtom *definedAtom = cast<DefinedAtom>(ai->_atom); + if (!definedAtom->occupiesDiskSpace()) + return; + // Copy raw content of atom to file buffer. 
+ ArrayRef<uint8_t> content = definedAtom->rawContent(); + uint64_t contentSize = content.size(); + if (contentSize == 0) + return; + uint8_t *atomContent = chunkBuffer + ai->_fileOffset; + std::memcpy(atomContent, content.data(), contentSize); + const TargetRelocationHandler &relHandler = + this->_context.template getTargetHandler<ELFT>().getRelocationHandler(); + for (const auto ref : *definedAtom) { + if (std::error_code ec = relHandler.applyRelocation(*writer, buffer, + *ai, *ref)) { + printError(ec.message(), *ai, *ref); + success = false; + } + } + }); + if (!success) + llvm::report_fatal_error("relocating output"); +} + +/// \brief A OutputSection represents a set of sections grouped by the same +/// name. The output file that gets written by the linker has sections grouped +/// by similar names +template <class ELFT> class OutputSection { +public: + // Iterators + typedef typename std::vector<Chunk<ELFT> *>::iterator ChunkIter; + + OutputSection(StringRef name); + + // Appends a section into the list of sections that are part of this Output + // Section + void appendSection(Chunk<ELFT> *c); + + // Set the OutputSection is associated with a segment + void setHasSegment() { _hasSegment = true; } + + /// Sets the ordinal + void setOrdinal(uint64_t ordinal) { _ordinal = ordinal; } + + /// Sets the Memory size + void setMemSize(uint64_t memsz) { _memSize = memsz; } + + /// Sets the size fo the output Section. + void setSize(uint64_t fsiz) { _size = fsiz; } + + // The offset of the first section contained in the output section is + // contained here. + void setFileOffset(uint64_t foffset) { _fileOffset = foffset; } + + // Sets the starting address of the section + void setAddr(uint64_t addr) { _virtualAddr = addr; } + + // Is the section loadable? 
+ bool isLoadableSection() const { return _isLoadableSection; } + + // Set section Loadable + void setLoadableSection(bool isLoadable) { + _isLoadableSection = isLoadable; + } + + void setLink(uint64_t link) { _link = link; } + + void setInfo(uint64_t info) { _shInfo = info; } + + void setFlag(uint64_t flags) { _flags = flags; } + + void setType(int16_t type) { _type = type; } + + range<ChunkIter> sections() { return _sections; } + + // The below functions returns the properties of the OutputSection. + bool hasSegment() const { return _hasSegment; } + + StringRef name() const { return _name; } + + int64_t shinfo() const { return _shInfo; } + + uint64_t alignment() const { return _alignment; } + + int64_t link() const { return _link; } + + int64_t type() const { return _type; } + + uint64_t virtualAddr() const { return _virtualAddr; } + + int64_t ordinal() const { return _ordinal; } + + int64_t kind() const { return _kind; } + + uint64_t fileSize() const { return _size; } + + int64_t entsize() const { return _entSize; } + + uint64_t fileOffset() const { return _fileOffset; } + + int64_t flags() const { return _flags; } + + uint64_t memSize() { return _memSize; } + +private: + StringRef _name; + bool _hasSegment; + uint64_t _ordinal; + uint64_t _flags; + uint64_t _size; + uint64_t _memSize; + uint64_t _fileOffset; + uint64_t _virtualAddr; + int64_t _shInfo; + int64_t _entSize; + int64_t _link; + uint64_t _alignment; + int64_t _kind; + int64_t _type; + bool _isLoadableSection; + std::vector<Chunk<ELFT> *> _sections; +}; + +/// OutputSection +template <class ELFT> +OutputSection<ELFT>::OutputSection(StringRef name) + : _name(name), _hasSegment(false), _ordinal(0), _flags(0), _size(0), + _memSize(0), _fileOffset(0), _virtualAddr(0), _shInfo(0), _entSize(0), + _link(0), _alignment(0), _kind(0), _type(0), _isLoadableSection(false) {} + +template <class ELFT> void OutputSection<ELFT>::appendSection(Chunk<ELFT> *c) { + if (c->alignment() > _alignment) + _alignment = 
c->alignment(); + if (const auto section = dyn_cast<Section<ELFT>>(c)) { + assert(!_link && "Section already has a link!"); + _link = section->getLink(); + _shInfo = section->getInfo(); + _entSize = section->getEntSize(); + _type = section->getType(); + if (_flags < section->getFlags()) + _flags = section->getFlags(); + section->setOutputSection(this, (_sections.size() == 0)); + } + _kind = c->kind(); + _sections.push_back(c); +} + +/// \brief The class represents the ELF String Table +template<class ELFT> +class StringTable : public Section<ELFT> { +public: + StringTable(const ELFLinkingContext &, const char *str, int32_t order, + bool dynamic = false); + + uint64_t addString(StringRef symname); + + void write(ELFWriter *writer, TargetLayout<ELFT> &layout, + llvm::FileOutputBuffer &buffer) override; + + void setNumEntries(int64_t numEntries) { _stringMap.resize(numEntries); } + +private: + std::vector<StringRef> _strings; + + struct StringRefMappingInfo { + static StringRef getEmptyKey() { return StringRef(); } + static StringRef getTombstoneKey() { return StringRef(" ", 1); } + static unsigned getHashValue(StringRef const val) { + return llvm::HashString(val); + } + static bool isEqual(StringRef const lhs, StringRef const rhs) { + return lhs.equals(rhs); + } + }; + typedef typename llvm::DenseMap<StringRef, uint64_t, + StringRefMappingInfo> StringMapT; + typedef typename StringMapT::iterator StringMapTIter; + StringMapT _stringMap; +}; + +template <class ELFT> +StringTable<ELFT>::StringTable(const ELFLinkingContext &context, + const char *str, int32_t order, bool dynamic) + : Section<ELFT>(context, str, "StringTable") { + // the string table has a NULL entry for which + // add an empty string + _strings.push_back(""); + this->_fsize = 1; + this->_alignment = 1; + this->setOrder(order); + this->_type = SHT_STRTAB; + if (dynamic) { + this->_flags = SHF_ALLOC; + this->_msize = this->_fsize; + } +} + +template <class ELFT> uint64_t 
StringTable<ELFT>::addString(StringRef symname) { + if (symname.empty()) + return 0; + StringMapTIter stringIter = _stringMap.find(symname); + if (stringIter == _stringMap.end()) { + _strings.push_back(symname); + uint64_t offset = this->_fsize; + this->_fsize += symname.size() + 1; + if (this->_flags & SHF_ALLOC) + this->_msize = this->_fsize; + _stringMap[symname] = offset; + return offset; + } + return stringIter->second; +} + +template <class ELFT> +void StringTable<ELFT>::write(ELFWriter *writer, TargetLayout<ELFT> &, + llvm::FileOutputBuffer &buffer) { + uint8_t *chunkBuffer = buffer.getBufferStart(); + uint8_t *dest = chunkBuffer + this->fileOffset(); + for (auto si : _strings) { + memcpy(dest, si.data(), si.size()); + dest += si.size(); + memcpy(dest, "", 1); + dest += 1; + } +} + +/// \brief The SymbolTable class represents the symbol table in a ELF file +template<class ELFT> +class SymbolTable : public Section<ELFT> { + typedef typename llvm::object::ELFDataTypeTypedefHelper<ELFT>::Elf_Addr + Elf_Addr; + +public: + typedef llvm::object::Elf_Sym_Impl<ELFT> Elf_Sym; + + SymbolTable(const ELFLinkingContext &context, const char *str, int32_t order); + + /// \brief set the number of entries that would exist in the symbol + /// table for the current link + void setNumEntries(int64_t numEntries) const { + if (_stringSection) + _stringSection->setNumEntries(numEntries); + } + + /// \brief return number of entries + std::size_t size() const { return _symbolTable.size(); } + + void addSymbol(const Atom *atom, int32_t sectionIndex, uint64_t addr = 0, + const lld::AtomLayout *layout = nullptr); + + /// \brief Get the symbol table index for an Atom. If it's not in the symbol + /// table, return STN_UNDEF. 
+ uint32_t getSymbolTableIndex(const Atom *a) const { + for (size_t i = 0, e = _symbolTable.size(); i < e; ++i) + if (_symbolTable[i]._atom == a) + return i; + return STN_UNDEF; + } + + void finalize() override { finalize(true); } + + virtual void sortSymbols() { + std::stable_sort(_symbolTable.begin(), _symbolTable.end(), + [](const SymbolEntry & A, const SymbolEntry & B) { + return A._symbol.getBinding() < B._symbol.getBinding(); + }); + } + + virtual void addAbsoluteAtom(Elf_Sym &sym, const AbsoluteAtom *aa, + int64_t addr); + + virtual void addDefinedAtom(Elf_Sym &sym, const DefinedAtom *da, + int64_t addr); + + virtual void addUndefinedAtom(Elf_Sym &sym, const UndefinedAtom *ua); + + virtual void addSharedLibAtom(Elf_Sym &sym, const SharedLibraryAtom *sla); + + virtual void finalize(bool sort); + + void write(ELFWriter *writer, TargetLayout<ELFT> &layout, + llvm::FileOutputBuffer &buffer) override; + + void setStringSection(StringTable<ELFT> *s) { _stringSection = s; } + + StringTable<ELFT> *getStringTable() const { return _stringSection; } + +protected: + struct SymbolEntry { + SymbolEntry(const Atom *a, const Elf_Sym &sym, + const lld::AtomLayout *layout) + : _atom(a), _atomLayout(layout), _symbol(sym) {} + + const Atom *_atom; + const lld::AtomLayout *_atomLayout; + Elf_Sym _symbol; + }; + + llvm::BumpPtrAllocator _symbolAllocate; + StringTable<ELFT> *_stringSection; + std::vector<SymbolEntry> _symbolTable; +}; + +/// ELF Symbol Table +template <class ELFT> +SymbolTable<ELFT>::SymbolTable(const ELFLinkingContext &context, + const char *str, int32_t order) + : Section<ELFT>(context, str, "SymbolTable") { + this->setOrder(order); + Elf_Sym symbol; + std::memset(&symbol, 0, sizeof(Elf_Sym)); + _symbolTable.push_back(SymbolEntry(nullptr, symbol, nullptr)); + this->_entSize = sizeof(Elf_Sym); + this->_fsize = sizeof(Elf_Sym); + this->_alignment = sizeof(Elf_Addr); + this->_type = SHT_SYMTAB; +} + +template <class ELFT> +void 
SymbolTable<ELFT>::addDefinedAtom(Elf_Sym &sym, const DefinedAtom *da, + int64_t addr) { + unsigned char binding = 0, type = 0; + sym.st_size = da->size(); + DefinedAtom::ContentType ct; + switch (ct = da->contentType()) { + case DefinedAtom::typeCode: + case DefinedAtom::typeStub: + sym.st_value = addr; + type = llvm::ELF::STT_FUNC; + break; + case DefinedAtom::typeResolver: + sym.st_value = addr; + type = llvm::ELF::STT_GNU_IFUNC; + break; + case DefinedAtom::typeDataFast: + case DefinedAtom::typeData: + case DefinedAtom::typeConstant: + sym.st_value = addr; + type = llvm::ELF::STT_OBJECT; + break; + case DefinedAtom::typeGOT: + sym.st_value = addr; + type = llvm::ELF::STT_NOTYPE; + break; + case DefinedAtom::typeZeroFill: + case DefinedAtom::typeZeroFillFast: + type = llvm::ELF::STT_OBJECT; + sym.st_value = addr; + break; + case DefinedAtom::typeThreadData: + case DefinedAtom::typeThreadZeroFill: + type = llvm::ELF::STT_TLS; + sym.st_value = addr; + break; + default: + type = llvm::ELF::STT_NOTYPE; + } + if (da->customSectionName() == da->name()) + type = llvm::ELF::STT_SECTION; + + if (da->scope() == DefinedAtom::scopeTranslationUnit) + binding = llvm::ELF::STB_LOCAL; + else + binding = llvm::ELF::STB_GLOBAL; + + sym.setBindingAndType(binding, type); +} + +template <class ELFT> +void SymbolTable<ELFT>::addAbsoluteAtom(Elf_Sym &sym, const AbsoluteAtom *aa, + int64_t addr) { + unsigned char binding = 0, type = 0; + type = llvm::ELF::STT_OBJECT; + sym.st_shndx = llvm::ELF::SHN_ABS; + switch (aa->scope()) { + case AbsoluteAtom::scopeLinkageUnit: + sym.setVisibility(llvm::ELF::STV_HIDDEN); + binding = llvm::ELF::STB_LOCAL; + break; + case AbsoluteAtom::scopeTranslationUnit: + binding = llvm::ELF::STB_LOCAL; + break; + case AbsoluteAtom::scopeGlobal: + binding = llvm::ELF::STB_GLOBAL; + break; + } + sym.st_value = addr; + sym.setBindingAndType(binding, type); +} + +template <class ELFT> +void SymbolTable<ELFT>::addSharedLibAtom(Elf_Sym &sym, + const SharedLibraryAtom 
*aa) { + unsigned char binding = 0, type = 0; + if (aa->type() == SharedLibraryAtom::Type::Data) { + type = llvm::ELF::STT_OBJECT; + sym.st_size = aa->size(); + } else + type = llvm::ELF::STT_FUNC; + sym.st_shndx = llvm::ELF::SHN_UNDEF; + binding = llvm::ELF::STB_GLOBAL; + sym.setBindingAndType(binding, type); +} + +template <class ELFT> +void SymbolTable<ELFT>::addUndefinedAtom(Elf_Sym &sym, + const UndefinedAtom *ua) { + unsigned char binding = 0, type = 0; + sym.st_value = 0; + type = llvm::ELF::STT_NOTYPE; + if (ua->canBeNull()) + binding = llvm::ELF::STB_WEAK; + else + binding = llvm::ELF::STB_GLOBAL; + sym.setBindingAndType(binding, type); +} + +/// Add a symbol to the symbol Table, definedAtoms which get added to the symbol +/// section don't have their virtual addresses set at the time of adding the +/// symbol to the symbol table(Example: dynamic symbols), the addresses needs +/// to be updated in the table before writing the dynamic symbol table +/// information +template <class ELFT> +void SymbolTable<ELFT>::addSymbol(const Atom *atom, int32_t sectionIndex, + uint64_t addr, + const lld::AtomLayout *atomLayout) { + Elf_Sym symbol; + + if (atom->name().empty()) + return; + + symbol.st_name = _stringSection->addString(atom->name()); + symbol.st_size = 0; + symbol.st_shndx = sectionIndex; + symbol.st_value = 0; + symbol.st_other = 0; + symbol.setVisibility(llvm::ELF::STV_DEFAULT); + + // Add all the atoms + if (const DefinedAtom *da = dyn_cast<const DefinedAtom>(atom)) + addDefinedAtom(symbol, da, addr); + else if (const AbsoluteAtom *aa = dyn_cast<const AbsoluteAtom>(atom)) + addAbsoluteAtom(symbol, aa, addr); + else if (isa<const SharedLibraryAtom>(atom)) + addSharedLibAtom(symbol, dyn_cast<SharedLibraryAtom>(atom)); + else + addUndefinedAtom(symbol, dyn_cast<UndefinedAtom>(atom)); + + _symbolTable.push_back(SymbolEntry(atom, symbol, atomLayout)); + this->_fsize += sizeof(Elf_Sym); + if (this->_flags & SHF_ALLOC) + this->_msize = this->_fsize; +} + 
+template <class ELFT> void SymbolTable<ELFT>::finalize(bool sort) { + // sh_info should be one greater than last symbol with STB_LOCAL binding + // we sort the symbol table to keep all local symbols at the beginning + if (sort) + sortSymbols(); + + uint16_t shInfo = 0; + for (const auto &i : _symbolTable) { + if (i._symbol.getBinding() != llvm::ELF::STB_LOCAL) + break; + shInfo++; + } + this->_info = shInfo; + this->_link = _stringSection->ordinal(); + if (this->_outputSection) { + this->_outputSection->setInfo(this->_info); + this->_outputSection->setLink(this->_link); + } +} + +template <class ELFT> +void SymbolTable<ELFT>::write(ELFWriter *writer, TargetLayout<ELFT> &, + llvm::FileOutputBuffer &buffer) { + uint8_t *chunkBuffer = buffer.getBufferStart(); + uint8_t *dest = chunkBuffer + this->fileOffset(); + for (const auto &sti : _symbolTable) { + memcpy(dest, &sti._symbol, sizeof(Elf_Sym)); + dest += sizeof(Elf_Sym); + } +} + +template <class ELFT> class HashSection; + +template <class ELFT> class DynamicSymbolTable : public SymbolTable<ELFT> { +public: + DynamicSymbolTable(const ELFLinkingContext &context, + TargetLayout<ELFT> &layout, const char *str, int32_t order) + : SymbolTable<ELFT>(context, str, order), _hashTable(nullptr), + _layout(layout) { + this->_type = SHT_DYNSYM; + this->_flags = SHF_ALLOC; + this->_msize = this->_fsize; + } + + // Set the dynamic hash table for symbols to be added into + void setHashTable(HashSection<ELFT> *hashTable) { _hashTable = hashTable; } + + // Add all the dynamic symbos to the hash table + void addSymbolsToHashTable() { + int index = 0; + for (auto &ste : this->_symbolTable) { + if (!ste._atom) + _hashTable->addSymbol("", index); + else + _hashTable->addSymbol(ste._atom->name(), index); + ++index; + } + } + + void finalize() override { + // Defined symbols which have been added into the dynamic symbol table + // don't have their addresses known until addresses have been assigned + // so let's update the symbol values 
after they have got assigned + for (auto &ste: this->_symbolTable) { + const lld::AtomLayout *atomLayout = ste._atomLayout; + if (!atomLayout) + continue; + ste._symbol.st_value = atomLayout->_virtualAddr; + } + + // Don't sort the symbols + SymbolTable<ELFT>::finalize(false); + } + +protected: + HashSection<ELFT> *_hashTable; + TargetLayout<ELFT> &_layout; +}; + +template <class ELFT> class RelocationTable : public Section<ELFT> { +public: + typedef llvm::object::Elf_Rel_Impl<ELFT, false> Elf_Rel; + typedef llvm::object::Elf_Rel_Impl<ELFT, true> Elf_Rela; + + RelocationTable(const ELFLinkingContext &context, StringRef str, + int32_t order) + : Section<ELFT>(context, str, "RelocationTable"), _symbolTable(nullptr) { + this->setOrder(order); + this->_flags = SHF_ALLOC; + // Set the alignment properly depending on the target architecture + this->_alignment = ELFT::Is64Bits ? 8 : 4; + if (context.isRelaOutputFormat()) { + this->_entSize = sizeof(Elf_Rela); + this->_type = SHT_RELA; + } else { + this->_entSize = sizeof(Elf_Rel); + this->_type = SHT_REL; + } + } + + /// \returns the index of the relocation added. + uint32_t addRelocation(const DefinedAtom &da, const Reference &r) { + _relocs.emplace_back(&da, &r); + this->_fsize = _relocs.size() * this->_entSize; + this->_msize = this->_fsize; + return _relocs.size() - 1; + } + + bool getRelocationIndex(const Reference &r, uint32_t &res) { + auto rel = std::find_if( + _relocs.begin(), _relocs.end(), + [&](const std::pair<const DefinedAtom *, const Reference *> &p) { + if (p.second == &r) + return true; + return false; + }); + if (rel == _relocs.end()) + return false; + res = std::distance(_relocs.begin(), rel); + return true; + } + + void setSymbolTable(const DynamicSymbolTable<ELFT> *symbolTable) { + _symbolTable = symbolTable; + } + + /// \brief Check if any relocation modifies a read-only section. 
+ bool canModifyReadonlySection() const { + for (const auto &rel : _relocs) { + const DefinedAtom *atom = rel.first; + if ((atom->permissions() & DefinedAtom::permRW_) != DefinedAtom::permRW_) + return true; + } + return false; + } + + void finalize() override { + this->_link = _symbolTable ? _symbolTable->ordinal() : 0; + if (this->_outputSection) + this->_outputSection->setLink(this->_link); + } + + void write(ELFWriter *writer, TargetLayout<ELFT> &layout, + llvm::FileOutputBuffer &buffer) override { + uint8_t *chunkBuffer = buffer.getBufferStart(); + uint8_t *dest = chunkBuffer + this->fileOffset(); + for (const auto &rel : _relocs) { + if (this->_context.isRelaOutputFormat()) { + auto &r = *reinterpret_cast<Elf_Rela *>(dest); + writeRela(writer, r, *rel.first, *rel.second); + DEBUG_WITH_TYPE("ELFRelocationTable", + llvm::dbgs() + << rel.second->kindValue() << " relocation at " + << rel.first->name() << "@" << r.r_offset << " to " + << rel.second->target()->name() << "@" << r.r_addend + << "\n";); + } else { + auto &r = *reinterpret_cast<Elf_Rel *>(dest); + writeRel(writer, r, *rel.first, *rel.second); + DEBUG_WITH_TYPE("ELFRelocationTable", + llvm::dbgs() << rel.second->kindValue() + << " relocation at " << rel.first->name() + << "@" << r.r_offset << " to " + << rel.second->target()->name() << "\n";); + } + dest += this->_entSize; + } + } + +protected: + const DynamicSymbolTable<ELFT> *_symbolTable; + + virtual void writeRela(ELFWriter *writer, Elf_Rela &r, + const DefinedAtom &atom, const Reference &ref) { + r.setSymbolAndType(getSymbolIndex(ref.target()), ref.kindValue(), false); + r.r_offset = writer->addressOfAtom(&atom) + ref.offsetInAtom(); + // The addend is used only by relative relocations + if (this->_context.isRelativeReloc(ref)) + r.r_addend = writer->addressOfAtom(ref.target()) + ref.addend(); + else + r.r_addend = 0; + } + + virtual void writeRel(ELFWriter *writer, Elf_Rel &r, const DefinedAtom &atom, + const Reference &ref) { + 
r.setSymbolAndType(getSymbolIndex(ref.target()), ref.kindValue(), false); + r.r_offset = writer->addressOfAtom(&atom) + ref.offsetInAtom(); + } + + uint32_t getSymbolIndex(const Atom *a) { + return _symbolTable ? _symbolTable->getSymbolTableIndex(a) + : (uint32_t)STN_UNDEF; + } + +private: + std::vector<std::pair<const DefinedAtom *, const Reference *> > _relocs; +}; + +template <class ELFT> class HashSection; + +template <class ELFT> class DynamicTable : public Section<ELFT> { +public: + typedef llvm::object::Elf_Dyn_Impl<ELFT> Elf_Dyn; + typedef std::vector<Elf_Dyn> EntriesT; + + DynamicTable(const ELFLinkingContext &context, TargetLayout<ELFT> &layout, + StringRef str, int32_t order) + : Section<ELFT>(context, str, "DynamicSection"), _layout(layout) { + this->setOrder(order); + this->_entSize = sizeof(Elf_Dyn); + this->_alignment = ELFT::Is64Bits ? 8 : 4; + // Reserve space for the DT_NULL entry. + this->_fsize = sizeof(Elf_Dyn); + this->_msize = sizeof(Elf_Dyn); + this->_type = SHT_DYNAMIC; + this->_flags = SHF_ALLOC; + } + + range<typename EntriesT::iterator> entries() { return _entries; } + + /// \returns the index of the entry. + std::size_t addEntry(Elf_Dyn e) { + _entries.push_back(e); + this->_fsize = (_entries.size() * sizeof(Elf_Dyn)) + sizeof(Elf_Dyn); + this->_msize = this->_fsize; + return _entries.size() - 1; + } + + void write(ELFWriter *writer, TargetLayout<ELFT> &layout, + llvm::FileOutputBuffer &buffer) override { + uint8_t *chunkBuffer = buffer.getBufferStart(); + uint8_t *dest = chunkBuffer + this->fileOffset(); + // Add the null entry. 
+ Elf_Dyn d; + d.d_tag = 0; + d.d_un.d_val = 0; + _entries.push_back(d); + std::memcpy(dest, _entries.data(), this->_fsize); + } + + virtual void createDefaultEntries() { + bool isRela = this->_context.isRelaOutputFormat(); + + Elf_Dyn dyn; + dyn.d_un.d_val = 0; + + dyn.d_tag = DT_HASH; + _dt_hash = addEntry(dyn); + dyn.d_tag = DT_STRTAB; + _dt_strtab = addEntry(dyn); + dyn.d_tag = DT_SYMTAB; + _dt_symtab = addEntry(dyn); + dyn.d_tag = DT_STRSZ; + _dt_strsz = addEntry(dyn); + dyn.d_tag = DT_SYMENT; + _dt_syment = addEntry(dyn); + if (_layout.hasDynamicRelocationTable()) { + dyn.d_tag = isRela ? DT_RELA : DT_REL; + _dt_rela = addEntry(dyn); + dyn.d_tag = isRela ? DT_RELASZ : DT_RELSZ; + _dt_relasz = addEntry(dyn); + dyn.d_tag = isRela ? DT_RELAENT : DT_RELENT; + _dt_relaent = addEntry(dyn); + + if (_layout.getDynamicRelocationTable()->canModifyReadonlySection()) { + dyn.d_tag = DT_TEXTREL; + _dt_textrel = addEntry(dyn); + } + } + if (_layout.hasPLTRelocationTable()) { + dyn.d_tag = DT_PLTRELSZ; + _dt_pltrelsz = addEntry(dyn); + dyn.d_tag = getGotPltTag(); + _dt_pltgot = addEntry(dyn); + dyn.d_tag = DT_PLTREL; + dyn.d_un.d_val = isRela ? 
DT_RELA : DT_REL; + _dt_pltrel = addEntry(dyn); + dyn.d_un.d_val = 0; + dyn.d_tag = DT_JMPREL; + _dt_jmprel = addEntry(dyn); + } + } + + void doPreFlight() override { + Elf_Dyn dyn; + dyn.d_un.d_val = 0; + auto initArray = _layout.findOutputSection(".init_array"); + auto finiArray = _layout.findOutputSection(".fini_array"); + if (initArray) { + dyn.d_tag = DT_INIT_ARRAY; + _dt_init_array = addEntry(dyn); + dyn.d_tag = DT_INIT_ARRAYSZ; + _dt_init_arraysz = addEntry(dyn); + } + if (finiArray) { + dyn.d_tag = DT_FINI_ARRAY; + _dt_fini_array = addEntry(dyn); + dyn.d_tag = DT_FINI_ARRAYSZ; + _dt_fini_arraysz = addEntry(dyn); + } + if (getInitAtomLayout()) { + dyn.d_tag = DT_INIT; + _dt_init = addEntry(dyn); + } + if (getFiniAtomLayout()) { + dyn.d_tag = DT_FINI; + _dt_fini = addEntry(dyn); + } + } + + /// \brief Dynamic table tag for .got.plt section referencing. + /// Usually but not always targets use DT_PLTGOT for that. + virtual int64_t getGotPltTag() { return DT_PLTGOT; } + + void finalize() override { + StringTable<ELFT> *dynamicStringTable = + _dynamicSymbolTable->getStringTable(); + this->_link = dynamicStringTable->ordinal(); + if (this->_outputSection) { + this->_outputSection->setType(this->_type); + this->_outputSection->setInfo(this->_info); + this->_outputSection->setLink(this->_link); + } + } + + void setSymbolTable(DynamicSymbolTable<ELFT> *dynsym) { + _dynamicSymbolTable = dynsym; + } + + const DynamicSymbolTable<ELFT> *getSymbolTable() const { + return _dynamicSymbolTable; + } + + void setHashTable(HashSection<ELFT> *hsh) { _hashTable = hsh; } + + virtual void updateDynamicTable() { + StringTable<ELFT> *dynamicStringTable = + _dynamicSymbolTable->getStringTable(); + _entries[_dt_hash].d_un.d_val = _hashTable->virtualAddr(); + _entries[_dt_strtab].d_un.d_val = dynamicStringTable->virtualAddr(); + _entries[_dt_symtab].d_un.d_val = _dynamicSymbolTable->virtualAddr(); + _entries[_dt_strsz].d_un.d_val = dynamicStringTable->memSize(); + 
_entries[_dt_syment].d_un.d_val = _dynamicSymbolTable->getEntSize(); + auto initArray = _layout.findOutputSection(".init_array"); + if (initArray) { + _entries[_dt_init_array].d_un.d_val = initArray->virtualAddr(); + _entries[_dt_init_arraysz].d_un.d_val = initArray->memSize(); + } + auto finiArray = _layout.findOutputSection(".fini_array"); + if (finiArray) { + _entries[_dt_fini_array].d_un.d_val = finiArray->virtualAddr(); + _entries[_dt_fini_arraysz].d_un.d_val = finiArray->memSize(); + } + if (const auto *al = getInitAtomLayout()) + _entries[_dt_init].d_un.d_val = getAtomVirtualAddress(al); + if (const auto *al = getFiniAtomLayout()) + _entries[_dt_fini].d_un.d_val = getAtomVirtualAddress(al); + if (_layout.hasDynamicRelocationTable()) { + auto relaTbl = _layout.getDynamicRelocationTable(); + _entries[_dt_rela].d_un.d_val = relaTbl->virtualAddr(); + _entries[_dt_relasz].d_un.d_val = relaTbl->memSize(); + _entries[_dt_relaent].d_un.d_val = relaTbl->getEntSize(); + } + if (_layout.hasPLTRelocationTable()) { + auto relaTbl = _layout.getPLTRelocationTable(); + _entries[_dt_jmprel].d_un.d_val = relaTbl->virtualAddr(); + _entries[_dt_pltrelsz].d_un.d_val = relaTbl->memSize(); + auto gotplt = _layout.findOutputSection(".got.plt"); + _entries[_dt_pltgot].d_un.d_val = gotplt->virtualAddr(); + } + } + +protected: + EntriesT _entries; + + /// \brief Return a virtual address (maybe adjusted) for the atom layout + /// Some targets like microMIPS and ARM Thumb use the last bit + /// of a symbol's value to mark 'compressed' code. This function allows + /// to adjust a virtal address before using it in the dynamic table tag. 
+ virtual uint64_t getAtomVirtualAddress(const AtomLayout *al) const { + return al->_virtualAddr; + } + +private: + std::size_t _dt_hash; + std::size_t _dt_strtab; + std::size_t _dt_symtab; + std::size_t _dt_rela; + std::size_t _dt_relasz; + std::size_t _dt_relaent; + std::size_t _dt_strsz; + std::size_t _dt_syment; + std::size_t _dt_pltrelsz; + std::size_t _dt_pltgot; + std::size_t _dt_pltrel; + std::size_t _dt_jmprel; + std::size_t _dt_init_array; + std::size_t _dt_init_arraysz; + std::size_t _dt_fini_array; + std::size_t _dt_fini_arraysz; + std::size_t _dt_textrel; + std::size_t _dt_init; + std::size_t _dt_fini; + TargetLayout<ELFT> &_layout; + DynamicSymbolTable<ELFT> *_dynamicSymbolTable; + HashSection<ELFT> *_hashTable; + + const AtomLayout *getInitAtomLayout() { + auto al = _layout.findAtomLayoutByName(this->_context.initFunction()); + if (al && isa<DefinedAtom>(al->_atom)) + return al; + return nullptr; + } + + const AtomLayout *getFiniAtomLayout() { + auto al = _layout.findAtomLayoutByName(this->_context.finiFunction()); + if (al && isa<DefinedAtom>(al->_atom)) + return al; + return nullptr; + } +}; + +template <class ELFT> class InterpSection : public Section<ELFT> { +public: + InterpSection(const ELFLinkingContext &context, StringRef str, int32_t order, + StringRef interp) + : Section<ELFT>(context, str, "Dynamic:Interp"), _interp(interp) { + this->setOrder(order); + this->_alignment = 1; + // + 1 for null term. 
+ this->_fsize = interp.size() + 1; + this->_msize = this->_fsize; + this->_type = SHT_PROGBITS; + this->_flags = SHF_ALLOC; + } + + void write(ELFWriter *writer, TargetLayout<ELFT> &layout, + llvm::FileOutputBuffer &buffer) { + uint8_t *chunkBuffer = buffer.getBufferStart(); + uint8_t *dest = chunkBuffer + this->fileOffset(); + std::memcpy(dest, _interp.data(), _interp.size()); + } + +private: + StringRef _interp; +}; + +/// The hash table in the dynamic linker is organized into +/// +/// [ nbuckets ] +/// [ nchains ] +/// [ buckets[0] ] +/// ......................... +/// [ buckets[nbuckets-1] ] +/// [ chains[0] ] +/// ......................... +/// [ chains[nchains - 1] ] +/// +/// nbuckets - total number of hash buckets +/// nchains is equal to the number of dynamic symbols. +/// +/// The symbol is searched by the dynamic linker using the below approach. +/// * Calculate the hash of the symbol that needs to be searched +/// * Take the value from the buckets[hash % nbuckets] as the index of symbol +/// * Compare the symbol's name, if true return, if false, look through the +/// * array since there was a collision + +template <class ELFT> class HashSection : public Section<ELFT> { + struct SymbolTableEntry { + StringRef _name; + uint32_t _index; + }; + +public: + HashSection(const ELFLinkingContext &context, StringRef name, int32_t order) + : Section<ELFT>(context, name, "Dynamic:Hash"), _symbolTable(nullptr) { + this->setOrder(order); + this->_entSize = 4; + this->_type = SHT_HASH; + this->_flags = SHF_ALLOC; + this->_alignment = ELFT::Is64Bits ? 
8 : 4; + this->_fsize = 0; + this->_msize = 0; + } + + /// \brief add the dynamic symbol into the table so that the + /// hash could be calculated + void addSymbol(StringRef name, uint32_t index) { + SymbolTableEntry ste; + ste._name = name; + ste._index = index; + _entries.push_back(ste); + } + + /// \brief Set the dynamic symbol table + void setSymbolTable(const DynamicSymbolTable<ELFT> *symbolTable) { + _symbolTable = symbolTable; + } + + // The size of the section has to be determined so that fileoffsets + // may be properly assigned. Let's calculate the buckets and the chains + // and fill the chains and the buckets hash table used by the dynamic + // linker and update the filesize and memory size accordingly + void doPreFlight() override { + // The number of buckets to use for a certain number of symbols. + // If there are less than 3 symbols, 1 bucket will be used. If + // there are less than 17 symbols, 3 buckets will be used, and so + // forth. The bucket numbers are defined by GNU ld. We use the + // same rules here so we generate hash sections with the same + // size as those generated by GNU ld. 
+ uint32_t hashBuckets[] = { 1, 3, 17, 37, 67, 97, 131, 197, 263, 521, 1031, + 2053, 4099, 8209, 16411, 32771, 65537, 131101, + 262147 }; + int hashBucketsCount = sizeof(hashBuckets) / sizeof(uint32_t); + + unsigned int bucketsCount = 0; + unsigned int dynSymCount = _entries.size(); + + // Get the number of buckes that we want to use + for (int i = 0; i < hashBucketsCount; ++i) { + if (dynSymCount < hashBuckets[i]) + break; + bucketsCount = hashBuckets[i]; + } + _buckets.resize(bucketsCount); + _chains.resize(_entries.size()); + + // Create the hash table for the dynamic linker + for (auto ai : _entries) { + unsigned int dynsymIndex = ai._index; + unsigned int bucketpos = llvm::object::elf_hash(ai._name) % bucketsCount; + _chains[dynsymIndex] = _buckets[bucketpos]; + _buckets[bucketpos] = dynsymIndex; + } + + this->_fsize = (2 + _chains.size() + _buckets.size()) * sizeof(uint32_t); + this->_msize = this->_fsize; + } + + void finalize() override { + this->_link = _symbolTable ? _symbolTable->ordinal() : 0; + if (this->_outputSection) + this->_outputSection->setLink(this->_link); + } + + void write(ELFWriter *writer, TargetLayout<ELFT> &layout, + llvm::FileOutputBuffer &buffer) override { + uint8_t *chunkBuffer = buffer.getBufferStart(); + uint8_t *dest = chunkBuffer + this->fileOffset(); + uint32_t bucketChainCounts[2]; + bucketChainCounts[0] = _buckets.size(); + bucketChainCounts[1] = _chains.size(); + std::memcpy(dest, (char *)bucketChainCounts, sizeof(bucketChainCounts)); + dest += sizeof(bucketChainCounts); + // write bucket values + for (auto bi : _buckets) { + uint32_t val = (bi); + std::memcpy(dest, &val, sizeof(uint32_t)); + dest += sizeof(uint32_t); + } + // write chain values + for (auto ci : _chains) { + uint32_t val = (ci); + std::memcpy(dest, &val, sizeof(uint32_t)); + dest += sizeof(uint32_t); + } + } + +private: + std::vector<SymbolTableEntry> _entries; + std::vector<uint32_t> _buckets; + std::vector<uint32_t> _chains; + const DynamicSymbolTable<ELFT> 
*_symbolTable; +}; + +template <class ELFT> class EHFrameHeader : public Section<ELFT> { +public: + EHFrameHeader(const ELFLinkingContext &context, StringRef name, + TargetLayout<ELFT> &layout, int32_t order) + : Section<ELFT>(context, name, "EHFrameHeader"), _ehFrameOffset(0), + _layout(layout) { + this->setOrder(order); + this->_entSize = 0; + this->_type = SHT_PROGBITS; + this->_flags = SHF_ALLOC; + this->_alignment = ELFT::Is64Bits ? 8 : 4; + // Minimum size for empty .eh_frame_hdr. + this->_fsize = 1 + 1 + 1 + 1 + 4; + this->_msize = this->_fsize; + } + + void doPreFlight() override { + // TODO: Generate a proper binary search table. + } + + void finalize() override { + OutputSection<ELFT> *s = _layout.findOutputSection(".eh_frame"); + OutputSection<ELFT> *h = _layout.findOutputSection(".eh_frame_hdr"); + if (s && h) + _ehFrameOffset = s->virtualAddr() - (h->virtualAddr() + 4); + } + + void write(ELFWriter *writer, TargetLayout<ELFT> &layout, + llvm::FileOutputBuffer &buffer) override { + uint8_t *chunkBuffer = buffer.getBufferStart(); + uint8_t *dest = chunkBuffer + this->fileOffset(); + int pos = 0; + dest[pos++] = 1; // version + dest[pos++] = llvm::dwarf::DW_EH_PE_pcrel | + llvm::dwarf::DW_EH_PE_sdata4; // eh_frame_ptr_enc + dest[pos++] = llvm::dwarf::DW_EH_PE_omit; // fde_count_enc + dest[pos++] = llvm::dwarf::DW_EH_PE_omit; // table_enc + *reinterpret_cast<typename llvm::object::ELFFile<ELFT>::Elf_Sword *>( + dest + pos) = _ehFrameOffset; + } + +private: + int32_t _ehFrameOffset; + TargetLayout<ELFT> &_layout; +}; +} // end namespace elf +} // end namespace lld + +#endif diff --git a/lib/ReaderWriter/ELF/SegmentChunks.h b/lib/ReaderWriter/ELF/SegmentChunks.h new file mode 100644 index 000000000000..f2a975aaeed0 --- /dev/null +++ b/lib/ReaderWriter/ELF/SegmentChunks.h @@ -0,0 +1,686 @@ +//===- lib/ReaderWriter/ELF/SegmentChunks.h -------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois 
Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_ELF_SEGMENT_CHUNKS_H +#define LLD_READER_WRITER_ELF_SEGMENT_CHUNKS_H + +#include "Chunk.h" +#include "Layout.h" +#include "SectionChunks.h" +#include "Writer.h" +#include "lld/Core/range.h" +#include "lld/Core/Writer.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Object/ELF.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FileOutputBuffer.h" +#include <memory> + +namespace lld { +namespace elf { + +template <typename ELFT> class DefaultLayout; + +/// \brief A segment can be divided into segment slices +/// depending on how the segments can be split +template<class ELFT> +class SegmentSlice { +public: + typedef typename std::vector<Chunk<ELFT> *>::iterator SectionIter; + + SegmentSlice() { } + + /// Set the start of the slice. + void setStart(int32_t s) { _startSection = s; } + + // Set the segment slice start and end iterators. 
This is used to walk through + // the sections that are part of the Segment slice + void setSections(range<SectionIter> sections) { _sections = sections; } + + // Return the fileOffset of the slice + uint64_t fileOffset() const { return _offset; } + + void setFileOffset(uint64_t offset) { _offset = offset; } + + // Return the size of the slice + uint64_t fileSize() const { return _fsize; } + + void setFileSize(uint64_t filesz) { _fsize = filesz; } + + // Return the start of the slice + int32_t startSection() const { return _startSection; } + + // Return the start address of the slice + uint64_t virtualAddr() const { return _addr; } + + // Return the memory size of the slice + uint64_t memSize() const { return _memSize; } + + // Return the alignment of the slice + uint64_t alignment() const { return _alignment; } + + void setMemSize(uint64_t memsz) { _memSize = memsz; } + + void setVirtualAddr(uint64_t addr) { _addr = addr; } + + void setAlign(uint64_t align) { _alignment = align; } + + static bool compare_slices(SegmentSlice<ELFT> *a, SegmentSlice<ELFT> *b) { + return a->startSection() < b->startSection(); + } + + range<SectionIter> sections() { return _sections; } + +private: + range<SectionIter> _sections; + int32_t _startSection; + uint64_t _addr; + uint64_t _offset; + uint64_t _alignment; + uint64_t _fsize; + uint64_t _memSize; +}; + +/// \brief A segment contains a set of sections, that have similar properties +// the sections are already separated based on different flags and properties +// the segment is just a way to concatenate sections to segments +template<class ELFT> +class Segment : public Chunk<ELFT> { +public: + typedef typename std::vector<SegmentSlice<ELFT> *>::iterator SliceIter; + typedef typename std::vector<Chunk<ELFT> *>::iterator SectionIter; + + Segment(const ELFLinkingContext &context, StringRef name, + const Layout::SegmentType type); + + /// \brief the Order of segments that appear in the output file + enum SegmentOrder { + permUnknown, + 
permRWX, + permRX, + permR, + permRWL, + permRW, + permNonAccess + }; + + /// append a section to a segment + virtual void append(Chunk<ELFT> *chunk); + + /// Sort segments depending on the property + /// If we have a Program Header segment, it should appear first + /// If we have a INTERP segment, that should appear after the Program Header + /// All Loadable segments appear next in this order + /// All Read Write Execute segments follow + /// All Read Execute segments appear next + /// All Read only segments appear first + /// All Write execute segments follow + static bool compareSegments(Segment<ELFT> *sega, Segment<ELFT> *segb); + + /// \brief Start assigning file offset to the segment chunks The fileoffset + /// needs to be page at the start of the segment and in addition the + /// fileoffset needs to be aligned to the max section alignment within the + /// segment. This is required so that the ELF property p_poffset % p_align = + /// p_vaddr mod p_align holds true. + /// The algorithm starts off by assigning the startOffset thats passed in as + /// parameter to the first section in the segment, if the difference between + /// the newly computed offset is greater than a page, then we create a segment + /// slice, as it would be a waste of virtual memory just to be filled with + /// zeroes + void assignFileOffsets(uint64_t startOffset); + + /// \brief Assign virtual addresses to the slices + void assignVirtualAddress(uint64_t addr); + + // Write the Segment + void write(ELFWriter *writer, TargetLayout<ELFT> &layout, + llvm::FileOutputBuffer &buffer); + + int64_t flags() const; + + /// Prepend a generic chunk to the segment. 
+ void prepend(Chunk<ELFT> *c) { + _sections.insert(_sections.begin(), c); + } + + /// Finalize the segment before assigning File Offsets / Virtual addresses + void doPreFlight() {} + + /// Finalize the segment, before we want to write the segment header + /// information + void finalize() { + // We want to finalize the segment values for now only for non loadable + // segments, since those values are not set in the Layout + if (_segmentType == llvm::ELF::PT_LOAD) + return; + // The size is the difference of the + // last section to the first section, especially for TLS because + // the TLS segment contains both .tdata/.tbss + this->setFileOffset(_sections.front()->fileOffset()); + this->setVirtualAddr(_sections.front()->virtualAddr()); + size_t startFileOffset = _sections.front()->fileOffset(); + size_t startAddr = _sections.front()->virtualAddr(); + for (auto ai : _sections) { + this->_fsize = ai->fileOffset() + ai->fileSize() - startFileOffset; + this->_msize = ai->virtualAddr() + ai->memSize() - startAddr; + } + } + + // For LLVM RTTI + static bool classof(const Chunk<ELFT> *c) { + return c->kind() == Chunk<ELFT>::Kind::ELFSegment; + } + + // Getters + int32_t sectionCount() const { return _sections.size(); } + + /// \brief, this function returns the type of segment (PT_*) + Layout::SegmentType segmentType() { return _segmentType; } + + /// \brief return the segment type depending on the content, + /// If the content corresponds to Code, this will return Segment::Code + /// If the content corresponds to Data, this will return Segment::Data + /// If the content corresponds to TLS, this will return Segment::TLS + virtual int getContentType() const { + int64_t fl = flags(); + switch (_segmentType) { + case llvm::ELF::PT_LOAD: { + if (fl && llvm::ELF::PF_X) + return Chunk<ELFT>::ContentType::Code; + if (fl && llvm::ELF::PF_W) + return Chunk<ELFT>::ContentType::Data; + } + case llvm::ELF::PT_TLS: + return Chunk<ELFT>::ContentType::TLS; + case llvm::ELF::PT_NOTE: + 
return Chunk<ELFT>::ContentType::Note; + default: + return Chunk<ELFT>::ContentType::Unknown; + } + } + + int pageSize() const { return this->_context.getPageSize(); } + + int rawflags() const { return _atomflags; } + + int64_t atomflags() const { + switch (_atomflags) { + + case DefinedAtom::permUnknown: + return permUnknown; + + case DefinedAtom::permRWX: + return permRWX; + + case DefinedAtom::permR_X: + return permRX; + + case DefinedAtom::permR__: + return permR; + + case DefinedAtom::permRW_L: + return permRWL; + + case DefinedAtom::permRW_: + return permRW; + + case DefinedAtom::perm___: + default: + return permNonAccess; + } + } + + int64_t numSlices() const { return _segmentSlices.size(); } + + range<SliceIter> slices() { return _segmentSlices; } + + Chunk<ELFT> *firstSection() { return _sections[0]; } + +private: + + /// \brief Check if the chunk needs to be aligned + bool needAlign(Chunk<ELFT> *chunk) const { + if (chunk->getContentType() == Chunk<ELFT>::ContentType::Data && + _outputMagic == ELFLinkingContext::OutputMagic::NMAGIC) + return true; + return false; + } + + // Cached value of outputMagic + ELFLinkingContext::OutputMagic _outputMagic; + +protected: + /// \brief Section or some other chunk type. + std::vector<Chunk<ELFT> *> _sections; + std::vector<SegmentSlice<ELFT> *> _segmentSlices; + Layout::SegmentType _segmentType; + uint64_t _flags; + int64_t _atomflags; + llvm::BumpPtrAllocator _segmentAllocate; +}; + +/// This chunk represents a linker script expression that needs to be calculated +/// at the time the virtual addresses for the parent segment are being assigned. 
+template <class ELFT> class ExpressionChunk : public Chunk<ELFT> { +public: + ExpressionChunk(ELFLinkingContext &ctx, const script::SymbolAssignment *expr) + : Chunk<ELFT>(StringRef(), Chunk<ELFT>::Kind::Expression, ctx), + _expr(expr), _linkerScriptSema(ctx.linkerScriptSema()) { + this->_alignment = 1; + } + + static bool classof(const Chunk<ELFT> *c) { + return c->kind() == Chunk<ELFT>::Kind::Expression; + } + + int getContentType() const override { + return Chunk<ELFT>::ContentType::Unknown; + } + void write(ELFWriter *, TargetLayout<ELFT> &, + llvm::FileOutputBuffer &) override {} + void doPreFlight() override {} + void finalize() override {} + + std::error_code evalExpr(uint64_t &curPos) { + return _linkerScriptSema.evalExpr(_expr, curPos); + } + +private: + const script::SymbolAssignment *_expr; + script::Sema &_linkerScriptSema; +}; + +/// \brief A Program Header segment contains a set of chunks instead of sections +/// The segment doesn't contain any slice +template <class ELFT> class ProgramHeaderSegment : public Segment<ELFT> { +public: + ProgramHeaderSegment(const ELFLinkingContext &context) + : Segment<ELFT>(context, "PHDR", llvm::ELF::PT_PHDR) { + this->_alignment = 8; + this->_flags = (llvm::ELF::SHF_ALLOC | llvm::ELF::SHF_EXECINSTR); + } + + /// Finalize the segment, before we want to write the segment header + /// information + void finalize() { + // If the segment is of type Program Header, then the values fileOffset + // and the fileSize need to be picked up from the last section, the first + // section points to the ELF header and the second chunk points to the + // actual program headers + this->setFileOffset(this->_sections.back()->fileOffset()); + this->setVirtualAddr(this->_sections.back()->virtualAddr()); + this->_fsize = this->_sections.back()->fileSize(); + this->_msize = this->_sections.back()->memSize(); + } + +}; + +template <class ELFT> +Segment<ELFT>::Segment(const ELFLinkingContext &context, StringRef name, + const 
Layout::SegmentType type) + : Chunk<ELFT>(name, Chunk<ELFT>::Kind::ELFSegment, context), + _segmentType(type), _flags(0), _atomflags(0) { + this->_alignment = 0; + this->_fsize = 0; + _outputMagic = context.getOutputMagic(); +} + +// This function actually is used, but not in all instantiations of Segment. +LLVM_ATTRIBUTE_UNUSED +static DefinedAtom::ContentPermissions toAtomPerms(uint64_t flags) { + switch (flags & (SHF_ALLOC | SHF_WRITE | SHF_EXECINSTR)) { + case SHF_ALLOC | SHF_WRITE | SHF_EXECINSTR: + return DefinedAtom::permRWX; + case SHF_ALLOC | SHF_EXECINSTR: + return DefinedAtom::permR_X; + case SHF_ALLOC: + return DefinedAtom::permR__; + case SHF_ALLOC | SHF_WRITE: + return DefinedAtom::permRW_; + default: + return DefinedAtom::permUnknown; + } +} + +template <class ELFT> void Segment<ELFT>::append(Chunk<ELFT> *chunk) { + _sections.push_back(chunk); + Section<ELFT> *section = dyn_cast<Section<ELFT>>(chunk); + if (!section) + return; + if (_flags < section->getFlags()) + _flags |= section->getFlags(); + if (_atomflags < toAtomPerms(_flags)) + _atomflags = toAtomPerms(_flags); + if (this->_alignment < section->alignment()) + this->_alignment = section->alignment(); +} + +template <class ELFT> +bool Segment<ELFT>::compareSegments(Segment<ELFT> *sega, Segment<ELFT> *segb) { + int64_t type1 = sega->segmentType(); + int64_t type2 = segb->segmentType(); + + if (type1 == type2) + return sega->atomflags() < segb->atomflags(); + + // The single PT_PHDR segment is required to precede any loadable + // segment. We simply make it always first. + if (type1 == llvm::ELF::PT_PHDR) + return true; + if (type2 == llvm::ELF::PT_PHDR) + return false; + + // The single PT_INTERP segment is required to precede any loadable + // segment. We simply make it always second. + if (type1 == llvm::ELF::PT_INTERP) + return true; + if (type2 == llvm::ELF::PT_INTERP) + return false; + + // We then put PT_LOAD segments before any other segments. 
/// \brief Assign file offsets to this segment, its slices and their sections.
///
/// Walks every slice and every section inside it, starting at \p startOffset.
/// Each section's offset is first rounded up to the section's own alignment
/// and may then be pushed further by page alignment rules that depend on the
/// output magic and on the context's alignSegments() setting. On return the
/// segment's file offset is \p startOffset and its file size is the distance
/// from \p startOffset to the end of the last section.
///
/// \param startOffset File offset at which this segment begins.
template <class ELFT>
void Segment<ELFT>::assignFileOffsets(uint64_t startOffset) {
  uint64_t fileOffset = startOffset;
  uint64_t curSliceFileOffset = fileOffset;
  // Ensures the needAlign() page alignment below is applied at most once.
  bool isDataPageAlignedForNMagic = false;
  bool alignSegments = this->_context.alignSegments();
  uint64_t p_align = this->_context.getPageSize();
  // End address (virtual address + memory size) of the previous section.
  uint64_t lastVirtualAddress = 0;

  this->setFileOffset(startOffset);
  for (auto &slice : slices()) {
    bool isFirstSection = true;
    for (auto section : slice->sections()) {
      // Handle linker script expressions, which may change the offset: the
      // file offset advances by the gap between the end of the previous
      // section and the address of the expression chunk.
      if (!isFirstSection)
        if (auto expr = dyn_cast<ExpressionChunk<ELFT>>(section))
          fileOffset += expr->virtualAddr() - lastVirtualAddress;
      // Align fileoffset to the alignment of the section.
      fileOffset = llvm::RoundUpToAlignment(fileOffset, section->alignment());
      // The first section of a slice gets page-related alignment only when
      // the output magic is neither NMAGIC nor OMAGIC.
      if (isFirstSection &&
          _outputMagic != ELFLinkingContext::OutputMagic::NMAGIC &&
          _outputMagic != ELFLinkingContext::OutputMagic::OMAGIC) {
        // Align to a page only if the output is not
        // OutputMagic::NMAGIC/OutputMagic::OMAGIC
        if (alignSegments)
          fileOffset = llvm::RoundUpToAlignment(fileOffset, p_align);
        else {
          // Align according to ELF spec.
          // in p75, http://www.sco.com/developers/devspecs/gabi41.pdf
          // For a loadable section, make the file offset and the slice's
          // virtual address agree in their low (page offset) bits.
          uint64_t virtualAddress = slice->virtualAddr();
          Section<ELFT> *sect = dyn_cast<Section<ELFT>>(section);
          if (sect && sect->isLoadableSection() &&
              ((virtualAddress & (p_align - 1)) !=
               (fileOffset & (p_align - 1))))
            fileOffset = llvm::RoundUpToAlignment(fileOffset, p_align) +
                         (virtualAddress % p_align);
        }
      } else if (!isDataPageAlignedForNMagic && needAlign(section)) {
        // Otherwise, the first section for which needAlign() holds is placed
        // on a page boundary; this happens once per call.
        fileOffset =
            llvm::RoundUpToAlignment(fileOffset, this->_context.getPageSize());
        isDataPageAlignedForNMagic = true;
      }
      if (isFirstSection) {
        // The slice starts where its first section starts.
        slice->setFileOffset(fileOffset);
        isFirstSection = false;
        curSliceFileOffset = fileOffset;
      }
      section->setFileOffset(fileOffset);
      fileOffset += section->fileSize();
      lastVirtualAddress = section->virtualAddr() + section->memSize();
    }
    slice->setFileSize(fileOffset - curSliceFileOffset);
  }
  this->setFileSize(fileOffset - startOffset);
}
startSection = 0; + bool isFirstSection = true; + bool isDataPageAlignedForNMagic = false; + uint64_t startAddr = addr; + SegmentSlice<ELFT> *slice = nullptr; + uint64_t tlsStartAddr = 0; + bool alignSegments = this->_context.alignSegments(); + StringRef prevOutputSectionName = StringRef(); + + for (auto si = _sections.begin(); si != _sections.end(); ++si) { + // If this is first section in the segment, page align the section start + // address. The linker needs to align the data section to a page boundary + // only if NMAGIC is set. + if (isFirstSection) { + isFirstSection = false; + if (alignSegments && + _outputMagic != ELFLinkingContext::OutputMagic::NMAGIC && + _outputMagic != ELFLinkingContext::OutputMagic::OMAGIC) + // Align to a page only if the output is not + // OutputMagic::NMAGIC/OutputMagic::OMAGIC + startAddr = + llvm::RoundUpToAlignment(startAddr, this->_context.getPageSize()); + else if (!isDataPageAlignedForNMagic && needAlign(*si)) { + // If the linker outputmagic is set to OutputMagic::NMAGIC, align the + // Data to a page boundary. + startAddr = + llvm::RoundUpToAlignment(startAddr, this->_context.getPageSize()); + isDataPageAlignedForNMagic = true; + } + // align the startOffset to the section alignment + uint64_t newAddr = llvm::RoundUpToAlignment(startAddr, (*si)->alignment()); + // Handle linker script expressions, which *may update newAddr* if the + // expression assigns to "." + if (auto expr = dyn_cast<ExpressionChunk<ELFT>>(*si)) + expr->evalExpr(newAddr); + curSliceAddress = newAddr; + sliceAlign = (*si)->alignment(); + (*si)->setVirtualAddr(curSliceAddress); + + // Handle TLS. 
+ if (auto section = dyn_cast<Section<ELFT>>(*si)) { + if (section->getSegmentType() == llvm::ELF::PT_TLS) { + tlsStartAddr = + llvm::RoundUpToAlignment(tlsStartAddr, (*si)->alignment()); + section->assignVirtualAddress(tlsStartAddr); + tlsStartAddr += (*si)->memSize(); + } else { + section->assignVirtualAddress(newAddr); + } + } + // TBSS section is special in that it doesn't contribute to memory of any + // segment. If we see a tbss section, don't add memory size to addr The + // fileOffset is automatically taken care of since TBSS section does not + // end up using file size + if ((*si)->order() != DefaultLayout<ELFT>::ORDER_TBSS) + curSliceSize = (*si)->memSize(); + } else { + uint64_t curAddr = curSliceAddress + curSliceSize; + if (!isDataPageAlignedForNMagic && needAlign(*si)) { + // If the linker outputmagic is set to OutputMagic::NMAGIC, align the + // Data + // to a page boundary + curAddr = + llvm::RoundUpToAlignment(curAddr, this->_context.getPageSize()); + isDataPageAlignedForNMagic = true; + } + uint64_t newAddr = llvm::RoundUpToAlignment(curAddr, (*si)->alignment()); + // Handle linker script expressions, which *may update newAddr* if the + // expression assigns to "." + if (auto expr = dyn_cast<ExpressionChunk<ELFT>>(*si)) + expr->evalExpr(newAddr); + Section<ELFT> *sec = dyn_cast<Section<ELFT>>(*si); + StringRef curOutputSectionName; + if (sec) + curOutputSectionName = sec->outputSectionName(); + else { + // If this is a linker script expression, propagate the name of the + // previous section instead + if (isa<ExpressionChunk<ELFT>>(*si)) + curOutputSectionName = prevOutputSectionName; + else + curOutputSectionName = (*si)->name(); + } + bool autoCreateSlice = true; + if (curOutputSectionName == prevOutputSectionName) + autoCreateSlice = false; + // If the newAddress computed is more than a page away, let's create + // a separate segment, so that memory is not used up while running. 
+ // Dont create a slice, if the new section falls in the same output + // section as the previous section. + if (autoCreateSlice && + ((newAddr - curAddr) > this->_context.getPageSize()) && + (_outputMagic != ELFLinkingContext::OutputMagic::NMAGIC && + _outputMagic != ELFLinkingContext::OutputMagic::OMAGIC)) { + auto sliceIter = + std::find_if(_segmentSlices.begin(), _segmentSlices.end(), + [startSection](SegmentSlice<ELFT> *s) -> bool { + return s->startSection() == startSection; + }); + if (sliceIter == _segmentSlices.end()) { + slice = new (_segmentAllocate.Allocate<SegmentSlice<ELFT>>()) + SegmentSlice<ELFT>(); + _segmentSlices.push_back(slice); + } else { + slice = (*sliceIter); + } + slice->setStart(startSection); + slice->setSections(make_range(startSectionIter, si)); + slice->setMemSize(curSliceSize); + slice->setAlign(sliceAlign); + slice->setVirtualAddr(curSliceAddress); + // Start new slice + curSliceAddress = newAddr; + (*si)->setVirtualAddr(curSliceAddress); + startSectionIter = si; + startSection = currSection; + if (auto section = dyn_cast<Section<ELFT>>(*si)) + section->assignVirtualAddress(newAddr); + curSliceSize = newAddr - curSliceAddress + (*si)->memSize(); + sliceAlign = (*si)->alignment(); + } else { + if (sliceAlign < (*si)->alignment()) + sliceAlign = (*si)->alignment(); + (*si)->setVirtualAddr(newAddr); + // Handle TLS. + if (auto section = dyn_cast<Section<ELFT>>(*si)) { + if (section->getSegmentType() == llvm::ELF::PT_TLS) { + tlsStartAddr = + llvm::RoundUpToAlignment(tlsStartAddr, (*si)->alignment()); + section->assignVirtualAddress(tlsStartAddr); + tlsStartAddr += (*si)->memSize(); + } else { + section->assignVirtualAddress(newAddr); + } + } + // TBSS section is special in that it doesn't contribute to memory of + // any segment. If we see a tbss section, don't add memory size to addr + // The fileOffset is automatically taken care of since TBSS section does + // not end up using file size. 
+ if ((*si)->order() != DefaultLayout<ELFT>::ORDER_TBSS) + curSliceSize = newAddr - curSliceAddress + (*si)->memSize(); + else + curSliceSize = newAddr - curSliceAddress; + } + prevOutputSectionName = curOutputSectionName; + } + currSection++; + } + auto sliceIter = std::find_if(_segmentSlices.begin(), _segmentSlices.end(), + [startSection](SegmentSlice<ELFT> *s) -> bool { + return s->startSection() == startSection; + }); + if (sliceIter == _segmentSlices.end()) { + slice = new (_segmentAllocate.Allocate<SegmentSlice<ELFT>>()) + SegmentSlice<ELFT>(); + _segmentSlices.push_back(slice); + } else { + slice = (*sliceIter); + } + slice->setStart(startSection); + slice->setVirtualAddr(curSliceAddress); + slice->setMemSize(curSliceSize); + slice->setSections(make_range(startSectionIter, _sections.end())); + slice->setAlign(sliceAlign); + + // Set the segment memory size and the virtual address. + this->setMemSize(curSliceAddress - startAddr + curSliceSize); + this->setVirtualAddr(curSliceAddress); + std::stable_sort(_segmentSlices.begin(), _segmentSlices.end(), + SegmentSlice<ELFT>::compare_slices); +} + +// Write the Segment +template <class ELFT> +void Segment<ELFT>::write(ELFWriter *writer, TargetLayout<ELFT> &layout, + llvm::FileOutputBuffer &buffer) { + for (auto slice : slices()) + for (auto section : slice->sections()) + section->write(writer, layout, buffer); +} + +template<class ELFT> +int64_t +Segment<ELFT>::flags() const { + int64_t fl = 0; + if (_flags & llvm::ELF::SHF_ALLOC) + fl |= llvm::ELF::PF_R; + if (_flags & llvm::ELF::SHF_WRITE) + fl |= llvm::ELF::PF_W; + if (_flags & llvm::ELF::SHF_EXECINSTR) + fl |= llvm::ELF::PF_X; + return fl; +} +} // end namespace elf +} // end namespace lld + +#endif diff --git a/lib/ReaderWriter/ELF/TODO.txt b/lib/ReaderWriter/ELF/TODO.txt new file mode 100644 index 000000000000..90c334b781ba --- /dev/null +++ b/lib/ReaderWriter/ELF/TODO.txt @@ -0,0 +1,18 @@ +lib/ReaderWriter/ELF +~~~~~~~~~~~~~~~~~~~~ + +- Implement processing 
of DT_NEEDED elements including -rpath-link / + -rpath processing. + +- _GLOBAL_OFFSET_TABLE should be hidden and normally dropped from the output. + +- Merge SHT_NOTE sections only if applicable. + +- Do not create __got_* / __plt_* symbol table entries by default. + +- Weak references to symbols defined in a DSO should remain weak. + +- Fix section flags as they appear in input (update content permissions) + +- Check for errors in the ELFReader when creating atoms for LinkOnce + sections/Group sections. Add tests to account for the change when it happens. diff --git a/lib/ReaderWriter/ELF/TargetHandler.h b/lib/ReaderWriter/ELF/TargetHandler.h new file mode 100644 index 000000000000..ca7a442276d1 --- /dev/null +++ b/lib/ReaderWriter/ELF/TargetHandler.h @@ -0,0 +1,86 @@ +//===- lib/ReaderWriter/ELF/TargetHandler.h -------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief These interfaces provide target specific hooks to change the linker's +/// behavior. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_ELF_TARGET_HANDLER_H +#define LLD_READER_WRITER_ELF_TARGET_HANDLER_H + +#include "Layout.h" +#include "lld/Core/Atom.h" +#include "lld/Core/LLVM.h" +#include "lld/Core/LinkingContext.h" +#include "lld/Core/STDExtras.h" +#include "lld/ReaderWriter/ELFLinkingContext.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FileOutputBuffer.h" +#include <memory> +#include <vector> + +namespace lld { +namespace elf { +template <class ELFT> class DynamicTable; +template <class ELFT> class DynamicSymbolTable; +template <class ELFT> class ELFDefinedAtom; +template <class ELFT> class ELFReference; +class ELFWriter; +template <class ELFT> class ELFHeader; +template <class ELFT> class Section; +template <class ELFT> class TargetLayout; + +class TargetRelocationHandler { +public: + /// Constructor + TargetRelocationHandler() {} + virtual ~TargetRelocationHandler() {} + + virtual std::error_code applyRelocation(ELFWriter &, llvm::FileOutputBuffer &, + const lld::AtomLayout &, + const Reference &) const = 0; +}; + +/// \brief TargetHandler contains all the information responsible to handle a +/// a particular target on ELF. A target might wish to override implementation +/// of creating atoms and how the atoms are written to the output file. +template <class ELFT> class TargetHandler : public TargetHandlerBase { +public: + /// The layout determined completely by the Target. + virtual TargetLayout<ELFT> &getTargetLayout() = 0; + + /// Determine how relocations need to be applied. + virtual const TargetRelocationHandler &getRelocationHandler() const = 0; + + /// How does the target deal with reading input files. + virtual std::unique_ptr<Reader> getObjReader() = 0; + + /// How does the target deal with reading dynamic libraries. 
/// \brief The target can override certain functions in the DefaultLayout
/// class so that the order, the name of the section and the segment type could
/// be changed in the final layout.
///
/// This class adds no members of its own; it only forwards construction to
/// DefaultLayout.
template <class ELFT> class TargetLayout : public DefaultLayout<ELFT> {
public:
  /// Construct the layout for \p context by delegating to DefaultLayout.
  TargetLayout(ELFLinkingContext &context) : DefaultLayout<ELFT>(context) {}
};
Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "lld/Core/Writer.h" +#include "DynamicLibraryWriter.h" +#include "ExecutableWriter.h" + +using namespace llvm; +using namespace llvm::object; + +namespace lld { + +std::unique_ptr<Writer> createWriterELF(TargetHandlerBase *handler) { + return std::move(handler->getWriter()); +} + +} // namespace lld diff --git a/lib/ReaderWriter/ELF/Writer.h b/lib/ReaderWriter/ELF/Writer.h new file mode 100644 index 000000000000..1e819467c558 --- /dev/null +++ b/lib/ReaderWriter/ELF/Writer.h @@ -0,0 +1,38 @@ +//===- lib/ReaderWriter/ELF/Writer.h --------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_ELF_WRITER_H +#define LLD_READER_WRITER_ELF_WRITER_H + +#include "lld/Core/File.h" +#include "lld/Core/Writer.h" + +namespace lld { +namespace elf { +/// \brief The Writer class is a base class for the linker to write +/// various kinds of ELF files. +class ELFWriter : public Writer { +public: + ELFWriter() { } + +public: + /// \brief builds the chunks that needs to be written to the output + /// ELF file + virtual void buildChunks(const File &file) = 0; + + /// \brief Writes the chunks into the output file specified by path + virtual std::error_code writeFile(const File &file, StringRef path) = 0; + + /// \brief Get the virtual address of \p atom after layout. 
+ virtual uint64_t addressOfAtom(const Atom *atom) = 0; +}; +} // end namespace elf +} // end namespace lld + +#endif diff --git a/lib/ReaderWriter/ELF/X86/CMakeLists.txt b/lib/ReaderWriter/ELF/X86/CMakeLists.txt new file mode 100644 index 000000000000..191f7ab3d61d --- /dev/null +++ b/lib/ReaderWriter/ELF/X86/CMakeLists.txt @@ -0,0 +1,11 @@ +add_llvm_library(lldX86ELFTarget + X86LinkingContext.cpp + X86TargetHandler.cpp + X86RelocationHandler.cpp + LINK_LIBS + lldELF + lldReaderWriter + lldCore + LLVMObject + LLVMSupport + ) diff --git a/lib/ReaderWriter/ELF/X86/Makefile b/lib/ReaderWriter/ELF/X86/Makefile new file mode 100644 index 000000000000..058d5133eaba --- /dev/null +++ b/lib/ReaderWriter/ELF/X86/Makefile @@ -0,0 +1,15 @@ +##===- lld/lib/ReaderWriter/ELF/X86/Makefile ----------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LLD_LEVEL := ../../../.. +LIBRARYNAME := lldX86ELFTarget +USEDLIBS = lldCore.a +CPP.Flags += -I$(PROJ_SRC_DIR)/$(LLD_LEVEL)/lib/ReaderWriter/ELF + +include $(LLD_LEVEL)/Makefile diff --git a/lib/ReaderWriter/ELF/X86/X86DynamicLibraryWriter.h b/lib/ReaderWriter/ELF/X86/X86DynamicLibraryWriter.h new file mode 100644 index 000000000000..86376295bec4 --- /dev/null +++ b/lib/ReaderWriter/ELF/X86/X86DynamicLibraryWriter.h @@ -0,0 +1,67 @@ +//===- lib/ReaderWriter/ELF/X86/X86DynamicLibraryWriter.h -----------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +#ifndef X86_X86_DYNAMIC_LIBRARY_WRITER_H +#define X86_X86_DYNAMIC_LIBRARY_WRITER_H + +#include "DynamicLibraryWriter.h" +#include "X86LinkingContext.h" + +namespace lld { +namespace elf { + +template <class ELFT> +class X86DynamicLibraryWriter : public DynamicLibraryWriter<ELFT> { +public: + X86DynamicLibraryWriter(X86LinkingContext &context, + X86TargetLayout<ELFT> &layout); + +protected: + // Add any runtime files and their atoms to the output + virtual bool createImplicitFiles(std::vector<std::unique_ptr<File>> &); + + virtual void finalizeDefaultAtomValues() { + return DynamicLibraryWriter<ELFT>::finalizeDefaultAtomValues(); + } + + virtual void addDefaultAtoms() { + return DynamicLibraryWriter<ELFT>::addDefaultAtoms(); + } + +private: + class GOTFile : public SimpleFile { + public: + GOTFile(const ELFLinkingContext &eti) : SimpleFile("GOTFile") {} + llvm::BumpPtrAllocator _alloc; + }; + + std::unique_ptr<GOTFile> _gotFile; + X86LinkingContext &_context; + X86TargetLayout<ELFT> &_x86Layout; +}; + +template <class ELFT> +X86DynamicLibraryWriter<ELFT>::X86DynamicLibraryWriter( + X86LinkingContext &context, X86TargetLayout<ELFT> &layout) + : DynamicLibraryWriter<ELFT>(context, layout), + _gotFile(new GOTFile(context)), _context(context), _x86Layout(layout) {} + +template <class ELFT> +bool X86DynamicLibraryWriter<ELFT>::createImplicitFiles( + std::vector<std::unique_ptr<File>> &result) { + DynamicLibraryWriter<ELFT>::createImplicitFiles(result); + _gotFile->addAtom(*new (_gotFile->_alloc) GLOBAL_OFFSET_TABLEAtom(*_gotFile)); + _gotFile->addAtom(*new (_gotFile->_alloc) DYNAMICAtom(*_gotFile)); + result.push_back(std::move(_gotFile)); + return true; +} + +} // namespace elf +} // namespace lld + +#endif diff --git a/lib/ReaderWriter/ELF/X86/X86ELFFile.h b/lib/ReaderWriter/ELF/X86/X86ELFFile.h new file mode 100644 index 000000000000..621c38c43505 --- /dev/null +++ 
/// \brief ELF object file type used by the x86 target.
///
/// Adds nothing to ELFFile beyond an x86-specific constructor signature and
/// the create() factory.
template <class ELFT> class X86ELFFile : public ELFFile<ELFT> {
public:
  /// Take ownership of \p mb and forward it, together with \p ctx, to ELFFile.
  X86ELFFile(std::unique_ptr<MemoryBuffer> mb, X86LinkingContext &ctx)
      : ELFFile<ELFT>(std::move(mb), ctx) {}

  /// Allocate a new X86ELFFile that takes ownership of \p mb.
  ///
  /// The returned ErrorOr always holds a file; this function itself produces
  /// no error value.
  static ErrorOr<std::unique_ptr<X86ELFFile>>
  create(std::unique_ptr<MemoryBuffer> mb, X86LinkingContext &ctx) {
    return std::unique_ptr<X86ELFFile<ELFT>>(
        new X86ELFFile<ELFT>(std::move(mb), ctx));
  }
};
/// \brief Reader for x86 ELF object files.
///
/// Instantiates ELFObjectReader with the x86 ELF type, the X86ELFFile creation
/// traits and the x86 linking context, and passes EM_386 to the base
/// constructor.
class X86ELFObjectReader
    : public ELFObjectReader<X86ELFType, X86ELFFileCreateELFTraits,
                             X86LinkingContext> {
public:
  /// Construct a reader bound to \p ctx.
  X86ELFObjectReader(X86LinkingContext &ctx)
      : ELFObjectReader<X86ELFType, X86ELFFileCreateELFTraits,
                        X86LinkingContext>(ctx, llvm::ELF::EM_386) {}
};
distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +#ifndef X86_X86_EXECUTABLE_WRITER_H +#define X86_X86_EXECUTABLE_WRITER_H + +#include "ExecutableWriter.h" +#include "X86LinkingContext.h" + +namespace lld { +namespace elf { + +template <class ELFT> +class X86ExecutableWriter : public ExecutableWriter<ELFT> { +public: + X86ExecutableWriter(X86LinkingContext &context, + X86TargetLayout<ELFT> &layout); + +protected: + // Add any runtime files and their atoms to the output + virtual bool createImplicitFiles(std::vector<std::unique_ptr<File>> &); + + virtual void finalizeDefaultAtomValues() { + return ExecutableWriter<ELFT>::finalizeDefaultAtomValues(); + } + + virtual void addDefaultAtoms() { + return ExecutableWriter<ELFT>::addDefaultAtoms(); + } + +private: + X86LinkingContext &_context; + X86TargetLayout<ELFT> &_x86Layout; +}; + +template <class ELFT> +X86ExecutableWriter<ELFT>::X86ExecutableWriter(X86LinkingContext &context, + X86TargetLayout<ELFT> &layout) + : ExecutableWriter<ELFT>(context, layout), _context(context), + _x86Layout(layout) {} + +template <class ELFT> +bool X86ExecutableWriter<ELFT>::createImplicitFiles( + std::vector<std::unique_ptr<File>> &result) { + ExecutableWriter<ELFT>::createImplicitFiles(result); + return true; +} + +} // namespace elf +} // namespace lld + +#endif diff --git a/lib/ReaderWriter/ELF/X86/X86LinkingContext.cpp b/lib/ReaderWriter/ELF/X86/X86LinkingContext.cpp new file mode 100644 index 000000000000..26d715cf2953 --- /dev/null +++ b/lib/ReaderWriter/ELF/X86/X86LinkingContext.cpp @@ -0,0 +1,28 @@ +//===- lib/ReaderWriter/ELF/X86/X86LinkingContext.cpp ---------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "X86LinkingContext.h" +#include "X86TargetHandler.h" +#include "lld/Core/LLVM.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Support/ErrorOr.h" + +using namespace lld; + +std::unique_ptr<ELFLinkingContext> +elf::X86LinkingContext::create(llvm::Triple triple) { + if (triple.getArch() == llvm::Triple::x86) + return std::unique_ptr<ELFLinkingContext>( + new elf::X86LinkingContext(triple)); + return nullptr; +} + +elf::X86LinkingContext::X86LinkingContext(llvm::Triple triple) + : ELFLinkingContext(triple, std::unique_ptr<TargetHandlerBase>( + new X86TargetHandler(*this))) {} diff --git a/lib/ReaderWriter/ELF/X86/X86LinkingContext.h b/lib/ReaderWriter/ELF/X86/X86LinkingContext.h new file mode 100644 index 000000000000..ff424f411aae --- /dev/null +++ b/lib/ReaderWriter/ELF/X86/X86LinkingContext.h @@ -0,0 +1,42 @@ +//===- lib/ReaderWriter/ELF/X86/X86LinkingContext.h -----------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_ELF_X86_TARGETINFO_H +#define LLD_READER_WRITER_ELF_X86_TARGETINFO_H + +#include "lld/ReaderWriter/ELFLinkingContext.h" +#include "llvm/Object/ELF.h" +#include "llvm/Support/ELF.h" + +namespace lld { +namespace elf { +class X86LinkingContext final : public ELFLinkingContext { +public: + static std::unique_ptr<ELFLinkingContext> create(llvm::Triple); + X86LinkingContext(llvm::Triple); + + /// \brief X86 has only two relative relocation + /// a) for supporting IFUNC relocs - R_386_IRELATIVE + /// b) for supporting relative relocs - R_386_RELATIVE + bool isRelativeReloc(const Reference &r) const override { + if (r.kindNamespace() != Reference::KindNamespace::ELF) + return false; + assert(r.kindArch() == Reference::KindArch::x86); + switch (r.kindValue()) { + case llvm::ELF::R_386_IRELATIVE: + case llvm::ELF::R_386_RELATIVE: + return true; + default: + return false; + } + } +}; +} // end namespace elf +} // end namespace lld +#endif diff --git a/lib/ReaderWriter/ELF/X86/X86RelocationHandler.cpp b/lib/ReaderWriter/ELF/X86/X86RelocationHandler.cpp new file mode 100644 index 000000000000..da5a24c6ec37 --- /dev/null +++ b/lib/ReaderWriter/ELF/X86/X86RelocationHandler.cpp @@ -0,0 +1,57 @@ +//===- lib/ReaderWriter/ELF/X86/X86RelocationHandler.cpp ------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "X86LinkingContext.h" +#include "X86TargetHandler.h" +#include "llvm/Support/Endian.h" + +using namespace lld; +using namespace lld::elf; +using namespace llvm::support::endian; + +namespace { +/// \brief R_386_32 - word32: S + A +static int reloc32(uint8_t *location, uint64_t P, uint64_t S, uint64_t A) { + int32_t result = (uint32_t)(S + A); + write32le(location, result | read32le(location)); + return 0; +} + +/// \brief R_386_PC32 - word32: S + A - P +static int relocPC32(uint8_t *location, uint64_t P, uint64_t S, uint64_t A) { + uint32_t result = (uint32_t)((S + A) - P); + write32le(location, result + read32le(location)); + return 0; +} +} + +std::error_code X86TargetRelocationHandler::applyRelocation( + ELFWriter &writer, llvm::FileOutputBuffer &buf, const lld::AtomLayout &atom, + const Reference &ref) const { + uint8_t *atomContent = buf.getBufferStart() + atom._fileOffset; + uint8_t *location = atomContent + ref.offsetInAtom(); + uint64_t targetVAddress = writer.addressOfAtom(ref.target()); + uint64_t relocVAddress = atom._virtualAddr + ref.offsetInAtom(); + + if (ref.kindNamespace() != Reference::KindNamespace::ELF) + return std::error_code(); + assert(ref.kindArch() == Reference::KindArch::x86); + switch (ref.kindValue()) { + case R_386_32: + reloc32(location, relocVAddress, targetVAddress, ref.addend()); + break; + case R_386_PC32: + relocPC32(location, relocVAddress, targetVAddress, ref.addend()); + break; + default: + return make_unhandled_reloc_error(); + } + + return std::error_code(); +} diff --git a/lib/ReaderWriter/ELF/X86/X86RelocationHandler.h b/lib/ReaderWriter/ELF/X86/X86RelocationHandler.h new file mode 100644 index 000000000000..f161cdd55983 --- /dev/null +++ b/lib/ReaderWriter/ELF/X86/X86RelocationHandler.h @@ -0,0 +1,29 @@ +//===- lib/ReaderWriter/ELF/X86/X86RelocationHandler.h --------------------===// +// +// The LLVM Linker +// +// This file 
/// \brief Relocation handler for the x86 ELF target.
///
/// Implements the TargetRelocationHandler interface; the definition of
/// applyRelocation() lives in the corresponding source file.
class X86TargetRelocationHandler final : public TargetRelocationHandler {
public:
  /// Apply one relocation to the output buffer.
  ///
  /// \returns An error code on failure, or a default-constructed
  /// std::error_code on success.
  std::error_code applyRelocation(ELFWriter &, llvm::FileOutputBuffer &,
                                  const lld::AtomLayout &,
                                  const Reference &) const override;
};
+// +//===----------------------------------------------------------------------===// + +#include "X86TargetHandler.h" +#include "X86DynamicLibraryWriter.h" +#include "X86ExecutableWriter.h" +#include "X86LinkingContext.h" +#include "X86RelocationHandler.h" + +using namespace lld; +using namespace elf; + +using namespace llvm::ELF; + +std::unique_ptr<Writer> X86TargetHandler::getWriter() { + switch (_x86LinkingContext.getOutputELFType()) { + case llvm::ELF::ET_EXEC: + return std::unique_ptr<Writer>(new X86ExecutableWriter<X86ELFType>( + _x86LinkingContext, *_x86TargetLayout.get())); + case llvm::ELF::ET_DYN: + return std::unique_ptr<Writer>(new X86DynamicLibraryWriter<X86ELFType>( + _x86LinkingContext, *_x86TargetLayout.get())); + case llvm::ELF::ET_REL: + llvm_unreachable("TODO: support -r mode"); + default: + llvm_unreachable("unsupported output type"); + } +} + +#define ELF_RELOC(name, value) LLD_KIND_STRING_ENTRY(name), + +const Registry::KindStrings X86TargetHandler::kindStrings[] = { +#include "llvm/Support/ELFRelocs/i386.def" + LLD_KIND_STRING_END +}; + +#undef ELF_RELOC + +void X86TargetHandler::registerRelocationNames(Registry ®istry) { + registry.addKindTable(Reference::KindNamespace::ELF, Reference::KindArch::x86, + kindStrings); +} + +X86TargetHandler::X86TargetHandler(X86LinkingContext &context) + : _x86LinkingContext(context), + _x86TargetLayout(new X86TargetLayout<X86ELFType>(context)), + _x86RelocationHandler(new X86TargetRelocationHandler()) {} diff --git a/lib/ReaderWriter/ELF/X86/X86TargetHandler.h b/lib/ReaderWriter/ELF/X86/X86TargetHandler.h new file mode 100644 index 000000000000..6c4026735419 --- /dev/null +++ b/lib/ReaderWriter/ELF/X86/X86TargetHandler.h @@ -0,0 +1,63 @@ +//===- lib/ReaderWriter/ELF/X86/X86TargetHandler.h ------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_ELF_X86_TARGET_HANDLER_H +#define LLD_READER_WRITER_ELF_X86_TARGET_HANDLER_H + +#include "DefaultTargetHandler.h" +#include "TargetLayout.h" +#include "X86ELFFile.h" +#include "X86ELFReader.h" +#include "X86RelocationHandler.h" + +namespace lld { +namespace elf { + +class X86LinkingContext; + +template <class ELFT> class X86TargetLayout : public TargetLayout<ELFT> { +public: + X86TargetLayout(X86LinkingContext &context) : TargetLayout<ELFT>(context) {} +}; + +class X86TargetHandler final + : public DefaultTargetHandler<X86ELFType> { +public: + X86TargetHandler(X86LinkingContext &context); + + X86TargetLayout<X86ELFType> &getTargetLayout() override { + return *(_x86TargetLayout.get()); + } + + void registerRelocationNames(Registry ®istry) override; + + const X86TargetRelocationHandler &getRelocationHandler() const override { + return *(_x86RelocationHandler.get()); + } + + std::unique_ptr<Reader> getObjReader() override { + return std::unique_ptr<Reader>(new X86ELFObjectReader(_x86LinkingContext)); + } + + std::unique_ptr<Reader> getDSOReader() override { + return std::unique_ptr<Reader>(new X86ELFDSOReader(_x86LinkingContext)); + } + + std::unique_ptr<Writer> getWriter() override; + +protected: + static const Registry::KindStrings kindStrings[]; + X86LinkingContext &_x86LinkingContext; + std::unique_ptr<X86TargetLayout<X86ELFType>> _x86TargetLayout; + std::unique_ptr<X86TargetRelocationHandler> _x86RelocationHandler; +}; +} // end namespace elf +} // end namespace lld + +#endif diff --git a/lib/ReaderWriter/ELF/X86_64/CMakeLists.txt b/lib/ReaderWriter/ELF/X86_64/CMakeLists.txt new file mode 100644 index 000000000000..a85d2b504630 --- /dev/null +++ b/lib/ReaderWriter/ELF/X86_64/CMakeLists.txt @@ -0,0 +1,16 @@ +add_llvm_library(lldX86_64ELFTarget + X86_64LinkingContext.cpp + X86_64TargetHandler.cpp + X86_64RelocationHandler.cpp + X86_64RelocationPass.cpp 
+ LINK_LIBS + lldELF + lldReaderWriter + lldCore + LLVMObject + LLVMSupport + ) + +include_directories(.) + +add_subdirectory(ExampleSubTarget) diff --git a/lib/ReaderWriter/ELF/X86_64/ExampleSubTarget/CMakeLists.txt b/lib/ReaderWriter/ELF/X86_64/ExampleSubTarget/CMakeLists.txt new file mode 100644 index 000000000000..d13c98008e55 --- /dev/null +++ b/lib/ReaderWriter/ELF/X86_64/ExampleSubTarget/CMakeLists.txt @@ -0,0 +1,11 @@ +add_llvm_library(lldExampleSubTarget + ExampleLinkingContext.cpp + ExampleTargetHandler.cpp + LINK_LIBS + lldX86_64ELFTarget + lldELF + lldReaderWriter + lldCore + LLVMObject + LLVMSupport + ) diff --git a/lib/ReaderWriter/ELF/X86_64/ExampleSubTarget/ExampleLinkingContext.cpp b/lib/ReaderWriter/ELF/X86_64/ExampleSubTarget/ExampleLinkingContext.cpp new file mode 100644 index 000000000000..dbbb3ad3bc90 --- /dev/null +++ b/lib/ReaderWriter/ELF/X86_64/ExampleSubTarget/ExampleLinkingContext.cpp @@ -0,0 +1,35 @@ +//===- lib/ReaderWriter/ELF/X86_64/ExampleTarget/ExampleLinkingContext.cpp ----===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "ExampleLinkingContext.h" +#include "ExampleTargetHandler.h" + +using namespace lld; +using namespace elf; + +std::unique_ptr<ELFLinkingContext> +ExampleLinkingContext::create(llvm::Triple triple) { + if (triple.getVendorName() == "example") + return llvm::make_unique<ExampleLinkingContext>(triple); + return nullptr; +} + +ExampleLinkingContext::ExampleLinkingContext(llvm::Triple triple) + : X86_64LinkingContext(triple, std::unique_ptr<TargetHandlerBase>( + new ExampleTargetHandler(*this))) { + _outputELFType = llvm::ELF::ET_LOPROC; +} + +StringRef ExampleLinkingContext::entrySymbolName() const { + return "_start"; +} + +void ExampleLinkingContext::addPasses(PassManager &p) { + ELFLinkingContext::addPasses(p); +} diff --git a/lib/ReaderWriter/ELF/X86_64/ExampleSubTarget/ExampleLinkingContext.h b/lib/ReaderWriter/ELF/X86_64/ExampleSubTarget/ExampleLinkingContext.h new file mode 100644 index 000000000000..5bb11cd35b41 --- /dev/null +++ b/lib/ReaderWriter/ELF/X86_64/ExampleSubTarget/ExampleLinkingContext.h @@ -0,0 +1,31 @@ +//===- lib/ReaderWriter/ELF/X86_64/ExampleTarget/ExampleLinkingContext.h --===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_ELF_X86_64_EXAMPLE_TARGET_EXAMPLE_LINKING_CONTEXT +#define LLD_READER_WRITER_ELF_X86_64_EXAMPLE_TARGET_EXAMPLE_LINKING_CONTEXT + +#include "X86_64LinkingContext.h" +#include "X86_64TargetHandler.h" + +namespace lld { +namespace elf { + +class ExampleLinkingContext final : public X86_64LinkingContext { +public: + static std::unique_ptr<ELFLinkingContext> create(llvm::Triple); + ExampleLinkingContext(llvm::Triple triple); + + StringRef entrySymbolName() const override; + void addPasses(PassManager &) override; +}; + +} // end namespace elf +} // end namespace lld + +#endif diff --git a/lib/ReaderWriter/ELF/X86_64/ExampleSubTarget/ExampleTargetHandler.cpp b/lib/ReaderWriter/ELF/X86_64/ExampleSubTarget/ExampleTargetHandler.cpp new file mode 100644 index 000000000000..b66b0d903f6a --- /dev/null +++ b/lib/ReaderWriter/ELF/X86_64/ExampleSubTarget/ExampleTargetHandler.cpp @@ -0,0 +1,23 @@ +//===- lib/ReaderWriter/ELF/X86_64/ExampleTarget/ExampleTargetHandler.cpp -===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "ExampleTargetHandler.h" +#include "X86_64ExecutableWriter.h" +#include "ExampleLinkingContext.h" + +using namespace lld; +using namespace elf; + +ExampleTargetHandler::ExampleTargetHandler(ExampleLinkingContext &c) + : X86_64TargetHandler(c), _exampleContext(c) {} + +std::unique_ptr<Writer> ExampleTargetHandler::getWriter() { + return std::unique_ptr<Writer>( + new X86_64ExecutableWriter(_exampleContext, *_x86_64TargetLayout)); +} diff --git a/lib/ReaderWriter/ELF/X86_64/ExampleSubTarget/ExampleTargetHandler.h b/lib/ReaderWriter/ELF/X86_64/ExampleSubTarget/ExampleTargetHandler.h new file mode 100644 index 000000000000..19a642113359 --- /dev/null +++ b/lib/ReaderWriter/ELF/X86_64/ExampleSubTarget/ExampleTargetHandler.h @@ -0,0 +1,31 @@ +//===- lib/ReaderWriter/ELF/X86_64/ExampleTarget/ExampleTargetHandler.h ---===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_ELF_X86_64_EXAMPLE_TARGET_EXAMPLE_TARGET_HANDLER_H +#define LLD_READER_WRITER_ELF_X86_64_EXAMPLE_TARGET_EXAMPLE_TARGET_HANDLER_H + +#include "X86_64TargetHandler.h" + +namespace lld { +namespace elf { +class ExampleLinkingContext; + +class ExampleTargetHandler final : public X86_64TargetHandler { +public: + ExampleTargetHandler(ExampleLinkingContext &c); + + std::unique_ptr<Writer> getWriter() override; + +private: + ExampleLinkingContext &_exampleContext; +}; +} // end namespace elf +} // end namespace lld + +#endif diff --git a/lib/ReaderWriter/ELF/X86_64/ExampleSubTarget/Makefile b/lib/ReaderWriter/ELF/X86_64/ExampleSubTarget/Makefile new file mode 100644 index 000000000000..8f0b0fead1f6 --- /dev/null +++ b/lib/ReaderWriter/ELF/X86_64/ExampleSubTarget/Makefile @@ -0,0 +1,15 @@ +##===- lld/lib/ReaderWriter/ELF/X86_64/Makefile ----------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LLD_LEVEL := ../../../../.. +LIBRARYNAME := lldExampleSubTarget +USEDLIBS = lldX86_64ELFTarget.a +CPP.Flags += -I$(PROJ_SRC_DIR)/$(LLD_LEVEL)/lib/ReaderWriter/ELF + +include $(LLD_LEVEL)/Makefile diff --git a/lib/ReaderWriter/ELF/X86_64/Makefile b/lib/ReaderWriter/ELF/X86_64/Makefile new file mode 100644 index 000000000000..dbeb4d227050 --- /dev/null +++ b/lib/ReaderWriter/ELF/X86_64/Makefile @@ -0,0 +1,19 @@ +##===- lld/lib/ReaderWriter/ELF/X86_64/Makefile ----------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LLD_LEVEL := ../../../.. 
+LIBRARYNAME := lldX86_64ELFTarget +USEDLIBS = lldCore.a + +CPP.Flags += -I$(PROJ_SRC_DIR)/$(LLD_LEVEL)/lib/ReaderWriter/ELF +CPP.Flags += -I$(PROJ_SRC_DIR)/$(LLD_LEVEL)/lib/ReaderWriter/ELF/X86_64/ + +PARALLEL_DIRS := ExampleSubTarget + +include $(LLD_LEVEL)/Makefile diff --git a/lib/ReaderWriter/ELF/X86_64/TODO.rst b/lib/ReaderWriter/ELF/X86_64/TODO.rst new file mode 100644 index 000000000000..a2411a00d1ea --- /dev/null +++ b/lib/ReaderWriter/ELF/X86_64/TODO.rst @@ -0,0 +1,46 @@ +ELF x86-64 +~~~~~~~~~~ + +Unimplemented Features +###################### + +* Code models other than the small code model +* TLS strength reduction + +Unimplemented Relocations +######################### + +All of these relocations are defined in: +http://www.x86-64.org/documentation/abi.pdf + +Trivial Relocs +<<<<<<<<<<<<<< + +These are very simple relocation calculations to implement. +See lib/ReaderWriter/ELF/X86_64/X86_64RelocationHandler.cpp + +* R_X86_64_8 +* R_X86_64_PC8 +* R_X86_64_SIZE32 +* R_X86_64_SIZE64 +* R_X86_64_GOTPC32 (this relocation requires there to be a __GLOBAL_OFFSET_TABLE__) + +Global Offset Table Relocs +<<<<<<<<<<<<<<<<<<<<<<<<<< + +* R_X86_64_GOTOFF32 +* R_X86_64_GOTOFF64 + +Global Dynamic Thread Local Storage Relocs +<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + +These relocations take more effort to implement, but some of them are done. +Their implementation lives in lib/ReaderWriter/ELF/X86_64/{X86_64RelocationPass.cpp,X86_64RelocationHandler.cpp}. 
+ +Documentation on these relocations can be found in: +http://www.akkadia.org/drepper/tls.pdf +http://www.fsfla.org/~lxoliva/writeups/TLS/RFC-TLSDESC-x86.txt + +* R_X86_64_GOTPC32_TLSDESC +* R_X86_64_TLSDESC_CALL +* R_X86_64_TLSDESC diff --git a/lib/ReaderWriter/ELF/X86_64/X86_64DynamicLibraryWriter.h b/lib/ReaderWriter/ELF/X86_64/X86_64DynamicLibraryWriter.h new file mode 100644 index 000000000000..b996186115b6 --- /dev/null +++ b/lib/ReaderWriter/ELF/X86_64/X86_64DynamicLibraryWriter.h @@ -0,0 +1,63 @@ +//===- lib/ReaderWriter/ELF/X86/X86_64DynamicLibraryWriter.h ---------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +#ifndef X86_64_DYNAMIC_LIBRARY_WRITER_H +#define X86_64_DYNAMIC_LIBRARY_WRITER_H + +#include "DynamicLibraryWriter.h" +#include "X86_64ElfType.h" +#include "X86_64LinkingContext.h" +#include "X86_64TargetHandler.h" + +namespace lld { +namespace elf { + +class X86_64DynamicLibraryWriter : public DynamicLibraryWriter<X86_64ELFType> { +public: + X86_64DynamicLibraryWriter(X86_64LinkingContext &context, + X86_64TargetLayout &layout); + +protected: + // Add any runtime files and their atoms to the output + virtual bool createImplicitFiles(std::vector<std::unique_ptr<File>> &); + + virtual void finalizeDefaultAtomValues() { + return DynamicLibraryWriter::finalizeDefaultAtomValues(); + } + + virtual void addDefaultAtoms() { + return DynamicLibraryWriter::addDefaultAtoms(); + } + +private: + class GOTFile : public SimpleFile { + public: + GOTFile(const ELFLinkingContext &eti) : SimpleFile("GOTFile") {} + llvm::BumpPtrAllocator _alloc; + }; + + std::unique_ptr<GOTFile> _gotFile; +}; + +X86_64DynamicLibraryWriter::X86_64DynamicLibraryWriter( + X86_64LinkingContext &context, X86_64TargetLayout &layout) + : DynamicLibraryWriter(context, layout), _gotFile(new 
GOTFile(context)) {} + +bool X86_64DynamicLibraryWriter::createImplicitFiles( + std::vector<std::unique_ptr<File>> &result) { + DynamicLibraryWriter::createImplicitFiles(result); + _gotFile->addAtom(*new (_gotFile->_alloc) GLOBAL_OFFSET_TABLEAtom(*_gotFile)); + _gotFile->addAtom(*new (_gotFile->_alloc) DYNAMICAtom(*_gotFile)); + result.push_back(std::move(_gotFile)); + return true; +} + +} // namespace elf +} // namespace lld + +#endif diff --git a/lib/ReaderWriter/ELF/X86_64/X86_64ELFFile.h b/lib/ReaderWriter/ELF/X86_64/X86_64ELFFile.h new file mode 100644 index 000000000000..d43840a63e7e --- /dev/null +++ b/lib/ReaderWriter/ELF/X86_64/X86_64ELFFile.h @@ -0,0 +1,41 @@ +//===- lib/ReaderWriter/ELF/X86_64/X86_64ELFFile.h ------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_ELF_X86_64_ELF_FILE_H +#define LLD_READER_WRITER_ELF_X86_64_ELF_FILE_H + +#include "ELFReader.h" + +namespace lld { +namespace elf { + +class X86_64LinkingContext; + +template <class ELFT> class X86_64ELFFile : public ELFFile<ELFT> { +public: + X86_64ELFFile(std::unique_ptr<MemoryBuffer> mb, X86_64LinkingContext &ctx) + : ELFFile<ELFT>(std::move(mb), ctx) {} + + static ErrorOr<std::unique_ptr<X86_64ELFFile>> + create(std::unique_ptr<MemoryBuffer> mb, X86_64LinkingContext &ctx) { + return std::unique_ptr<X86_64ELFFile<ELFT>>( + new X86_64ELFFile<ELFT>(std::move(mb), ctx)); + } +}; + +template <class ELFT> class X86_64DynamicFile : public DynamicFile<ELFT> { +public: + X86_64DynamicFile(const X86_64LinkingContext &context, StringRef name) + : DynamicFile<ELFT>(context, name) {} +}; + +} // elf +} // lld + +#endif // LLD_READER_WRITER_ELF_X86_64_ELF_FILE_H diff --git a/lib/ReaderWriter/ELF/X86_64/X86_64ELFReader.h b/lib/ReaderWriter/ELF/X86_64/X86_64ELFReader.h new 
file mode 100644 index 000000000000..9b1284c6dfa8 --- /dev/null +++ b/lib/ReaderWriter/ELF/X86_64/X86_64ELFReader.h @@ -0,0 +1,62 @@ +//===- lib/ReaderWriter/ELF/X86_64/X86_64ELFReader.h ----------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_X86_64_X86_64_ELF_READER_H +#define LLD_READER_WRITER_X86_64_X86_64_ELF_READER_H + +#include "ELFReader.h" +#include "X86_64ELFFile.h" + +namespace lld { +namespace elf { + +typedef llvm::object::ELFType<llvm::support::little, 2, true> X86_64ELFType; + +struct X86_64DynamicFileCreateELFTraits { + typedef llvm::ErrorOr<std::unique_ptr<lld::SharedLibraryFile>> result_type; + + template <class ELFT> + static result_type create(std::unique_ptr<llvm::MemoryBuffer> mb, + X86_64LinkingContext &ctx) { + return lld::elf::X86_64DynamicFile<ELFT>::create(std::move(mb), ctx); + } +}; + +struct X86_64ELFFileCreateELFTraits { + typedef llvm::ErrorOr<std::unique_ptr<lld::File>> result_type; + + template <class ELFT> + static result_type create(std::unique_ptr<llvm::MemoryBuffer> mb, + X86_64LinkingContext &ctx) { + return lld::elf::X86_64ELFFile<ELFT>::create(std::move(mb), ctx); + } +}; + +class X86_64ELFObjectReader + : public ELFObjectReader<X86_64ELFType, X86_64ELFFileCreateELFTraits, + X86_64LinkingContext> { +public: + X86_64ELFObjectReader(X86_64LinkingContext &ctx) + : ELFObjectReader<X86_64ELFType, X86_64ELFFileCreateELFTraits, + X86_64LinkingContext>(ctx, llvm::ELF::EM_X86_64) {} +}; + +class X86_64ELFDSOReader + : public ELFDSOReader<X86_64ELFType, X86_64DynamicFileCreateELFTraits, + X86_64LinkingContext> { +public: + X86_64ELFDSOReader(X86_64LinkingContext &ctx) + : ELFDSOReader<X86_64ELFType, X86_64DynamicFileCreateELFTraits, + X86_64LinkingContext>(ctx, llvm::ELF::EM_X86_64) {} +}; + +} // namespace 
elf +} // namespace lld + +#endif // LLD_READER_WRITER_ELF_X86_64_X86_64_READER_H diff --git a/lib/ReaderWriter/ELF/X86_64/X86_64ElfType.h b/lib/ReaderWriter/ELF/X86_64/X86_64ElfType.h new file mode 100644 index 000000000000..0b982e7754e2 --- /dev/null +++ b/lib/ReaderWriter/ELF/X86_64/X86_64ElfType.h @@ -0,0 +1,21 @@ +//===- lib/ReaderWriter/ELF/X86_64/X86_64ElfType.h ------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_ELF_X86_64_X86_64_ELF_TYPE_H +#define LLD_READER_WRITER_ELF_X86_64_X86_64_ELF_TYPE_H + +#include "llvm/Object/ELF.h" + +namespace lld { +namespace elf { +typedef llvm::object::ELFType<llvm::support::little, 2, true> X86_64ELFType; +} +} + +#endif diff --git a/lib/ReaderWriter/ELF/X86_64/X86_64ExecutableWriter.h b/lib/ReaderWriter/ELF/X86_64/X86_64ExecutableWriter.h new file mode 100644 index 000000000000..f549ed6dcfcb --- /dev/null +++ b/lib/ReaderWriter/ELF/X86_64/X86_64ExecutableWriter.h @@ -0,0 +1,61 @@ +//===- lib/ReaderWriter/ELF/X86/X86_64ExecutableWriter.h ------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +#ifndef X86_64_EXECUTABLE_WRITER_H +#define X86_64_EXECUTABLE_WRITER_H + +#include "ExecutableWriter.h" +#include "X86_64ElfType.h" +#include "X86_64LinkingContext.h" + +namespace lld { +namespace elf { + +class X86_64ExecutableWriter : public ExecutableWriter<X86_64ELFType> { +public: + X86_64ExecutableWriter(X86_64LinkingContext &context, + X86_64TargetLayout &layout) + : ExecutableWriter(context, layout), _gotFile(new GOTFile(context)), + _context(context) {} + +protected: + // Add any runtime files and their atoms to the output + virtual bool + createImplicitFiles(std::vector<std::unique_ptr<File>> &result) { + ExecutableWriter::createImplicitFiles(result); + _gotFile->addAtom(*new (_gotFile->_alloc) + GLOBAL_OFFSET_TABLEAtom(*_gotFile)); + if (_context.isDynamic()) + _gotFile->addAtom(*new (_gotFile->_alloc) DYNAMICAtom(*_gotFile)); + result.push_back(std::move(_gotFile)); + return true; + } + + virtual void finalizeDefaultAtomValues() { + return ExecutableWriter::finalizeDefaultAtomValues(); + } + + virtual void addDefaultAtoms() { + return ExecutableWriter::addDefaultAtoms(); + } + +private: + class GOTFile : public SimpleFile { + public: + GOTFile(const ELFLinkingContext &eti) : SimpleFile("GOTFile") {} + llvm::BumpPtrAllocator _alloc; + }; + + std::unique_ptr<GOTFile> _gotFile; + X86_64LinkingContext &_context; +}; + +} // namespace elf +} // namespace lld + +#endif diff --git a/lib/ReaderWriter/ELF/X86_64/X86_64LinkingContext.cpp b/lib/ReaderWriter/ELF/X86_64/X86_64LinkingContext.cpp new file mode 100644 index 000000000000..6a8ce8bd6496 --- /dev/null +++ b/lib/ReaderWriter/ELF/X86_64/X86_64LinkingContext.cpp @@ -0,0 +1,38 @@ +//===- lib/ReaderWriter/ELF/X86_64/X86_64LinkingContext.cpp ---------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "X86_64LinkingContext.h" +#include "X86_64RelocationPass.h" +#include "X86_64TargetHandler.h" + +using namespace lld; +using namespace elf; + +X86_64LinkingContext::X86_64LinkingContext( + llvm::Triple triple, std::unique_ptr<TargetHandlerBase> handler) + : ELFLinkingContext(triple, std::move(handler)) {} + +X86_64LinkingContext::X86_64LinkingContext(llvm::Triple triple) + : X86_64LinkingContext(triple, + llvm::make_unique<X86_64TargetHandler>(*this)) {} + +void X86_64LinkingContext::addPasses(PassManager &pm) { + auto pass = createX86_64RelocationPass(*this); + if (pass) + pm.add(std::move(pass)); + ELFLinkingContext::addPasses(pm); +} + +std::unique_ptr<ELFLinkingContext> +X86_64LinkingContext::create(llvm::Triple triple) { + if (triple.getArch() == llvm::Triple::x86_64) + return std::unique_ptr<ELFLinkingContext>( + new elf::X86_64LinkingContext(triple)); + return nullptr; +} diff --git a/lib/ReaderWriter/ELF/X86_64/X86_64LinkingContext.h b/lib/ReaderWriter/ELF/X86_64/X86_64LinkingContext.h new file mode 100644 index 000000000000..2cc799a9c810 --- /dev/null +++ b/lib/ReaderWriter/ELF/X86_64/X86_64LinkingContext.h @@ -0,0 +1,100 @@ +//===- lib/ReaderWriter/ELF/X86_64/X86_64LinkingContext.h -----------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_ELF_X86_64_X86_64_LINKING_CONTEXT_H +#define LLD_READER_WRITER_ELF_X86_64_X86_64_LINKING_CONTEXT_H + +#include "lld/ReaderWriter/ELFLinkingContext.h" +#include "llvm/Object/ELF.h" +#include "llvm/Support/ELF.h" + +namespace lld { +namespace elf { + +/// \brief x86-64 internal references. +enum { + /// \brief The 32 bit index of the relocation in the got this reference refers + /// to. 
+ LLD_R_X86_64_GOTRELINDEX = 1024, +}; + +class X86_64LinkingContext : public ELFLinkingContext { +protected: + X86_64LinkingContext(llvm::Triple, std::unique_ptr<TargetHandlerBase>); +public: + static std::unique_ptr<ELFLinkingContext> create(llvm::Triple); + X86_64LinkingContext(llvm::Triple); + + void addPasses(PassManager &) override; + + uint64_t getBaseAddress() const override { + if (_baseAddress == 0) + return 0x400000; + return _baseAddress; + } + + bool isDynamicRelocation(const Reference &r) const override { + if (r.kindNamespace() != Reference::KindNamespace::ELF) + return false; + assert(r.kindArch() == Reference::KindArch::x86_64); + switch (r.kindValue()) { + case llvm::ELF::R_X86_64_RELATIVE: + case llvm::ELF::R_X86_64_GLOB_DAT: + case llvm::ELF::R_X86_64_COPY: + case llvm::ELF::R_X86_64_DTPMOD64: + case llvm::ELF::R_X86_64_DTPOFF64: + case llvm::ELF::R_X86_64_TPOFF64: + return true; + default: + return false; + } + } + + bool isCopyRelocation(const Reference &r) const override { + if (r.kindNamespace() != Reference::KindNamespace::ELF) + return false; + assert(r.kindArch() == Reference::KindArch::x86_64); + if (r.kindValue() == llvm::ELF::R_X86_64_COPY) + return true; + return false; + } + + virtual bool isPLTRelocation(const Reference &r) const override { + if (r.kindNamespace() != Reference::KindNamespace::ELF) + return false; + assert(r.kindArch() == Reference::KindArch::x86_64); + switch (r.kindValue()) { + case llvm::ELF::R_X86_64_JUMP_SLOT: + case llvm::ELF::R_X86_64_IRELATIVE: + return true; + default: + return false; + } + } + + /// \brief X86_64 has two relative relocations + /// a) for supporting IFUNC - R_X86_64_IRELATIVE + /// b) for supporting relative relocs - R_X86_64_RELATIVE + bool isRelativeReloc(const Reference &r) const override { + if (r.kindNamespace() != Reference::KindNamespace::ELF) + return false; + assert(r.kindArch() == Reference::KindArch::x86_64); + switch (r.kindValue()) { + case llvm::ELF::R_X86_64_IRELATIVE: + case 
llvm::ELF::R_X86_64_RELATIVE: + return true; + default: + return false; + } + } +}; +} // end namespace elf +} // end namespace lld + +#endif diff --git a/lib/ReaderWriter/ELF/X86_64/X86_64RelocationHandler.cpp b/lib/ReaderWriter/ELF/X86_64/X86_64RelocationHandler.cpp new file mode 100644 index 000000000000..8fd74f43bbd2 --- /dev/null +++ b/lib/ReaderWriter/ELF/X86_64/X86_64RelocationHandler.cpp @@ -0,0 +1,151 @@ +//===- lib/ReaderWriter/ELF/X86_64/X86_64RelocationHandler.cpp ------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "X86_64LinkingContext.h" +#include "X86_64TargetHandler.h" +#include "llvm/Support/Endian.h" + +using namespace lld; +using namespace lld::elf; +using namespace llvm::support::endian; + +/// \brief R_X86_64_64 - word64: S + A +static void reloc64(uint8_t *location, uint64_t P, uint64_t S, int64_t A) { + uint64_t result = S + A; + write64le(location, result | read64le(location)); +} + +/// \brief R_X86_64_PC32 - word32: S + A - P +static void relocPC32(uint8_t *location, uint64_t P, uint64_t S, int64_t A) { + uint32_t result = (uint32_t)((S + A) - P); + write32le(location, result + read32le(location)); +} + +/// \brief R_X86_64_32 - word32: S + A +static void reloc32(uint8_t *location, uint64_t P, uint64_t S, int64_t A) { + int32_t result = (uint32_t)(S + A); + write32le(location, result | read32le(location)); + // TODO: Make sure that the result zero extends to the 64bit value. +} + +/// \brief R_X86_64_32S - word32: S + A +static void reloc32S(uint8_t *location, uint64_t P, uint64_t S, int64_t A) { + int32_t result = (int32_t)(S + A); + write32le(location, result | read32le(location)); + // TODO: Make sure that the result sign extends to the 64bit value. 
+} + +/// \brief R_X86_64_16 - word16: S + A +static void reloc16(uint8_t *location, uint64_t P, uint64_t S, int64_t A) { + uint16_t result = (uint16_t)(S + A); + write16le(location, result | read16le(location)); + // TODO: Check for overflow. +} + +/// \brief R_X86_64_PC16 - word16: S + A - P +static void relocPC16(uint8_t *location, uint64_t P, uint64_t S, int64_t A) { + uint16_t result = (uint16_t)((S + A) - P); + write16le(location, result | read16le(location)); + // TODO: Check for overflow. +} + +/// \brief R_X86_64_PC64 - word64: S + A - P +static void relocPC64(uint8_t *location, uint64_t P, uint64_t S, uint64_t A) { + int64_t result = (uint64_t)((S + A) - P); + write64le(location, result | read64le(location)); +} + +std::error_code X86_64TargetRelocationHandler::applyRelocation( + ELFWriter &writer, llvm::FileOutputBuffer &buf, const lld::AtomLayout &atom, + const Reference &ref) const { + uint8_t *atomContent = buf.getBufferStart() + atom._fileOffset; + uint8_t *location = atomContent + ref.offsetInAtom(); + uint64_t targetVAddress = writer.addressOfAtom(ref.target()); + uint64_t relocVAddress = atom._virtualAddr + ref.offsetInAtom(); + + if (ref.kindNamespace() != Reference::KindNamespace::ELF) + return std::error_code(); + assert(ref.kindArch() == Reference::KindArch::x86_64); + switch (ref.kindValue()) { + case R_X86_64_NONE: + break; + case R_X86_64_64: + reloc64(location, relocVAddress, targetVAddress, ref.addend()); + break; + case R_X86_64_PC32: + case R_X86_64_GOTPCREL: + relocPC32(location, relocVAddress, targetVAddress, ref.addend()); + break; + case R_X86_64_32: + reloc32(location, relocVAddress, targetVAddress, ref.addend()); + break; + case R_X86_64_32S: + reloc32S(location, relocVAddress, targetVAddress, ref.addend()); + break; + case R_X86_64_16: + reloc16(location, relocVAddress, targetVAddress, ref.addend()); + break; + case R_X86_64_PC16: + relocPC16(location, relocVAddress, targetVAddress, ref.addend()); + break; + case 
R_X86_64_TPOFF64: + case R_X86_64_DTPOFF32: + case R_X86_64_TPOFF32: { + _tlsSize = _x86_64Layout.getTLSSize(); + if (ref.kindValue() == R_X86_64_TPOFF32 || + ref.kindValue() == R_X86_64_DTPOFF32) { + write32le(location, targetVAddress - _tlsSize); + } else { + write64le(location, targetVAddress - _tlsSize); + } + break; + } + case R_X86_64_TLSGD: { + relocPC32(location, relocVAddress, targetVAddress, ref.addend()); + break; + } + case R_X86_64_TLSLD: { + // Rewrite to move %fs:0 into %rax. Technically we should verify that the + // next relocation is a PC32 to __tls_get_addr... + static uint8_t instr[] = { 0x66, 0x66, 0x66, 0x64, 0x48, 0x8b, 0x04, 0x25, + 0x00, 0x00, 0x00, 0x00 }; + std::memcpy(location - 3, instr, sizeof(instr)); + break; + } + case R_X86_64_PC64: + relocPC64(location, relocVAddress, targetVAddress, ref.addend()); + break; + case LLD_R_X86_64_GOTRELINDEX: { + const DefinedAtom *target = cast<const DefinedAtom>(ref.target()); + for (const Reference *r : *target) { + if (r->kindValue() == R_X86_64_JUMP_SLOT) { + uint32_t index; + if (!_x86_64Layout.getPLTRelocationTable()->getRelocationIndex(*r, + index)) + llvm_unreachable("Relocation doesn't exist"); + reloc32(location, 0, index, 0); + break; + } + } + break; + } + // Runtime only relocations. Ignore here. 
+ case R_X86_64_RELATIVE: + case R_X86_64_IRELATIVE: + case R_X86_64_JUMP_SLOT: + case R_X86_64_GLOB_DAT: + case R_X86_64_DTPMOD64: + case R_X86_64_DTPOFF64: + break; + default: + return make_unhandled_reloc_error(); + } + + return std::error_code(); +} diff --git a/lib/ReaderWriter/ELF/X86_64/X86_64RelocationHandler.h b/lib/ReaderWriter/ELF/X86_64/X86_64RelocationHandler.h new file mode 100644 index 000000000000..9e2c2171015d --- /dev/null +++ b/lib/ReaderWriter/ELF/X86_64/X86_64RelocationHandler.h @@ -0,0 +1,39 @@ +//===- lib/ReaderWriter/ELF/X86_64/X86_64RelocationHandler.h --------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef X86_64_RELOCATION_HANDLER_H +#define X86_64_RELOCATION_HANDLER_H + +#include "X86_64TargetHandler.h" + +namespace lld { +namespace elf { +typedef llvm::object::ELFType<llvm::support::little, 2, true> X86_64ELFType; + +class X86_64TargetLayout; + +class X86_64TargetRelocationHandler final : public TargetRelocationHandler { +public: + X86_64TargetRelocationHandler(X86_64TargetLayout &layout) + : _tlsSize(0), _x86_64Layout(layout) {} + + std::error_code applyRelocation(ELFWriter &, llvm::FileOutputBuffer &, + const lld::AtomLayout &, + const Reference &) const override; + +private: + // Cached size of the TLS segment. 
+ mutable uint64_t _tlsSize; + X86_64TargetLayout &_x86_64Layout; +}; + +} // end namespace elf +} // end namespace lld + +#endif // X86_64_RELOCATION_HANDLER_H diff --git a/lib/ReaderWriter/ELF/X86_64/X86_64RelocationPass.cpp b/lib/ReaderWriter/ELF/X86_64/X86_64RelocationPass.cpp new file mode 100644 index 000000000000..0703927fd56c --- /dev/null +++ b/lib/ReaderWriter/ELF/X86_64/X86_64RelocationPass.cpp @@ -0,0 +1,513 @@ +//===- lib/ReaderWriter/ELF/X86_64/X86_64RelocationPass.cpp ---------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief Defines the relocation processing pass for x86-64. This includes +/// GOT and PLT entries, TLS, COPY, and ifunc. +/// +/// This is based on section 4.4.1 of the AMD64 ABI (no stable URL as of Oct, +/// 2013). +/// +/// This also includes additional behavior that gnu-ld and gold implement but +/// which is not specified anywhere. 
+/// +//===----------------------------------------------------------------------===// + +#include "X86_64RelocationPass.h" +#include "Atoms.h" +#include "X86_64LinkingContext.h" +#include "lld/Core/Simple.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" + +using namespace lld; +using namespace lld::elf; +using namespace llvm::ELF; + +// .got values +static const uint8_t x86_64GotAtomContent[8] = {0}; + +// .plt value (entry 0) +static const uint8_t x86_64Plt0AtomContent[16] = { + 0xff, 0x35, 0x00, 0x00, 0x00, 0x00, // pushq GOT+8(%rip) + 0xff, 0x25, 0x00, 0x00, 0x00, 0x00, // jmp *GOT+16(%rip) + 0x90, 0x90, 0x90, 0x90 // nopnopnop +}; + +// .plt values (other entries) +static const uint8_t x86_64PltAtomContent[16] = { + 0xff, 0x25, 0x00, 0x00, 0x00, 0x00, // jmpq *gotatom(%rip) + 0x68, 0x00, 0x00, 0x00, 0x00, // pushq reloc-index + 0xe9, 0x00, 0x00, 0x00, 0x00 // jmpq plt[-1] +}; + +// TLS GD Entry +static const uint8_t x86_64GotTlsGdAtomContent[] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +}; + +namespace { +/// \brief Atoms that are used by X86_64 dynamic linking +class X86_64GOTAtom : public GOTAtom { +public: + X86_64GOTAtom(const File &f, StringRef secName) : GOTAtom(f, secName) {} + + ArrayRef<uint8_t> rawContent() const override { + return ArrayRef<uint8_t>(x86_64GotAtomContent, 8); + } +}; + +/// \brief X86_64 GOT TLS GD entry. 
+class GOTTLSGdAtom : public X86_64GOTAtom { +public: + GOTTLSGdAtom(const File &f, StringRef secName) : X86_64GOTAtom(f, secName) {} + + ArrayRef<uint8_t> rawContent() const override { + return llvm::makeArrayRef(x86_64GotTlsGdAtomContent); + } +}; + +class X86_64PLT0Atom : public PLT0Atom { +public: + X86_64PLT0Atom(const File &f) : PLT0Atom(f) {} + ArrayRef<uint8_t> rawContent() const override { + return ArrayRef<uint8_t>(x86_64Plt0AtomContent, 16); + } +}; + +class X86_64PLTAtom : public PLTAtom { +public: + X86_64PLTAtom(const File &f, StringRef secName) : PLTAtom(f, secName) {} + + ArrayRef<uint8_t> rawContent() const override { + return ArrayRef<uint8_t>(x86_64PltAtomContent, 16); + } +}; + +class ELFPassFile : public SimpleFile { +public: + ELFPassFile(const ELFLinkingContext &eti) : SimpleFile("ELFPassFile") { + setOrdinal(eti.getNextOrdinalAndIncrement()); + } + + llvm::BumpPtrAllocator _alloc; +}; + +/// \brief CRTP base for handling relocations. +template <class Derived> class RelocationPass : public Pass { + /// \brief Handle a specific reference. 
+ void handleReference(const DefinedAtom &atom, const Reference &ref) { + if (ref.kindNamespace() != Reference::KindNamespace::ELF) + return; + assert(ref.kindArch() == Reference::KindArch::x86_64); + switch (ref.kindValue()) { + case R_X86_64_16: + case R_X86_64_32: + case R_X86_64_32S: + case R_X86_64_64: + case R_X86_64_PC16: + case R_X86_64_PC32: + case R_X86_64_PC64: + static_cast<Derived *>(this)->handlePlain(ref); + break; + case R_X86_64_PLT32: + static_cast<Derived *>(this)->handlePLT32(ref); + break; + case R_X86_64_GOT32: + case R_X86_64_GOTPC32: + case R_X86_64_GOTPCREL: + case R_X86_64_GOTOFF64: + static_cast<Derived *>(this)->handleGOT(ref); + break; + case R_X86_64_GOTTPOFF: // GOT Thread Pointer Offset + static_cast<Derived *>(this)->handleGOTTPOFF(ref); + break; + case R_X86_64_TLSGD: + static_cast<Derived *>(this)->handleTLSGd(ref); + break; + } + } + +protected: + /// \brief get the PLT entry for a given IFUNC Atom. + /// + /// If the entry does not exist. Both the GOT and PLT entry is created. + const PLTAtom *getIFUNCPLTEntry(const DefinedAtom *da) { + auto plt = _pltMap.find(da); + if (plt != _pltMap.end()) + return plt->second; + auto ga = new (_file._alloc) X86_64GOTAtom(_file, ".got.plt"); + ga->addReferenceELF_x86_64(R_X86_64_IRELATIVE, 0, da, 0); + auto pa = new (_file._alloc) X86_64PLTAtom(_file, ".plt"); + pa->addReferenceELF_x86_64(R_X86_64_PC32, 2, ga, -4); +#ifndef NDEBUG + ga->_name = "__got_ifunc_"; + ga->_name += da->name(); + pa->_name = "__plt_ifunc_"; + pa->_name += da->name(); +#endif + _gotMap[da] = ga; + _pltMap[da] = pa; + _gotVector.push_back(ga); + _pltVector.push_back(pa); + return pa; + } + + /// \brief Redirect the call to the PLT stub for the target IFUNC. + /// + /// This create a PLT and GOT entry for the IFUNC if one does not exist. The + /// GOT entry and a IRELATIVE relocation to the original target resolver. 
+ std::error_code handleIFUNC(const Reference &ref) { + auto target = dyn_cast_or_null<const DefinedAtom>(ref.target()); + if (target && target->contentType() == DefinedAtom::typeResolver) + const_cast<Reference &>(ref).setTarget(getIFUNCPLTEntry(target)); + return std::error_code(); + } + + /// \brief Create a GOT entry for the TP offset of a TLS atom. + const GOTAtom *getGOTTPOFF(const Atom *atom) { + auto got = _gotMap.find(atom); + if (got == _gotMap.end()) { + auto g = new (_file._alloc) X86_64GOTAtom(_file, ".got"); + g->addReferenceELF_x86_64(R_X86_64_TPOFF64, 0, atom, 0); +#ifndef NDEBUG + g->_name = "__got_tls_"; + g->_name += atom->name(); +#endif + _gotMap[atom] = g; + _gotVector.push_back(g); + return g; + } + return got->second; + } + + /// \brief Create a TPOFF64 GOT entry and change the relocation to a PC32 to + /// the GOT. + void handleGOTTPOFF(const Reference &ref) { + const_cast<Reference &>(ref).setTarget(getGOTTPOFF(ref.target())); + const_cast<Reference &>(ref).setKindValue(R_X86_64_PC32); + } + + /// \brief Create a TLS GOT entry with DTPMOD64/DTPOFF64 dynamic relocations. + void handleTLSGd(const Reference &ref) { + const_cast<Reference &>(ref).setTarget(getTLSGdGOTEntry(ref.target())); + } + + /// \brief Create a GOT entry containing 0. 
+ const GOTAtom *getNullGOT() { + if (!_null) { + _null = new (_file._alloc) X86_64GOTAtom(_file, ".got.plt"); +#ifndef NDEBUG + _null->_name = "__got_null"; +#endif + } + return _null; + } + + const GOTAtom *getGOT(const DefinedAtom *da) { + auto got = _gotMap.find(da); + if (got == _gotMap.end()) { + auto g = new (_file._alloc) X86_64GOTAtom(_file, ".got"); + g->addReferenceELF_x86_64(R_X86_64_64, 0, da, 0); +#ifndef NDEBUG + g->_name = "__got_"; + g->_name += da->name(); +#endif + _gotMap[da] = g; + _gotVector.push_back(g); + return g; + } + return got->second; + } + + const GOTAtom *getTLSGdGOTEntry(const Atom *a) { + auto got = _gotTLSGdMap.find(a); + if (got != _gotTLSGdMap.end()) + return got->second; + + auto ga = new (_file._alloc) GOTTLSGdAtom(_file, ".got"); + _gotTLSGdMap[a] = ga; + + _tlsGotVector.push_back(ga); + ga->addReferenceELF_x86_64(R_X86_64_DTPMOD64, 0, a, 0); + ga->addReferenceELF_x86_64(R_X86_64_DTPOFF64, 8, a, 0); + + return ga; + } + +public: + RelocationPass(const ELFLinkingContext &ctx) + : _file(ctx), _ctx(ctx), _null(nullptr), _PLT0(nullptr), _got0(nullptr), + _got1(nullptr) {} + + /// \brief Do the pass. + /// + /// The goal here is to first process each reference individually. Each call + /// to handleReference may modify the reference itself and/or create new + /// atoms which must be stored in one of the maps below. + /// + /// After all references are handled, the atoms created during that are all + /// added to mf. + void perform(std::unique_ptr<MutableFile> &mf) override { + ScopedTask task(getDefaultDomain(), "X86-64 GOT/PLT Pass"); + // Process all references. + for (const auto &atom : mf->defined()) + for (const auto &ref : *atom) + handleReference(*atom, *ref); + + // Add all created atoms to the link. 
+ uint64_t ordinal = 0; + if (_PLT0) { + _PLT0->setOrdinal(ordinal++); + mf->addAtom(*_PLT0); + } + for (auto &plt : _pltVector) { + plt->setOrdinal(ordinal++); + mf->addAtom(*plt); + } + if (_null) { + _null->setOrdinal(ordinal++); + mf->addAtom(*_null); + } + if (_PLT0) { + _got0->setOrdinal(ordinal++); + _got1->setOrdinal(ordinal++); + mf->addAtom(*_got0); + mf->addAtom(*_got1); + } + for (auto &got : _gotVector) { + got->setOrdinal(ordinal++); + mf->addAtom(*got); + } + for (auto &got : _tlsGotVector) { + got->setOrdinal(ordinal++); + mf->addAtom(*got); + } + for (auto obj : _objectVector) { + obj->setOrdinal(ordinal++); + mf->addAtom(*obj); + } + } + +protected: + /// \brief Owner of all the Atoms created by this pass. + ELFPassFile _file; + const ELFLinkingContext &_ctx; + + /// \brief Map Atoms to their GOT entries. + llvm::DenseMap<const Atom *, GOTAtom *> _gotMap; + + /// \brief Map Atoms to their PLT entries. + llvm::DenseMap<const Atom *, PLTAtom *> _pltMap; + + /// \brief Map Atoms to TLS GD GOT entries. + llvm::DenseMap<const Atom *, GOTAtom *> _gotTLSGdMap; + + /// \brief Map Atoms to their Object entries. + llvm::DenseMap<const Atom *, ObjectAtom *> _objectMap; + + /// \brief the list of GOT/PLT atoms + std::vector<GOTAtom *> _gotVector; + std::vector<PLTAtom *> _pltVector; + std::vector<ObjectAtom *> _objectVector; + + /// \brief the list of TLS GOT atoms. + std::vector<GOTAtom *> _tlsGotVector; + + /// \brief GOT entry that is always 0. Used for undefined weaks. + GOTAtom *_null; + + /// \brief The got and plt entries for .PLT0. This is used to call into the + /// dynamic linker for symbol resolution. + /// @{ + PLT0Atom *_PLT0; + GOTAtom *_got0; + GOTAtom *_got1; + /// @} +}; + +/// This implements the static relocation model. Meaning GOT and PLT entries are +/// not created for references that can be directly resolved. These are +/// converted to a direct relocation. 
For entries that do require a GOT or PLT +/// entry, that entry is statically bound. +/// +/// TLS always assumes module 1 and attempts to remove indirection. +class StaticRelocationPass final + : public RelocationPass<StaticRelocationPass> { +public: + StaticRelocationPass(const elf::X86_64LinkingContext &ctx) + : RelocationPass(ctx) {} + + std::error_code handlePlain(const Reference &ref) { return handleIFUNC(ref); } + + std::error_code handlePLT32(const Reference &ref) { + // __tls_get_addr is handled elsewhere. + if (ref.target() && ref.target()->name() == "__tls_get_addr") { + const_cast<Reference &>(ref).setKindValue(R_X86_64_NONE); + return std::error_code(); + } + // Static code doesn't need PLTs. + const_cast<Reference &>(ref).setKindValue(R_X86_64_PC32); + // Handle IFUNC. + if (const DefinedAtom *da = + dyn_cast_or_null<const DefinedAtom>(ref.target())) + if (da->contentType() == DefinedAtom::typeResolver) + return handleIFUNC(ref); + return std::error_code(); + } + + std::error_code handleGOT(const Reference &ref) { + if (isa<UndefinedAtom>(ref.target())) + const_cast<Reference &>(ref).setTarget(getNullGOT()); + else if (const DefinedAtom *da = dyn_cast<const DefinedAtom>(ref.target())) + const_cast<Reference &>(ref).setTarget(getGOT(da)); + return std::error_code(); + } +}; + +class DynamicRelocationPass final + : public RelocationPass<DynamicRelocationPass> { +public: + DynamicRelocationPass(const elf::X86_64LinkingContext &ctx) + : RelocationPass(ctx) {} + + const PLT0Atom *getPLT0() { + if (_PLT0) + return _PLT0; + // Fill in the null entry. 
+ getNullGOT(); + _PLT0 = new (_file._alloc) X86_64PLT0Atom(_file); + _got0 = new (_file._alloc) X86_64GOTAtom(_file, ".got.plt"); + _got1 = new (_file._alloc) X86_64GOTAtom(_file, ".got.plt"); + _PLT0->addReferenceELF_x86_64(R_X86_64_PC32, 2, _got0, -4); + _PLT0->addReferenceELF_x86_64(R_X86_64_PC32, 8, _got1, -4); +#ifndef NDEBUG + _got0->_name = "__got0"; + _got1->_name = "__got1"; +#endif + return _PLT0; + } + + const PLTAtom *getPLTEntry(const Atom *a) { + auto plt = _pltMap.find(a); + if (plt != _pltMap.end()) + return plt->second; + auto ga = new (_file._alloc) X86_64GOTAtom(_file, ".got.plt"); + ga->addReferenceELF_x86_64(R_X86_64_JUMP_SLOT, 0, a, 0); + auto pa = new (_file._alloc) X86_64PLTAtom(_file, ".plt"); + pa->addReferenceELF_x86_64(R_X86_64_PC32, 2, ga, -4); + pa->addReferenceELF_x86_64(LLD_R_X86_64_GOTRELINDEX, 7, ga, 0); + pa->addReferenceELF_x86_64(R_X86_64_PC32, 12, getPLT0(), -4); + // Set the starting address of the got entry to the second instruction in + // the plt entry. + ga->addReferenceELF_x86_64(R_X86_64_64, 0, pa, 6); +#ifndef NDEBUG + ga->_name = "__got_"; + ga->_name += a->name(); + pa->_name = "__plt_"; + pa->_name += a->name(); +#endif + _gotMap[a] = ga; + _pltMap[a] = pa; + _gotVector.push_back(ga); + _pltVector.push_back(pa); + return pa; + } + + const ObjectAtom *getObjectEntry(const SharedLibraryAtom *a) { + auto obj = _objectMap.find(a); + if (obj != _objectMap.end()) + return obj->second; + + auto oa = new (_file._alloc) ObjectAtom(_file); + // This needs to point to the atom that we just created. 
+ oa->addReferenceELF_x86_64(R_X86_64_COPY, 0, oa, 0); + + oa->_name = a->name(); + oa->_size = a->size(); + + _objectMap[a] = oa; + _objectVector.push_back(oa); + return oa; + } + + std::error_code handlePlain(const Reference &ref) { + if (!ref.target()) + return std::error_code(); + if (auto sla = dyn_cast<SharedLibraryAtom>(ref.target())) { + if (sla->type() == SharedLibraryAtom::Type::Data) + const_cast<Reference &>(ref).setTarget(getObjectEntry(sla)); + else if (sla->type() == SharedLibraryAtom::Type::Code) + const_cast<Reference &>(ref).setTarget(getPLTEntry(sla)); + } else + return handleIFUNC(ref); + return std::error_code(); + } + + std::error_code handlePLT32(const Reference &ref) { + // Turn this into a PC32 to the PLT entry. + const_cast<Reference &>(ref).setKindValue(R_X86_64_PC32); + // Handle IFUNC. + if (const DefinedAtom *da = + dyn_cast_or_null<const DefinedAtom>(ref.target())) + if (da->contentType() == DefinedAtom::typeResolver) + return handleIFUNC(ref); + // If it is undefined at link time, push the work to the dynamic linker by + // creating a PLT entry + if (isa<SharedLibraryAtom>(ref.target()) || + isa<UndefinedAtom>(ref.target())) + const_cast<Reference &>(ref).setTarget(getPLTEntry(ref.target())); + return std::error_code(); + } + + const GOTAtom *getSharedGOT(const Atom *a) { + auto got = _gotMap.find(a); + if (got == _gotMap.end()) { + auto g = new (_file._alloc) X86_64GOTAtom(_file, ".got"); + g->addReferenceELF_x86_64(R_X86_64_GLOB_DAT, 0, a, 0); +#ifndef NDEBUG + g->_name = "__got_"; + g->_name += a->name(); +#endif + _gotMap[a] = g; + _gotVector.push_back(g); + return g; + } + return got->second; + } + + std::error_code handleGOT(const Reference &ref) { + if (const DefinedAtom *da = dyn_cast<const DefinedAtom>(ref.target())) + const_cast<Reference &>(ref).setTarget(getGOT(da)); + // Handle undefined atoms in the same way as shared lib atoms: to be + // resolved at run time. 
+ else if (isa<SharedLibraryAtom>(ref.target()) || + isa<UndefinedAtom>(ref.target())) + const_cast<Reference &>(ref).setTarget(getSharedGOT(ref.target())); + return std::error_code(); + } +}; +} // end anon namespace + +std::unique_ptr<Pass> +lld::elf::createX86_64RelocationPass(const X86_64LinkingContext &ctx) { + switch (ctx.getOutputELFType()) { + case llvm::ELF::ET_EXEC: + if (ctx.isDynamic()) + return llvm::make_unique<DynamicRelocationPass>(ctx); + return llvm::make_unique<StaticRelocationPass>(ctx); + case llvm::ELF::ET_DYN: + return llvm::make_unique<DynamicRelocationPass>(ctx); + case llvm::ELF::ET_REL: + return nullptr; + default: + llvm_unreachable("Unhandled output file type"); + } +} diff --git a/lib/ReaderWriter/ELF/X86_64/X86_64RelocationPass.h b/lib/ReaderWriter/ELF/X86_64/X86_64RelocationPass.h new file mode 100644 index 000000000000..1635b5e5f57b --- /dev/null +++ b/lib/ReaderWriter/ELF/X86_64/X86_64RelocationPass.h @@ -0,0 +1,32 @@ +//===- lib/ReaderWriter/ELF/X86_64/X86_64RelocationPass.h -----------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief Declares the relocation processing pass for x86-64. This includes +/// GOT and PLT entries, TLS, COPY, and ifunc. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_ELF_X86_64_X86_64_RELOCATION_PASS_H +#define LLD_READER_WRITER_ELF_X86_64_X86_64_RELOCATION_PASS_H + +#include <memory> + +namespace lld { +class Pass; +namespace elf { +class X86_64LinkingContext; + +/// \brief Create x86-64 relocation pass for the given linking context. 
+std::unique_ptr<Pass> +createX86_64RelocationPass(const X86_64LinkingContext &); +} +} + +#endif diff --git a/lib/ReaderWriter/ELF/X86_64/X86_64TargetHandler.cpp b/lib/ReaderWriter/ELF/X86_64/X86_64TargetHandler.cpp new file mode 100644 index 000000000000..f35330eb25c0 --- /dev/null +++ b/lib/ReaderWriter/ELF/X86_64/X86_64TargetHandler.cpp @@ -0,0 +1,52 @@ +//===- lib/ReaderWriter/ELF/X86_64/X86_64TargetHandler.cpp ----------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "Atoms.h" +#include "X86_64DynamicLibraryWriter.h" +#include "X86_64ExecutableWriter.h" +#include "X86_64LinkingContext.h" +#include "X86_64TargetHandler.h" + +using namespace lld; +using namespace elf; + +X86_64TargetHandler::X86_64TargetHandler(X86_64LinkingContext &context) + : _context(context), _x86_64TargetLayout(new X86_64TargetLayout(context)), + _x86_64RelocationHandler( + new X86_64TargetRelocationHandler(*_x86_64TargetLayout.get())) {} + +void X86_64TargetHandler::registerRelocationNames(Registry ®istry) { + registry.addKindTable(Reference::KindNamespace::ELF, + Reference::KindArch::x86_64, kindStrings); +} + +std::unique_ptr<Writer> X86_64TargetHandler::getWriter() { + switch (this->_context.getOutputELFType()) { + case llvm::ELF::ET_EXEC: + return std::unique_ptr<Writer>( + new X86_64ExecutableWriter(_context, *_x86_64TargetLayout.get())); + case llvm::ELF::ET_DYN: + return std::unique_ptr<Writer>( + new X86_64DynamicLibraryWriter(_context, *_x86_64TargetLayout.get())); + case llvm::ELF::ET_REL: + llvm_unreachable("TODO: support -r mode"); + default: + llvm_unreachable("unsupported output type"); + } +} + +#define ELF_RELOC(name, value) LLD_KIND_STRING_ENTRY(name), + +const Registry::KindStrings X86_64TargetHandler::kindStrings[] = { +#include "llvm/Support/ELFRelocs/x86_64.def" + 
LLD_KIND_STRING_ENTRY(LLD_R_X86_64_GOTRELINDEX), + LLD_KIND_STRING_END +}; + +#undef ELF_RELOC diff --git a/lib/ReaderWriter/ELF/X86_64/X86_64TargetHandler.h b/lib/ReaderWriter/ELF/X86_64/X86_64TargetHandler.h new file mode 100644 index 000000000000..57da7bca01e6 --- /dev/null +++ b/lib/ReaderWriter/ELF/X86_64/X86_64TargetHandler.h @@ -0,0 +1,69 @@ +//===- lib/ReaderWriter/ELF/X86_64/X86_64TargetHandler.h ------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_ELF_X86_64_X86_64_TARGET_HANDLER_H +#define LLD_READER_WRITER_ELF_X86_64_X86_64_TARGET_HANDLER_H + +#include "DefaultTargetHandler.h" +#include "TargetLayout.h" +#include "X86_64ELFFile.h" +#include "X86_64ELFReader.h" +#include "X86_64LinkingContext.h" +#include "X86_64RelocationHandler.h" +#include "lld/Core/Simple.h" + +namespace lld { +namespace elf { +class X86_64TargetLayout : public TargetLayout<X86_64ELFType> { +public: + X86_64TargetLayout(X86_64LinkingContext &context) + : TargetLayout(context) {} + + void finalizeOutputSectionLayout() override { + sortOutputSectionByPriority(".init_array", ".init_array"); + sortOutputSectionByPriority(".fini_array", ".fini_array"); + } +}; + +class X86_64TargetHandler + : public DefaultTargetHandler<X86_64ELFType> { +public: + X86_64TargetHandler(X86_64LinkingContext &context); + + X86_64TargetLayout &getTargetLayout() override { + return *(_x86_64TargetLayout.get()); + } + + void registerRelocationNames(Registry ®istry) override; + + const X86_64TargetRelocationHandler &getRelocationHandler() const override { + return *(_x86_64RelocationHandler.get()); + } + + std::unique_ptr<Reader> getObjReader() override { + return std::unique_ptr<Reader>(new X86_64ELFObjectReader(_context)); + } + + std::unique_ptr<Reader> getDSOReader() override { + 
return std::unique_ptr<Reader>(new X86_64ELFDSOReader(_context)); + } + + std::unique_ptr<Writer> getWriter() override; + +protected: + static const Registry::KindStrings kindStrings[]; + X86_64LinkingContext &_context; + std::unique_ptr<X86_64TargetLayout> _x86_64TargetLayout; + std::unique_ptr<X86_64TargetRelocationHandler> _x86_64RelocationHandler; +}; + +} // end namespace elf +} // end namespace lld + +#endif diff --git a/lib/ReaderWriter/FileArchive.cpp b/lib/ReaderWriter/FileArchive.cpp new file mode 100644 index 000000000000..3f38814ae18e --- /dev/null +++ b/lib/ReaderWriter/FileArchive.cpp @@ -0,0 +1,293 @@ +//===- lib/ReaderWriter/FileArchive.cpp -----------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "lld/Core/ArchiveLibraryFile.h" +#include "lld/Core/LLVM.h" +#include "lld/Core/LinkingContext.h" +#include "lld/Core/Parallel.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Object/Archive.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/MemoryBuffer.h" +#include <memory> +#include <mutex> +#include <set> +#include <unordered_map> + +using llvm::object::Archive; +using llvm::object::ObjectFile; +using llvm::object::SymbolRef; +using llvm::object::symbol_iterator; +using llvm::object::object_error; + +namespace lld { + +namespace { + +/// \brief The FileArchive class represents an Archive Library file +class FileArchive : public lld::ArchiveLibraryFile { +public: + FileArchive(std::unique_ptr<MemoryBuffer> mb, const Registry ®, + StringRef path, bool logLoading) + : ArchiveLibraryFile(path), _mb(std::shared_ptr<MemoryBuffer>(mb.release())), + _registry(reg), _logLoading(logLoading) {} + + /// \brief Check if any member 
of the archive contains an Atom with the + /// specified name and return the File object for that member, or nullptr. + File *find(StringRef name, bool dataSymbolOnly) override { + auto member = _symbolMemberMap.find(name); + if (member == _symbolMemberMap.end()) + return nullptr; + Archive::child_iterator ci = member->second; + + // Don't return a member already returned + const char *memberStart = ci->getBuffer().data(); + if (_membersInstantiated.count(memberStart)) + return nullptr; + if (dataSymbolOnly && !isDataSymbol(ci, name)) + return nullptr; + + _membersInstantiated.insert(memberStart); + + // Check if a file is preloaded. + { + std::lock_guard<std::mutex> lock(_mutex); + auto it = _preloaded.find(memberStart); + if (it != _preloaded.end()) { + std::unique_ptr<Future<File *>> &p = it->second; + Future<File *> *future = p.get(); + return future->get(); + } + } + + std::unique_ptr<File> result; + if (instantiateMember(ci, result)) + return nullptr; + + // give up the pointer so that this object no longer manages it + return result.release(); + } + + // Instantiate a member file containing a given symbol name. + void preload(TaskGroup &group, StringRef name) override { + auto member = _symbolMemberMap.find(name); + if (member == _symbolMemberMap.end()) + return; + Archive::child_iterator ci = member->second; + + // Do nothing if a member is already instantiated. + const char *memberStart = ci->getBuffer().data(); + if (_membersInstantiated.count(memberStart)) + return; + + std::lock_guard<std::mutex> lock(_mutex); + if (_preloaded.find(memberStart) != _preloaded.end()) + return; + + // Instantiate the member + auto *future = new Future<File *>(); + _preloaded[memberStart] = std::unique_ptr<Future<File *>>(future); + + group.spawn([=] { + std::unique_ptr<File> result; + std::error_code ec = instantiateMember(ci, result); + future->set(ec ? 
nullptr : result.release()); + }); + } + + /// \brief parse each member + std::error_code + parseAllMembers(std::vector<std::unique_ptr<File>> &result) override { + if (std::error_code ec = parse()) + return ec; + for (auto mf = _archive->child_begin(), me = _archive->child_end(); + mf != me; ++mf) { + std::unique_ptr<File> file; + if (std::error_code ec = instantiateMember(mf, file)) + return ec; + result.push_back(std::move(file)); + } + return std::error_code(); + } + + const atom_collection<DefinedAtom> &defined() const override { + return _definedAtoms; + } + + const atom_collection<UndefinedAtom> &undefined() const override { + return _undefinedAtoms; + } + + const atom_collection<SharedLibraryAtom> &sharedLibrary() const override { + return _sharedLibraryAtoms; + } + + const atom_collection<AbsoluteAtom> &absolute() const override { + return _absoluteAtoms; + } + + /// Returns a set of all defined symbols in the archive. + std::set<StringRef> getDefinedSymbols() override { + parse(); + std::set<StringRef> ret; + for (const auto &e : _symbolMemberMap) + ret.insert(e.first); + return ret; + } + +protected: + std::error_code doParse() override { + // Make Archive object which will be owned by FileArchive object. 
+ std::error_code ec; + _archive.reset(new Archive(_mb->getMemBufferRef(), ec)); + if (ec) + return ec; + if ((ec = buildTableOfContents())) + return ec; + return std::error_code(); + } + +private: + std::error_code + instantiateMember(Archive::child_iterator member, + std::unique_ptr<File> &result) const { + ErrorOr<llvm::MemoryBufferRef> mbOrErr = member->getMemoryBufferRef(); + if (std::error_code ec = mbOrErr.getError()) + return ec; + llvm::MemoryBufferRef mb = mbOrErr.get(); + std::string memberPath = (_archive->getFileName() + "(" + + mb.getBufferIdentifier() + ")").str(); + + if (_logLoading) + llvm::errs() << memberPath << "\n"; + + std::unique_ptr<MemoryBuffer> memberMB(MemoryBuffer::getMemBuffer( + mb.getBuffer(), mb.getBufferIdentifier(), false)); + + std::vector<std::unique_ptr<File>> files; + if (std::error_code ec = _registry.loadFile(std::move(memberMB), files)) + return ec; + assert(files.size() == 1); + result = std::move(files[0]); + if (std::error_code ec = result->parse()) + return ec; + result->setArchivePath(_archive->getFileName()); + + // The memory buffer is co-owned by the archive file and the children, + // so that the bufffer is deallocated when all the members are destructed. + result->setSharedMemoryBuffer(_mb); + return std::error_code(); + } + + // Parses the given memory buffer as an object file, and returns true + // code if the given symbol is a data symbol. If the symbol is not a data + // symbol or does not exist, returns false. 
+ bool isDataSymbol(Archive::child_iterator member, StringRef symbol) const { + ErrorOr<llvm::MemoryBufferRef> buf = member->getMemoryBufferRef(); + if (buf.getError()) + return false; + std::unique_ptr<MemoryBuffer> mb(MemoryBuffer::getMemBuffer( + buf.get().getBuffer(), buf.get().getBufferIdentifier(), false)); + + auto objOrErr(ObjectFile::createObjectFile(mb->getMemBufferRef())); + if (objOrErr.getError()) + return false; + std::unique_ptr<ObjectFile> obj = std::move(objOrErr.get()); + + for (SymbolRef sym : obj->symbols()) { + // Skip until we find the symbol. + StringRef name; + if (sym.getName(name)) + return false; + if (name != symbol) + continue; + uint32_t flags = sym.getFlags(); + if (flags <= SymbolRef::SF_Undefined) + continue; + + // Returns true if it's a data symbol. + SymbolRef::Type type; + if (sym.getType(type)) + return false; + if (type == SymbolRef::ST_Data) + return true; + } + return false; + } + + std::error_code buildTableOfContents() { + DEBUG_WITH_TYPE("FileArchive", llvm::dbgs() + << "Table of contents for archive '" + << _archive->getFileName() << "':\n"); + for (const Archive::Symbol &sym : _archive->symbols()) { + StringRef name = sym.getName(); + ErrorOr<Archive::child_iterator> memberOrErr = sym.getMember(); + if (std::error_code ec = memberOrErr.getError()) + return ec; + Archive::child_iterator member = memberOrErr.get(); + DEBUG_WITH_TYPE( + "FileArchive", + llvm::dbgs() << llvm::format("0x%08llX ", member->getBuffer().data()) + << "'" << name << "'\n"); + _symbolMemberMap[name] = member; + } + return std::error_code(); + } + + typedef std::unordered_map<StringRef, Archive::child_iterator> MemberMap; + typedef std::set<const char *> InstantiatedSet; + + std::shared_ptr<MemoryBuffer> _mb; + const Registry &_registry; + std::unique_ptr<Archive> _archive; + MemberMap _symbolMemberMap; + InstantiatedSet _membersInstantiated; + atom_collection_vector<DefinedAtom> _definedAtoms; + atom_collection_vector<UndefinedAtom> 
_undefinedAtoms; + atom_collection_vector<SharedLibraryAtom> _sharedLibraryAtoms; + atom_collection_vector<AbsoluteAtom> _absoluteAtoms; + bool _logLoading; + std::vector<std::unique_ptr<MemoryBuffer>> _memberBuffers; + std::map<const char *, std::unique_ptr<Future<File *>>> _preloaded; + std::mutex _mutex; +}; + +class ArchiveReader : public Reader { +public: + ArchiveReader(bool logLoading) : _logLoading(logLoading) {} + + bool canParse(file_magic magic, StringRef, + const MemoryBuffer &) const override { + return (magic == llvm::sys::fs::file_magic::archive); + } + + std::error_code + loadFile(std::unique_ptr<MemoryBuffer> mb, const Registry ®, + std::vector<std::unique_ptr<File>> &result) const override { + StringRef path = mb->getBufferIdentifier(); + std::unique_ptr<FileArchive> file( + new FileArchive(std::move(mb), reg, path, _logLoading)); + result.push_back(std::move(file)); + return std::error_code(); + } + +private: + bool _logLoading; +}; + +} // anonymous namespace + +void Registry::addSupportArchives(bool logLoading) { + add(std::unique_ptr<Reader>(new ArchiveReader(logLoading))); +} + +} // end namespace lld diff --git a/lib/ReaderWriter/LinkerScript.cpp b/lib/ReaderWriter/LinkerScript.cpp new file mode 100644 index 000000000000..56194cae5e72 --- /dev/null +++ b/lib/ReaderWriter/LinkerScript.cpp @@ -0,0 +1,2564 @@ +//===- ReaderWriter/LinkerScript.cpp --------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief Linker script parser. 
+/// +//===----------------------------------------------------------------------===// + +#include "lld/ReaderWriter/LinkerScript.h" + +namespace lld { +namespace script { +void Token::dump(raw_ostream &os) const { + switch (_kind) { +#define CASE(name) \ + case Token::name: \ + os << #name ": "; \ + break; + CASE(unknown) + CASE(eof) + CASE(exclaim) + CASE(exclaimequal) + CASE(amp) + CASE(ampequal) + CASE(l_paren) + CASE(r_paren) + CASE(star) + CASE(starequal) + CASE(plus) + CASE(plusequal) + CASE(comma) + CASE(minus) + CASE(minusequal) + CASE(slash) + CASE(slashequal) + CASE(number) + CASE(colon) + CASE(semicolon) + CASE(less) + CASE(lessequal) + CASE(lessless) + CASE(lesslessequal) + CASE(equal) + CASE(equalequal) + CASE(greater) + CASE(greaterequal) + CASE(greatergreater) + CASE(greatergreaterequal) + CASE(question) + CASE(identifier) + CASE(libname) + CASE(kw_align) + CASE(kw_align_with_input) + CASE(kw_as_needed) + CASE(kw_at) + CASE(kw_discard) + CASE(kw_entry) + CASE(kw_exclude_file) + CASE(kw_extern) + CASE(kw_group) + CASE(kw_hidden) + CASE(kw_input) + CASE(kw_keep) + CASE(kw_length) + CASE(kw_memory) + CASE(kw_origin) + CASE(kw_provide) + CASE(kw_provide_hidden) + CASE(kw_only_if_ro) + CASE(kw_only_if_rw) + CASE(kw_output) + CASE(kw_output_arch) + CASE(kw_output_format) + CASE(kw_overlay) + CASE(kw_search_dir) + CASE(kw_sections) + CASE(kw_sort_by_alignment) + CASE(kw_sort_by_init_priority) + CASE(kw_sort_by_name) + CASE(kw_sort_none) + CASE(kw_subalign) + CASE(l_brace) + CASE(pipe) + CASE(pipeequal) + CASE(r_brace) + CASE(tilde) +#undef CASE + } + os << _range << "\n"; +} + +static llvm::ErrorOr<uint64_t> parseDecimal(StringRef str) { + uint64_t res = 0; + for (auto &c : str) { + res *= 10; + if (c < '0' || c > '9') + return llvm::ErrorOr<uint64_t>(std::make_error_code(std::errc::io_error)); + res += c - '0'; + } + return res; +} + +static llvm::ErrorOr<uint64_t> parseOctal(StringRef str) { + uint64_t res = 0; + for (auto &c : str) { + res <<= 3; + if 
(c < '0' || c > '7') + return llvm::ErrorOr<uint64_t>(std::make_error_code(std::errc::io_error)); + res += c - '0'; + } + return res; +} + +static llvm::ErrorOr<uint64_t> parseBinary(StringRef str) { + uint64_t res = 0; + for (auto &c : str) { + res <<= 1; + if (c != '0' && c != '1') + return llvm::ErrorOr<uint64_t>(std::make_error_code(std::errc::io_error)); + res += c - '0'; + } + return res; +} + +static llvm::ErrorOr<uint64_t> parseHex(StringRef str) { + uint64_t res = 0; + for (auto &c : str) { + res <<= 4; + if (c >= '0' && c <= '9') + res += c - '0'; + else if (c >= 'a' && c <= 'f') + res += c - 'a' + 10; + else if (c >= 'A' && c <= 'F') + res += c - 'A' + 10; + else + return llvm::ErrorOr<uint64_t>(std::make_error_code(std::errc::io_error)); + } + return res; +} + +static bool parseHexToByteStream(StringRef str, std::string &buf) { + unsigned char byte = 0; + bool dumpByte = str.size() % 2; + for (auto &c : str) { + byte <<= 4; + if (c >= '0' && c <= '9') + byte += c - '0'; + else if (c >= 'a' && c <= 'f') + byte += c - 'a' + 10; + else if (c >= 'A' && c <= 'F') + byte += c - 'A' + 10; + else + return false; + if (!dumpByte) { + dumpByte = true; + continue; + } + buf.push_back(byte); + byte = 0; + dumpByte = false; + } + return !dumpByte; +} + +static void dumpByteStream(raw_ostream &os, StringRef stream) { + os << "0x"; + for (auto &c : stream) { + unsigned char firstNibble = c >> 4 & 0xF; + if (firstNibble > 9) + os << (char) ('A' + firstNibble - 10); + else + os << (char) ('0' + firstNibble); + unsigned char secondNibble = c & 0xF; + if (secondNibble > 9) + os << (char) ('A' + secondNibble - 10); + else + os << (char) ('0' + secondNibble); + } +} + +static llvm::ErrorOr<uint64_t> parseNum(StringRef str) { + unsigned multiplier = 1; + enum NumKind { decimal, hex, octal, binary }; + NumKind kind = llvm::StringSwitch<NumKind>(str) + .StartsWith("0x", hex) + .StartsWith("0X", hex) + .StartsWith("0", octal) + .Default(decimal); + + // Parse scale + if 
(str.endswith("K")) { + multiplier = 1 << 10; + str = str.drop_back(); + } else if (str.endswith("M")) { + multiplier = 1 << 20; + str = str.drop_back(); + } + + // Parse type + if (str.endswith_lower("o")) { + kind = octal; + str = str.drop_back(); + } else if (str.endswith_lower("h")) { + kind = hex; + str = str.drop_back(); + } else if (str.endswith_lower("d")) { + kind = decimal; + str = str.drop_back(); + } else if (str.endswith_lower("b")) { + kind = binary; + str = str.drop_back(); + } + + llvm::ErrorOr<uint64_t> res(0); + switch (kind) { + case hex: + if (str.startswith_lower("0x")) + str = str.drop_front(2); + res = parseHex(str); + break; + case octal: + res = parseOctal(str); + break; + case decimal: + res = parseDecimal(str); + break; + case binary: + res = parseBinary(str); + break; + } + if (res.getError()) + return res; + + *res = *res * multiplier; + return res; +} + +bool Lexer::canStartNumber(char c) const { + return '0' <= c && c <= '9'; +} + +bool Lexer::canContinueNumber(char c) const { + // [xX] = hex marker, [hHoO] = type suffix, [MK] = scale suffix. + return strchr("0123456789ABCDEFabcdefxXhHoOMK", c); +} + +bool Lexer::canStartName(char c) const { + return strchr( + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_.$/\\*", c); +} + +bool Lexer::canContinueName(char c) const { + return strchr("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" + "0123456789_.$/\\~=+[]*?-:", c); +} + +/// Helper function to split a StringRef in two at the nth character. +/// The StringRef s is updated, while the function returns the n first +/// characters. 
+static StringRef drop(StringRef &s, int n) { + StringRef res = s.substr(0, n); + s = s.drop_front(n); + return res; +} + +void Lexer::lex(Token &tok) { + skipWhitespace(); + if (_buffer.empty()) { + tok = Token(_buffer, Token::eof); + return; + } + switch (_buffer[0]) { + case 0: + tok = Token(drop(_buffer, 1), Token::eof); + return; + case '(': + tok = Token(drop(_buffer, 1), Token::l_paren); + return; + case ')': + tok = Token(drop(_buffer, 1), Token::r_paren); + return; + case '{': + tok = Token(drop(_buffer, 1), Token::l_brace); + return; + case '}': + tok = Token(drop(_buffer, 1), Token::r_brace); + return; + case '=': + if (_buffer.startswith("==")) { + tok = Token(drop(_buffer, 2), Token::equalequal); + return; + } + tok = Token(drop(_buffer, 1), Token::equal); + return; + case '!': + if (_buffer.startswith("!=")) { + tok = Token(drop(_buffer, 2), Token::exclaimequal); + return; + } + tok = Token(drop(_buffer, 1), Token::exclaim); + return; + case ',': + tok = Token(drop(_buffer, 1), Token::comma); + return; + case ';': + tok = Token(drop(_buffer, 1), Token::semicolon); + return; + case ':': + tok = Token(drop(_buffer, 1), Token::colon); + return; + case '&': + if (_buffer.startswith("&=")) { + tok = Token(drop(_buffer, 2), Token::ampequal); + return; + } + tok = Token(drop(_buffer, 1), Token::amp); + return; + case '|': + if (_buffer.startswith("|=")) { + tok = Token(drop(_buffer, 2), Token::pipeequal); + return; + } + tok = Token(drop(_buffer, 1), Token::pipe); + return; + case '+': + if (_buffer.startswith("+=")) { + tok = Token(drop(_buffer, 2), Token::plusequal); + return; + } + tok = Token(drop(_buffer, 1), Token::plus); + return; + case '-': { + if (_buffer.startswith("-=")) { + tok = Token(drop(_buffer, 2), Token::minusequal); + return; + } + if (!_buffer.startswith("-l")) { + tok = Token(drop(_buffer, 1), Token::minus); + return; + } + // -l<lib name> + _buffer = _buffer.drop_front(2); + StringRef::size_type start = 0; + if (_buffer[start] == ':') 
+ ++start; + if (!canStartName(_buffer[start])) + // Create 'unknown' token. + break; + auto libNameEnd = std::find_if(_buffer.begin() + start + 1, _buffer.end(), + [=](char c) { return !canContinueName(c); }); + StringRef::size_type libNameLen = + std::distance(_buffer.begin(), libNameEnd); + tok = Token(_buffer.substr(0, libNameLen), Token::libname); + _buffer = _buffer.drop_front(libNameLen); + return; + } + case '<': + if (_buffer.startswith("<<=")) { + tok = Token(drop(_buffer, 3), Token::lesslessequal); + return; + } + if (_buffer.startswith("<<")) { + tok = Token(drop(_buffer, 2), Token::lessless); + return; + } + if (_buffer.startswith("<=")) { + tok = Token(drop(_buffer, 2), Token::lessequal); + return; + } + tok = Token(drop(_buffer, 1), Token::less); + return; + case '>': + if (_buffer.startswith(">>=")) { + tok = Token(drop(_buffer, 3), Token::greatergreaterequal); + return; + } + if (_buffer.startswith(">>")) { + tok = Token(drop(_buffer, 2), Token::greatergreater); + return; + } + if (_buffer.startswith(">=")) { + tok = Token(drop(_buffer, 2), Token::greaterequal); + return; + } + tok = Token(drop(_buffer, 1), Token::greater); + return; + case '~': + tok = Token(drop(_buffer, 1), Token::tilde); + return; + case '\"': case '\'': { + // Handle quoted strings. They are treated as identifiers for + // simplicity. + char c = _buffer[0]; + _buffer = _buffer.drop_front(); + auto quotedStringEnd = _buffer.find(c); + if (quotedStringEnd == StringRef::npos || quotedStringEnd == 0) + break; + StringRef word = _buffer.substr(0, quotedStringEnd); + tok = Token(word, Token::identifier); + _buffer = _buffer.drop_front(quotedStringEnd + 1); + return; + } + default: + // Handle literal numbers + if (canStartNumber(_buffer[0])) { + auto endIter = std::find_if(_buffer.begin(), _buffer.end(), [=](char c) { + return !canContinueNumber(c); + }); + StringRef::size_type end = endIter == _buffer.end() + ? 
StringRef::npos + : std::distance(_buffer.begin(), endIter); + if (end == StringRef::npos || end == 0) + break; + StringRef word = _buffer.substr(0, end); + tok = Token(word, Token::number); + _buffer = _buffer.drop_front(end); + return; + } + // Handle slashes '/', which can be either an operator inside an expression + // or the beginning of an identifier + if (_buffer.startswith("/=")) { + tok = Token(drop(_buffer, 2), Token::slashequal); + return; + } + if (_buffer[0] == '/' && _buffer.size() > 1 && + !canContinueName(_buffer[1])) { + tok = Token(drop(_buffer, 1), Token::slash); + return; + } + // Handle stars '*' + if (_buffer.startswith("*=")) { + tok = Token(drop(_buffer, 2), Token::starequal); + return; + } + if (_buffer[0] == '*' && _buffer.size() > 1 && + !canContinueName(_buffer[1])) { + tok = Token(drop(_buffer, 1), Token::star); + return; + } + // Handle questions '?' + if (_buffer[0] == '?' && _buffer.size() > 1 && + !canContinueName(_buffer[1])) { + tok = Token(drop(_buffer, 1), Token::question); + return; + } + // keyword or identifier. + if (!canStartName(_buffer[0])) + break; + auto endIter = std::find_if(_buffer.begin() + 1, _buffer.end(), + [=](char c) { return !canContinueName(c); }); + StringRef::size_type end = endIter == _buffer.end() + ? 
StringRef::npos + : std::distance(_buffer.begin(), endIter); + if (end == StringRef::npos || end == 0) + break; + StringRef word = _buffer.substr(0, end); + Token::Kind kind = + llvm::StringSwitch<Token::Kind>(word) + .Case("ALIGN", Token::kw_align) + .Case("ALIGN_WITH_INPUT", Token::kw_align_with_input) + .Case("AS_NEEDED", Token::kw_as_needed) + .Case("AT", Token::kw_at) + .Case("ENTRY", Token::kw_entry) + .Case("EXCLUDE_FILE", Token::kw_exclude_file) + .Case("EXTERN", Token::kw_extern) + .Case("GROUP", Token::kw_group) + .Case("HIDDEN", Token::kw_hidden) + .Case("INPUT", Token::kw_input) + .Case("KEEP", Token::kw_keep) + .Case("LENGTH", Token::kw_length) + .Case("l", Token::kw_length) + .Case("len", Token::kw_length) + .Case("MEMORY", Token::kw_memory) + .Case("ONLY_IF_RO", Token::kw_only_if_ro) + .Case("ONLY_IF_RW", Token::kw_only_if_rw) + .Case("ORIGIN", Token::kw_origin) + .Case("o", Token::kw_origin) + .Case("org", Token::kw_origin) + .Case("OUTPUT", Token::kw_output) + .Case("OUTPUT_ARCH", Token::kw_output_arch) + .Case("OUTPUT_FORMAT", Token::kw_output_format) + .Case("OVERLAY", Token::kw_overlay) + .Case("PROVIDE", Token::kw_provide) + .Case("PROVIDE_HIDDEN", Token::kw_provide_hidden) + .Case("SEARCH_DIR", Token::kw_search_dir) + .Case("SECTIONS", Token::kw_sections) + .Case("SORT", Token::kw_sort_by_name) + .Case("SORT_BY_ALIGNMENT", Token::kw_sort_by_alignment) + .Case("SORT_BY_INIT_PRIORITY", Token::kw_sort_by_init_priority) + .Case("SORT_BY_NAME", Token::kw_sort_by_name) + .Case("SORT_NONE", Token::kw_sort_none) + .Case("SUBALIGN", Token::kw_subalign) + .Case("/DISCARD/", Token::kw_discard) + .Default(Token::identifier); + tok = Token(word, kind); + _buffer = _buffer.drop_front(end); + return; + } + tok = Token(drop(_buffer, 1), Token::unknown); +} + +void Lexer::skipWhitespace() { + while (true) { + if (_buffer.empty()) + return; + switch (_buffer[0]) { + case ' ': + case '\r': + case '\n': + case '\t': + _buffer = _buffer.drop_front(); + break; + // 
Potential comment. + case '/': + if (_buffer.size() <= 1 || _buffer[1] != '*') + return; + // Skip starting /* + _buffer = _buffer.drop_front(2); + // If the next char is also a /, it's not the end. + if (!_buffer.empty() && _buffer[0] == '/') + _buffer = _buffer.drop_front(); + + // Scan for /'s. We're done if it is preceded by a *. + while (true) { + if (_buffer.empty()) + break; + _buffer = _buffer.drop_front(); + if (_buffer.data()[-1] == '/' && _buffer.data()[-2] == '*') + break; + } + break; + default: + return; + } + } +} + +// Constant functions +void Constant::dump(raw_ostream &os) const { os << _num; } + +ErrorOr<int64_t> Constant::evalExpr(SymbolTableTy &symbolTable) const { + return _num; +} + +// Symbol functions +void Symbol::dump(raw_ostream &os) const { os << _name; } + +ErrorOr<int64_t> Symbol::evalExpr(SymbolTableTy &symbolTable) const { + auto it = symbolTable.find(_name); + if (it == symbolTable.end()) + return LinkerScriptReaderError::unknown_symbol_in_expr; + return it->second; +} + +// FunctionCall functions +void FunctionCall::dump(raw_ostream &os) const { + os << _name << "("; + for (unsigned i = 0, e = _args.size(); i != e; ++i) { + if (i) + os << ", "; + _args[i]->dump(os); + } + os << ")"; +} + +ErrorOr<int64_t> FunctionCall::evalExpr(SymbolTableTy &symbolTable) const { + return LinkerScriptReaderError::unrecognized_function_in_expr; +} + +// Unary functions +void Unary::dump(raw_ostream &os) const { + os << "("; + if (_op == Unary::Minus) + os << "-"; + else + os << "~"; + _child->dump(os); + os << ")"; +} + +ErrorOr<int64_t> Unary::evalExpr(SymbolTableTy &symbolTable) const { + auto child = _child->evalExpr(symbolTable); + if (child.getError()) + return child.getError(); + + int64_t childRes = *child; + switch (_op) { + case Unary::Minus: + return -childRes; + case Unary::Not: + return ~childRes; + } + + llvm_unreachable(""); +} + +// BinOp functions +void BinOp::dump(raw_ostream &os) const { + os << "("; + _lhs->dump(os); + os << " "; 
+ switch (_op) { + case Sum: + os << "+"; + break; + case Sub: + os << "-"; + break; + case Mul: + os << "*"; + break; + case Div: + os << "/"; + break; + case Shl: + os << "<<"; + break; + case Shr: + os << ">>"; + break; + case And: + os << "&"; + break; + case Or: + os << "|"; + break; + case CompareEqual: + os << "=="; + break; + case CompareDifferent: + os << "!="; + break; + case CompareLess: + os << "<"; + break; + case CompareGreater: + os << ">"; + break; + case CompareLessEqual: + os << "<="; + break; + case CompareGreaterEqual: + os << ">="; + break; + } + os << " "; + _rhs->dump(os); + os << ")"; +} + +ErrorOr<int64_t> BinOp::evalExpr(SymbolTableTy &symbolTable) const { + auto lhs = _lhs->evalExpr(symbolTable); + if (lhs.getError()) + return lhs.getError(); + auto rhs = _rhs->evalExpr(symbolTable); + if (rhs.getError()) + return rhs.getError(); + + int64_t lhsRes = *lhs; + int64_t rhsRes = *rhs; + + switch(_op) { + case And: return lhsRes & rhsRes; + case CompareDifferent: return lhsRes != rhsRes; + case CompareEqual: return lhsRes == rhsRes; + case CompareGreater: return lhsRes > rhsRes; + case CompareGreaterEqual: return lhsRes >= rhsRes; + case CompareLess: return lhsRes < rhsRes; + case CompareLessEqual: return lhsRes <= rhsRes; + case Div: return lhsRes / rhsRes; + case Mul: return lhsRes * rhsRes; + case Or: return lhsRes | rhsRes; + case Shl: return lhsRes << rhsRes; + case Shr: return lhsRes >> rhsRes; + case Sub: return lhsRes - rhsRes; + case Sum: return lhsRes + rhsRes; + } + + llvm_unreachable(""); +} + +// TernaryConditional functions +void TernaryConditional::dump(raw_ostream &os) const { + _conditional->dump(os); + os << " ? 
"; + _trueExpr->dump(os); + os << " : "; + _falseExpr->dump(os); +} + +ErrorOr<int64_t> +TernaryConditional::evalExpr(SymbolTableTy &symbolTable) const { + auto conditional = _conditional->evalExpr(symbolTable); + if (conditional.getError()) + return conditional.getError(); + if (*conditional) + return _trueExpr->evalExpr(symbolTable); + return _falseExpr->evalExpr(symbolTable); +} + +// SymbolAssignment functions +void SymbolAssignment::dump(raw_ostream &os) const { + int numParen = 0; + + if (_assignmentVisibility != Default) { + switch (_assignmentVisibility) { + case Hidden: + os << "HIDDEN("; + break; + case Provide: + os << "PROVIDE("; + break; + case ProvideHidden: + os << "PROVIDE_HIDDEN("; + break; + default: + llvm_unreachable("Unknown visibility"); + } + ++numParen; + } + + os << _symbol << " "; + switch (_assignmentKind) { + case Simple: + os << "="; + break; + case Sum: + os << "+="; + break; + case Sub: + os << "-="; + break; + case Mul: + os << "*="; + break; + case Div: + os << "/="; + break; + case Shl: + os << "<<="; + break; + case Shr: + os << ">>="; + break; + case And: + os << "&="; + break; + case Or: + os << "|="; + break; + } + + os << " "; + _expression->dump(os); + if (numParen) + os << ")"; + os << ";"; +} + +static int dumpSortDirectives(raw_ostream &os, WildcardSortMode sortMode) { + switch (sortMode) { + case WildcardSortMode::NA: + return 0; + case WildcardSortMode::ByName: + os << "SORT_BY_NAME("; + return 1; + case WildcardSortMode::ByAlignment: + os << "SORT_BY_ALIGNMENT("; + return 1; + case WildcardSortMode::ByInitPriority: + os << "SORT_BY_INIT_PRIORITY("; + return 1; + case WildcardSortMode::ByNameAndAlignment: + os << "SORT_BY_NAME(SORT_BY_ALIGNMENT("; + return 2; + case WildcardSortMode::ByAlignmentAndName: + os << "SORT_BY_ALIGNMENT(SORT_BY_NAME("; + return 2; + case WildcardSortMode::None: + os << "SORT_NONE("; + return 1; + } + return 0; +} + +// InputSectionName functions +void InputSectionName::dump(raw_ostream &os) 
const { + os << _name; +} + +// InputSectionSortedGroup functions +static void dumpInputSections(raw_ostream &os, + llvm::ArrayRef<const InputSection *> secs) { + bool excludeFile = false; + bool first = true; + + for (auto &secName : secs) { + if (!first) + os << " "; + first = false; + // Coalesce multiple input sections marked with EXCLUDE_FILE in the same + // EXCLUDE_FILE() group + if (auto inputSec = dyn_cast<InputSectionName>(secName)) { + if (!excludeFile && inputSec->hasExcludeFile()) { + excludeFile = true; + os << "EXCLUDE_FILE("; + } else if (excludeFile && !inputSec->hasExcludeFile()) { + excludeFile = false; + os << ") "; + } + } + secName->dump(os); + } + + if (excludeFile) + os << ")"; +} + +void InputSectionSortedGroup::dump(raw_ostream &os) const { + int numParen = dumpSortDirectives(os, _sortMode); + dumpInputSections(os, _sections); + for (int i = 0; i < numParen; ++i) + os << ")"; +} + +// InputSectionsCmd functions +void InputSectionsCmd::dump(raw_ostream &os) const { + if (_keep) + os << "KEEP("; + + int numParen = dumpSortDirectives(os, _fileSortMode); + os << _memberName; + for (int i = 0; i < numParen; ++i) + os << ")"; + + if (_archiveName.size() > 0) { + os << ":"; + numParen = dumpSortDirectives(os, _archiveSortMode); + os << _archiveName; + for (int i = 0; i < numParen; ++i) + os << ")"; + } + + if (_sections.size() > 0) { + os << "("; + dumpInputSections(os, _sections); + os << ")"; + } + + if (_keep) + os << ")"; +} + +// OutputSectionDescription functions +void OutputSectionDescription::dump(raw_ostream &os) const { + if (_discard) + os << "/DISCARD/"; + else + os << _sectionName; + + if (_address) { + os << " "; + _address->dump(os); + } + os << " :\n"; + + if (_at) { + os << " AT("; + _at->dump(os); + os << ")\n"; + } + + if (_align) { + os << " ALIGN("; + _align->dump(os); + os << ")\n"; + } else if (_alignWithInput) { + os << " ALIGN_WITH_INPUT\n"; + } + + if (_subAlign) { + os << " SUBALIGN("; + _subAlign->dump(os); + os << 
")\n"; + } + + switch (_constraint) { + case C_None: + break; + case C_OnlyIfRO: + os << "ONLY_IF_RO"; + break; + case C_OnlyIfRW: + os << "ONLY_IF_RW"; + break; + } + + os << " {\n"; + for (auto &command : _outputSectionCommands) { + os << " "; + command->dump(os); + os << "\n"; + } + os << " }"; + + if (_fillStream.size() > 0) { + os << " ="; + dumpByteStream(os, _fillStream); + } else if (_fillExpr) { + os << " ="; + _fillExpr->dump(os); + } +} + +// Sections functions +void Sections::dump(raw_ostream &os) const { + os << "SECTIONS\n{\n"; + for (auto &command : _sectionsCommands) { + command->dump(os); + os << "\n"; + } + os << "}\n"; +} + +// Memory functions +void MemoryBlock::dump(raw_ostream &os) const { + os << _name; + + if (!_attr.empty()) + os << " (" << _attr << ")"; + + os << " : "; + + os << "ORIGIN = "; + _origin->dump(os); + os << ", "; + + os << "LENGTH = "; + _length->dump(os); +} + +void Memory::dump(raw_ostream &os) const { + os << "MEMORY\n{\n"; + for (auto &block : _blocks) { + block->dump(os); + os << "\n"; + } + os << "}\n"; +} + +// Extern functions +void Extern::dump(raw_ostream &os) const { + os << "EXTERN("; + for (unsigned i = 0, e = _symbols.size(); i != e; ++i) { + if (i) + os << " "; + os << _symbols[i]; + } + os << ")\n"; +} + + +// Parser functions +std::error_code Parser::parse() { + // Get the first token. + _lex.lex(_tok); + // Parse top level commands. 
+ while (true) { + switch (_tok._kind) { + case Token::eof: + return std::error_code(); + case Token::semicolon: + consumeToken(); + break; + case Token::kw_output: { + auto output = parseOutput(); + if (!output) + return LinkerScriptReaderError::parse_error; + _script._commands.push_back(output); + break; + } + case Token::kw_output_format: { + auto outputFormat = parseOutputFormat(); + if (!outputFormat) + return LinkerScriptReaderError::parse_error; + _script._commands.push_back(outputFormat); + break; + } + case Token::kw_output_arch: { + auto outputArch = parseOutputArch(); + if (!outputArch) + return LinkerScriptReaderError::parse_error; + _script._commands.push_back(outputArch); + break; + } + case Token::kw_input: { + Input *input = parsePathList<Input>(); + if (!input) + return LinkerScriptReaderError::parse_error; + _script._commands.push_back(input); + break; + } + case Token::kw_group: { + Group *group = parsePathList<Group>(); + if (!group) + return LinkerScriptReaderError::parse_error; + _script._commands.push_back(group); + break; + } + case Token::kw_as_needed: + // Not allowed at top level. 
+ error(_tok, "AS_NEEDED not allowed at top level."); + return LinkerScriptReaderError::parse_error; + case Token::kw_entry: { + Entry *entry = parseEntry(); + if (!entry) + return LinkerScriptReaderError::parse_error; + _script._commands.push_back(entry); + break; + } + case Token::kw_search_dir: { + SearchDir *searchDir = parseSearchDir(); + if (!searchDir) + return LinkerScriptReaderError::parse_error; + _script._commands.push_back(searchDir); + break; + } + case Token::kw_sections: { + Sections *sections = parseSections(); + if (!sections) + return LinkerScriptReaderError::parse_error; + _script._commands.push_back(sections); + break; + } + case Token::identifier: + case Token::kw_hidden: + case Token::kw_provide: + case Token::kw_provide_hidden: { + const Command *cmd = parseSymbolAssignment(); + if (!cmd) + return LinkerScriptReaderError::parse_error; + _script._commands.push_back(cmd); + break; + } + case Token::kw_memory: { + const Command *cmd = parseMemory(); + if (!cmd) + return LinkerScriptReaderError::parse_error; + _script._commands.push_back(cmd); + break; + } + case Token::kw_extern: { + const Command *cmd = parseExtern(); + if (!cmd) + return LinkerScriptReaderError::parse_error; + _script._commands.push_back(cmd); + break; + } + default: + // Unexpected. 
+ error(_tok, "expected linker script command"); + return LinkerScriptReaderError::parse_error; + } + } + return LinkerScriptReaderError::parse_error; +} + +const Expression *Parser::parseFunctionCall() { + assert((_tok._kind == Token::identifier || _tok._kind == Token::kw_align) && + "expected function call first tokens"); + SmallVector<const Expression *, 8> params; + StringRef name = _tok._range; + + consumeToken(); + if (!expectAndConsume(Token::l_paren, "expected (")) + return nullptr; + + if (_tok._kind == Token::r_paren) { + consumeToken(); + return new (_alloc) FunctionCall(*this, _tok._range, params); + } + + if (const Expression *firstParam = parseExpression()) + params.push_back(firstParam); + else + return nullptr; + + while (_tok._kind == Token::comma) { + consumeToken(); + if (const Expression *param = parseExpression()) + params.push_back(param); + else + return nullptr; + } + + if (!expectAndConsume(Token::r_paren, "expected )")) + return nullptr; + return new (_alloc) FunctionCall(*this, name, params); +} + +bool Parser::expectExprOperand() { + if (!(_tok._kind == Token::identifier || _tok._kind == Token::number || + _tok._kind == Token::kw_align || _tok._kind == Token::l_paren || + _tok._kind == Token::minus || _tok._kind == Token::tilde)) { + error(_tok, "expected symbol, number, minus, tilde or left parenthesis."); + return false; + } + return true; +} + +const Expression *Parser::parseExprOperand() { + if (!expectExprOperand()) + return nullptr; + + switch (_tok._kind) { + case Token::identifier: { + if (peek()._kind== Token::l_paren) + return parseFunctionCall(); + Symbol *sym = new (_alloc) Symbol(*this, _tok._range); + consumeToken(); + return sym; + } + case Token::kw_align: + return parseFunctionCall(); + case Token::minus: + consumeToken(); + return new (_alloc) Unary(*this, Unary::Minus, parseExprOperand()); + case Token::tilde: + consumeToken(); + return new (_alloc) Unary(*this, Unary::Not, parseExprOperand()); + case Token::number: { 
+ auto val = parseNum(_tok._range); + if (val.getError()) { + error(_tok, "Unrecognized number constant"); + return nullptr; + } + Constant *c = new (_alloc) Constant(*this, *val); + consumeToken(); + return c; + } + case Token::l_paren: { + consumeToken(); + const Expression *expr = parseExpression(); + if (!expectAndConsume(Token::r_paren, "expected )")) + return nullptr; + return expr; + } + default: + llvm_unreachable("Unknown token"); + } +} + +static bool TokenToBinOp(const Token &tok, BinOp::Operation &op, + unsigned &precedence) { + switch (tok._kind) { + case Token::star: + op = BinOp::Mul; + precedence = 3; + return true; + case Token::slash: + op = BinOp::Div; + precedence = 3; + return true; + case Token::plus: + op = BinOp::Sum; + precedence = 4; + return true; + case Token::minus: + op = BinOp::Sub; + precedence = 4; + return true; + case Token::lessless: + op = BinOp::Shl; + precedence = 5; + return true; + case Token::greatergreater: + op = BinOp::Shr; + precedence = 5; + return true; + case Token::less: + op = BinOp::CompareLess; + precedence = 6; + return true; + case Token::greater: + op = BinOp::CompareGreater; + precedence = 6; + return true; + case Token::lessequal: + op = BinOp::CompareLessEqual; + precedence = 6; + return true; + case Token::greaterequal: + op = BinOp::CompareGreaterEqual; + precedence = 6; + return true; + case Token::equalequal: + op = BinOp::CompareEqual; + precedence = 7; + return true; + case Token::exclaimequal: + op = BinOp::CompareDifferent; + precedence = 7; + return true; + case Token::amp: + op = BinOp::And; + precedence = 8; + return true; + case Token::pipe: + op = BinOp::Or; + precedence = 10; + return true; + default: + break; + } + return false; +} + +static bool isExpressionOperator(Token tok) { + switch (tok._kind) { + case Token::star: + case Token::slash: + case Token::plus: + case Token::minus: + case Token::lessless: + case Token::greatergreater: + case Token::less: + case Token::greater: + case 
Token::lessequal: + case Token::greaterequal: + case Token::equalequal: + case Token::exclaimequal: + case Token::amp: + case Token::pipe: + case Token::question: + return true; + default: + return false; + } +} + +const Expression *Parser::parseExpression(unsigned precedence) { + assert(precedence <= 13 && "Invalid precedence value"); + if (!expectExprOperand()) + return nullptr; + + const Expression *expr = parseExprOperand(); + if (!expr) + return nullptr; + + BinOp::Operation op; + unsigned binOpPrecedence = 0; + if (TokenToBinOp(_tok, op, binOpPrecedence)) { + if (precedence >= binOpPrecedence) + return parseOperatorOperandLoop(expr, precedence); + return expr; + } + + // Non-binary operators + if (_tok._kind == Token::question && precedence >= 13) + return parseOperatorOperandLoop(expr, precedence); + return expr; +} + +const Expression *Parser::parseOperatorOperandLoop(const Expression *lhs, + unsigned highestPrecedence) { + assert(highestPrecedence <= 13 && "Invalid precedence value"); + unsigned precedence = 0; + const Expression *binOp = nullptr; + + while (1) { + BinOp::Operation op; + if (!TokenToBinOp(_tok, op, precedence)) { + if (_tok._kind == Token::question && highestPrecedence >= 13) + return parseTernaryCondOp(lhs); + return binOp; + } + + if (precedence > highestPrecedence) + return binOp; + + consumeToken(); + const Expression *rhs = parseExpression(precedence - 1); + if (!rhs) + return nullptr; + binOp = new (_alloc) BinOp(*this, lhs, op, rhs); + lhs = binOp; + } +} + +const Expression *Parser::parseTernaryCondOp(const Expression *lhs) { + assert(_tok._kind == Token::question && "Expected question mark"); + + consumeToken(); + + // The ternary conditional operator has right-to-left associativity. + // To implement this, we allow our children to contain ternary conditional + // operators themselves (precedence 13). 
+ const Expression *trueExpr = parseExpression(13); + if (!trueExpr) + return nullptr; + + if (!expectAndConsume(Token::colon, "expected :")) + return nullptr; + + const Expression *falseExpr = parseExpression(13); + if (!falseExpr) + return nullptr; + + return new (_alloc) TernaryConditional(*this, lhs, trueExpr, falseExpr); +} + +// Parse OUTPUT(ident) +Output *Parser::parseOutput() { + assert(_tok._kind == Token::kw_output && "Expected OUTPUT"); + consumeToken(); + if (!expectAndConsume(Token::l_paren, "expected (")) + return nullptr; + + if (_tok._kind != Token::identifier) { + error(_tok, "Expected identifier in OUTPUT."); + return nullptr; + } + + auto ret = new (_alloc) Output(*this, _tok._range); + consumeToken(); + + if (!expectAndConsume(Token::r_paren, "expected )")) + return nullptr; + + return ret; +} + +// Parse OUTPUT_FORMAT(ident) +OutputFormat *Parser::parseOutputFormat() { + assert(_tok._kind == Token::kw_output_format && "Expected OUTPUT_FORMAT!"); + consumeToken(); + if (!expectAndConsume(Token::l_paren, "expected (")) + return nullptr; + + if (_tok._kind != Token::identifier) { + error(_tok, "Expected identifier in OUTPUT_FORMAT."); + return nullptr; + } + + SmallVector<StringRef, 8> formats; + formats.push_back(_tok._range); + + consumeToken(); + + do { + if (isNextToken(Token::comma)) + consumeToken(); + else + break; + if (_tok._kind != Token::identifier) { + error(_tok, "Expected identifier in OUTPUT_FORMAT."); + return nullptr; + } + formats.push_back(_tok._range); + consumeToken(); + } while (isNextToken(Token::comma)); + + if (!expectAndConsume(Token::r_paren, "expected )")) + return nullptr; + + return new (_alloc) OutputFormat(*this, formats); +} + +// Parse OUTPUT_ARCH(ident) +OutputArch *Parser::parseOutputArch() { + assert(_tok._kind == Token::kw_output_arch && "Expected OUTPUT_ARCH!"); + consumeToken(); + if (!expectAndConsume(Token::l_paren, "expected (")) + return nullptr; + + if (_tok._kind != Token::identifier) { + error(_tok, 
"Expected identifier in OUTPUT_ARCH."); + return nullptr; + } + + auto ret = new (_alloc) OutputArch(*this, _tok._range); + consumeToken(); + + if (!expectAndConsume(Token::r_paren, "expected )")) + return nullptr; + + return ret; +} + +// Parse file list for INPUT or GROUP +template<class T> T *Parser::parsePathList() { + consumeToken(); + if (!expectAndConsume(Token::l_paren, "expected (")) + return nullptr; + + SmallVector<Path, 8> paths; + while (_tok._kind == Token::identifier || _tok._kind == Token::libname || + _tok._kind == Token::kw_as_needed) { + switch (_tok._kind) { + case Token::identifier: + paths.push_back(Path(_tok._range)); + consumeToken(); + break; + case Token::libname: + paths.push_back(Path(_tok._range, false, true)); + consumeToken(); + break; + case Token::kw_as_needed: + if (!parseAsNeeded(paths)) + return nullptr; + break; + default: + llvm_unreachable("Invalid token."); + } + } + if (!expectAndConsume(Token::r_paren, "expected )")) + return nullptr; + return new (_alloc) T(*this, paths); +} + +// Parse AS_NEEDED(file ...) 
+bool Parser::parseAsNeeded(SmallVectorImpl<Path> &paths) {
+  assert(_tok._kind == Token::kw_as_needed && "Expected AS_NEEDED!");
+  consumeToken();
+  if (!expectAndConsume(Token::l_paren, "expected ("))
+    return false;
+
+  // Append one Path per identifier/libname token. The list ends at the first
+  // token of any other kind, which must then be the closing parenthesis.
+  while (_tok._kind == Token::identifier || _tok._kind == Token::libname) {
+    switch (_tok._kind) {
+    case Token::identifier:
+      paths.push_back(Path(_tok._range, true, false));
+      consumeToken();
+      break;
+    case Token::libname:
+      paths.push_back(Path(_tok._range, true, true));
+      consumeToken();
+      break;
+    default:
+      llvm_unreachable("Invalid token.");
+    }
+  }
+
+  if (!expectAndConsume(Token::r_paren, "expected )"))
+    return false;
+  return true;
+}
+
+// Parse ENTRY(ident)
+//
+// Returns the allocated Entry command, or nullptr after reporting an error.
+Entry *Parser::parseEntry() {
+  assert(_tok._kind == Token::kw_entry && "Expected ENTRY!");
+  consumeToken();
+  if (!expectAndConsume(Token::l_paren, "expected ("))
+    return nullptr;
+  if (_tok._kind != Token::identifier) {
+    error(_tok, "expected identifier in ENTRY");
+    return nullptr;
+  }
+  StringRef entryName(_tok._range);
+  consumeToken();
+  if (!expectAndConsume(Token::r_paren, "expected )"))
+    return nullptr;
+  return new (_alloc) Entry(*this, entryName);
+}
+
+// Parse SEARCH_DIR(ident)
+//
+// Returns the allocated SearchDir command, or nullptr after reporting an
+// error.
+SearchDir *Parser::parseSearchDir() {
+  assert(_tok._kind == Token::kw_search_dir && "Expected SEARCH_DIR!");
+  consumeToken();
+  if (!expectAndConsume(Token::l_paren, "expected ("))
+    return nullptr;
+  if (_tok._kind != Token::identifier) {
+    error(_tok, "expected identifier in SEARCH_DIR");
+    return nullptr;
+  }
+  StringRef searchPath(_tok._range);
+  consumeToken();
+  if (!expectAndConsume(Token::r_paren, "expected )"))
+    return nullptr;
+  return new (_alloc) SearchDir(*this, searchPath);
+}
+
+// Parse a symbol assignment, optionally wrapped in HIDDEN(...), PROVIDE(...)
+// or PROVIDE_HIDDEN(...):
+//
+//   [HIDDEN|PROVIDE|PROVIDE_HIDDEN (] symbol <assign-op> expression [)]
+//
+// Returns the allocated SymbolAssignment, or nullptr after reporting an error.
+const SymbolAssignment *Parser::parseSymbolAssignment() {
+  assert((_tok._kind == Token::identifier || _tok._kind == Token::kw_hidden ||
+          _tok._kind == Token::kw_provide ||
+          _tok._kind == Token::kw_provide_hidden) &&
+         "Expected identifier!");
+  SymbolAssignment::AssignmentVisibility visibility = SymbolAssignment::Default;
+  SymbolAssignment::AssignmentKind kind;
+  // Number of parentheses opened by a visibility wrapper; the same number of
+  // closing parentheses is required after the expression.
+  int numParen = 0;
+
+  switch (_tok._kind) {
+  case Token::kw_hidden:
+    visibility = SymbolAssignment::Hidden;
+    ++numParen;
+    consumeToken();
+    if (!expectAndConsume(Token::l_paren, "expected ("))
+      return nullptr;
+    break;
+  case Token::kw_provide:
+    visibility = SymbolAssignment::Provide;
+    ++numParen;
+    consumeToken();
+    if (!expectAndConsume(Token::l_paren, "expected ("))
+      return nullptr;
+    break;
+  case Token::kw_provide_hidden:
+    visibility = SymbolAssignment::ProvideHidden;
+    ++numParen;
+    consumeToken();
+    if (!expectAndConsume(Token::l_paren, "expected ("))
+      return nullptr;
+    break;
+  default:
+    break;
+  }
+
+  // The current token is taken as the symbol name without checking its kind.
+  StringRef name = _tok._range;
+  consumeToken();
+
+  // Parse assignment operator (=, +=, -= etc.)
+  switch (_tok._kind) {
+  case Token::equal:
+    kind = SymbolAssignment::Simple;
+    break;
+  case Token::plusequal:
+    kind = SymbolAssignment::Sum;
+    break;
+  case Token::minusequal:
+    kind = SymbolAssignment::Sub;
+    break;
+  case Token::starequal:
+    kind = SymbolAssignment::Mul;
+    break;
+  case Token::slashequal:
+    kind = SymbolAssignment::Div;
+    break;
+  case Token::ampequal:
+    kind = SymbolAssignment::And;
+    break;
+  case Token::pipeequal:
+    kind = SymbolAssignment::Or;
+    break;
+  case Token::lesslessequal:
+    kind = SymbolAssignment::Shl;
+    break;
+  case Token::greatergreaterequal:
+    kind = SymbolAssignment::Shr;
+    break;
+  default:
+    error(_tok, "unexpected token");
+    return nullptr;
+  }
+
+  consumeToken();
+
+  // Only tokens that can start an expression are accepted as the value.
+  const Expression *expr = nullptr;
+  switch (_tok._kind) {
+  case Token::number:
+  case Token::kw_align:
+  case Token::identifier:
+  case Token::l_paren:
+    expr = parseExpression();
+    if (!expr)
+      return nullptr;
+    break;
+  default:
+    error(_tok, "unexpected token while parsing assignment value.");
+    return nullptr;
+  }
+
+  for (int i = 0; i < numParen; ++i)
+    if (!expectAndConsume(Token::r_paren, "expected )"))
+      return nullptr;
+
+  return new (_alloc) SymbolAssignment(*this, name, expr, kind, visibility);
+}
+
+// Parse EXCLUDE_FILE(ident ident ...)
+//
+// Returns one InputSectionName per listed identifier, or an error code (after
+// the parse error has been reported) when a parenthesis is missing.
+llvm::ErrorOr<InputSectionsCmd::VectorTy> Parser::parseExcludeFile() {
+  assert(_tok._kind == Token::kw_exclude_file && "Expected EXCLUDE_FILE!");
+  InputSectionsCmd::VectorTy res;
+  consumeToken();
+
+  if (!expectAndConsume(Token::l_paren, "expected ("))
+    return llvm::ErrorOr<InputSectionsCmd::VectorTy>(
+        std::make_error_code(std::errc::io_error));
+
+  while (_tok._kind == Token::identifier) {
+    res.push_back(new (_alloc) InputSectionName(*this, _tok._range, true));
+    consumeToken();
+  }
+
+  if (!expectAndConsume(Token::r_paren, "expected )"))
+    return llvm::ErrorOr<InputSectionsCmd::VectorTy>(
+        std::make_error_code(std::errc::io_error));
+  return llvm::ErrorOr<InputSectionsCmd::VectorTy>(std::move(res));
+}
+
+// Parse up to two nested SORT_* directives, consuming each keyword together
+// with its opening parenthesis, and store the resulting mode in sortMode.
+//
+// Returns the number of directives parsed, which is also the number of
+// closing parentheses the caller still has to consume, or -1 after reporting
+// an error. sortMode is WildcardSortMode::NA when no directive is present.
+int Parser::parseSortDirectives(WildcardSortMode &sortMode) {
+  int numParsedDirectives = 0;
+  sortMode = WildcardSortMode::NA;
+
+  if (_tok._kind == Token::kw_sort_by_name) {
+    consumeToken();
+    if (!expectAndConsume(Token::l_paren, "expected ("))
+      return -1;
+    ++numParsedDirectives;
+    sortMode = WildcardSortMode::ByName;
+  }
+
+  if (_tok._kind == Token::kw_sort_by_init_priority) {
+    consumeToken();
+    if (!expectAndConsume(Token::l_paren, "expected ("))
+      return -1;
+    ++numParsedDirectives;
+    sortMode = WildcardSortMode::ByInitPriority;
+  }
+
+  if (_tok._kind == Token::kw_sort_by_alignment) {
+    consumeToken();
+    if (!expectAndConsume(Token::l_paren, "expected ("))
+      return -1;
+    ++numParsedDirectives;
+    if (sortMode != WildcardSortMode::ByName)
+      sortMode = WildcardSortMode::ByAlignment;
+    else
+      sortMode = WildcardSortMode::ByNameAndAlignment;
+  }
+
+  // A second, nested SORT_BY_NAME (only meaningful after an alignment sort).
+  if (numParsedDirectives < 2 && _tok._kind == Token::kw_sort_by_name) {
+    consumeToken();
+    if (!expectAndConsume(Token::l_paren, "expected ("))
+      return -1;
+    ++numParsedDirectives;
+    if (sortMode == WildcardSortMode::ByAlignment)
+      sortMode = WildcardSortMode::ByAlignmentAndName;
+  }
+
+  // A second, nested SORT_BY_ALIGNMENT; it is consumed but leaves sortMode
+  // unchanged.
+  if (numParsedDirectives < 2 && _tok._kind == Token::kw_sort_by_alignment) {
+    consumeToken();
+    if (!expectAndConsume(Token::l_paren, "expected ("))
+      return -1;
+    ++numParsedDirectives;
+  }
+
+  // SORT_NONE is only accepted when no other directive was given.
+  if (numParsedDirectives == 0 && _tok._kind == Token::kw_sort_none) {
+    consumeToken();
+    if (!expectAndConsume(Token::l_paren, "expected ("))
+      return -1;
+    ++numParsedDirectives;
+    sortMode = WildcardSortMode::None;
+  }
+
+  return numParsedDirectives;
+}
+
+// Parse a sorted group of input section names, e.g. SORT_BY_NAME(.a .b).
+//
+// Returns the allocated InputSectionSortedGroup, or nullptr after reporting
+// an error.
+const InputSection *Parser::parseSortedInputSections() {
+  assert((_tok._kind == Token::kw_sort_by_name ||
+          _tok._kind == Token::kw_sort_by_alignment ||
+          _tok._kind == Token::kw_sort_by_init_priority ||
+          _tok._kind == Token::kw_sort_none) &&
+         "Expected SORT directives!");
+
+  WildcardSortMode sortMode = WildcardSortMode::NA;
+  int numParen = parseSortDirectives(sortMode);
+  if (numParen == -1)
+    return nullptr;
+
+  SmallVector<const InputSection *, 8> inputSections;
+
+  while (_tok._kind == Token::identifier) {
+    inputSections.push_back(new (_alloc)
+                                InputSectionName(*this, _tok._range, false));
+    consumeToken();
+  }
+
+  // Eat "numParen" rparens
+  for (int i = 0, e = numParen; i != e; ++i)
+    if (!expectAndConsume(Token::r_paren, "expected )"))
+      return nullptr;
+
+  return new (_alloc) InputSectionSortedGroup(*this, sortMode, inputSections);
+}
+
+// Parse an input sections command of an output section description:
+//
+//   [KEEP(] [SORT*(]file[)] [: [SORT*(]archive[)]] [ ( section-list ) ] [)]
+//
+// Returns the allocated InputSectionsCmd, or nullptr after reporting an
+// error.
+const InputSectionsCmd *Parser::parseInputSectionsCmd() {
+  assert((_tok._kind == Token::identifier || _tok._kind == Token::colon ||
+          _tok._kind == Token::star || _tok._kind == Token::kw_keep ||
+          _tok._kind == Token::kw_sort_by_name ||
+          _tok._kind == Token::kw_sort_by_alignment ||
+          _tok._kind == Token::kw_sort_by_init_priority ||
+          _tok._kind == Token::kw_sort_none) &&
+         "Expected input section first tokens!");
+  // Closing parentheses owed at the very end: one for the section list, plus
+  // one more when the command is wrapped in KEEP(...).
+  int numParen = 1;
+  bool keep = false;
+  WildcardSortMode fileSortMode = WildcardSortMode::NA;
+  WildcardSortMode archiveSortMode = WildcardSortMode::NA;
+  StringRef memberName;
+  StringRef archiveName;
+
+  if (_tok._kind == Token::kw_keep) {
+    consumeToken();
+    if (!expectAndConsume(Token::l_paren, "expected ("))
+      return nullptr;
+    ++numParen;
+    keep = true;
+  }
+
+  // Input name
+  if (_tok._kind != Token::colon) {
+    // Parentheses opened by SORT directives around the file name. This is a
+    // separate counter so the outer numParen is not shadowed.
+    int numSortParen = parseSortDirectives(fileSortMode);
+    if (numSortParen == -1)
+      return nullptr;
+    memberName = _tok._range;
+    consumeToken();
+    for (int i = 0; i != numSortParen; ++i)
+      if (!expectAndConsume(Token::r_paren, "expected )"))
+        return nullptr;
+  }
+  if (_tok._kind == Token::colon) {
+    consumeToken();
+    if (_tok._kind == Token::identifier ||
+        _tok._kind == Token::kw_sort_by_name ||
+        _tok._kind == Token::kw_sort_by_alignment ||
+        _tok._kind == Token::kw_sort_by_init_priority ||
+        _tok._kind == Token::kw_sort_none) {
+      int numSortParen = parseSortDirectives(archiveSortMode);
+      if (numSortParen == -1)
+        return nullptr;
+      archiveName = _tok._range;
+      consumeToken();
+      for (int i = 0; i != numSortParen; ++i)
+        if (!expectAndConsume(Token::r_paren, "expected )"))
+          return nullptr;
+    }
+  }
+
+  SmallVector<const InputSection *, 8> inputSections;
+
+  // A bare file name without a parenthesized section list is a complete
+  // command on its own.
+  if (_tok._kind != Token::l_paren)
+    return new (_alloc)
+        InputSectionsCmd(*this, memberName, archiveName, keep, fileSortMode,
+                         archiveSortMode, inputSections);
+  consumeToken();
+
+  // Token::star is part of the loop condition so that the matching case label
+  // below is actually reachable.
+  while (_tok._kind == Token::identifier || _tok._kind == Token::star ||
+         _tok._kind == Token::kw_exclude_file ||
+         _tok._kind == Token::kw_sort_by_name ||
+         _tok._kind == Token::kw_sort_by_alignment ||
+         _tok._kind == Token::kw_sort_by_init_priority ||
+         _tok._kind == Token::kw_sort_none) {
+    switch (_tok._kind) {
+    case Token::kw_exclude_file: {
+      auto vec = parseExcludeFile();
+      if (vec.getError())
+        return nullptr;
+      inputSections.insert(inputSections.end(), vec->begin(), vec->end());
+      break;
+    }
+    case Token::star:
+    case Token::identifier: {
+      inputSections.push_back(new (_alloc)
+                                  InputSectionName(*this, _tok._range, false));
+      consumeToken();
+      break;
+    }
+    case Token::kw_sort_by_name:
+    case Token::kw_sort_by_alignment:
+    case Token::kw_sort_by_init_priority:
+    case Token::kw_sort_none: {
+      const InputSection *group = parseSortedInputSections();
+      if (!group)
+        return nullptr;
+      inputSections.push_back(group);
+      break;
+    }
+    default:
+      llvm_unreachable("Unknown token");
+    }
+  }
+
+  for (int i = 0; i < numParen; ++i)
+    if (!expectAndConsume(Token::r_paren, "expected )"))
+      return nullptr;
+  return new (_alloc)
+      InputSectionsCmd(*this, memberName, archiveName, keep, fileSortMode,
+                       archiveSortMode, inputSections);
+}
+
+// Parse an output section description:
+//
+//   name [address] : [AT(..)] [ALIGN(..)] [ALIGN_WITH_INPUT] [SUBALIGN(..)]
+//     [ONLY_IF_RO|ONLY_IF_RW] { output-section-commands } [=fill]
+//
+// Returns the allocated OutputSectionDescription, or nullptr after reporting
+// an error.
+const OutputSectionDescription *Parser::parseOutputSectionDescription() {
+  assert((_tok._kind == Token::kw_discard || _tok._kind == Token::identifier) &&
+         "Expected /DISCARD/ or identifier!");
+  StringRef sectionName;
+  const Expression *address = nullptr;
+  const Expression *align = nullptr;
+  const Expression *subAlign = nullptr;
+  const Expression *at = nullptr;
+  const Expression *fillExpr = nullptr;
+  StringRef fillStream;
+  bool alignWithInput = false;
+  bool discard = false;
+  OutputSectionDescription::Constraint constraint =
+      OutputSectionDescription::C_None;
+  SmallVector<const Command *, 8> outputSectionCommands;
+
+  if (_tok._kind == Token::kw_discard)
+    discard = true;
+  else
+    sectionName = _tok._range;
+  consumeToken();
+
+  // Optional address expression before the colon.
+  if (_tok._kind == Token::number || _tok._kind == Token::identifier ||
+      _tok._kind == Token::kw_align || _tok._kind == Token::l_paren) {
+    address = parseExpression();
+    if (!address)
+      return nullptr;
+  }
+
+  if (!expectAndConsume(Token::colon, "expected :"))
+    return nullptr;
+
+  if (_tok._kind == Token::kw_at) {
+    consumeToken();
+    at = parseExpression();
+    if (!at)
+      return nullptr;
+  }
+
+  if (_tok._kind == Token::kw_align) {
+    consumeToken();
+    align = parseExpression();
+    if (!align)
+      return nullptr;
+  }
+
+  if (_tok._kind == Token::kw_align_with_input) {
+    consumeToken();
+    alignWithInput = true;
+  }
+
+  if (_tok._kind == Token::kw_subalign) {
+    consumeToken();
+    subAlign = parseExpression();
+    if (!subAlign)
+      return nullptr;
+  }
+
+  if (_tok._kind == Token::kw_only_if_ro) {
+    consumeToken();
+    constraint = OutputSectionDescription::C_OnlyIfRO;
+  } else if (_tok._kind == Token::kw_only_if_rw) {
+    consumeToken();
+    constraint = OutputSectionDescription::C_OnlyIfRW;
+  }
+
+  if (!expectAndConsume(Token::l_brace, "expected {"))
+    return nullptr;
+
+  // Parse zero or more output-section-commands
+  while (_tok._kind != Token::r_brace) {
+    switch (_tok._kind) {
+    case Token::semicolon:
+      consumeToken();
+      break;
+    case Token::identifier:
+      // An identifier followed by an assignment operator starts a symbol
+      // assignment; anything else is treated as an input sections command.
+      switch (peek()._kind) {
+      case Token::equal:
+      case Token::plusequal:
+      case Token::minusequal:
+      case Token::starequal:
+      case Token::slashequal:
+      case Token::ampequal:
+      case Token::pipeequal:
+      case Token::lesslessequal:
+      case Token::greatergreaterequal:
+        if (const Command *cmd = parseSymbolAssignment())
+          outputSectionCommands.push_back(cmd);
+        else
+          return nullptr;
+        break;
+      default:
+        if (const Command *cmd = parseInputSectionsCmd())
+          outputSectionCommands.push_back(cmd);
+        else
+          return nullptr;
+        break;
+      }
+      break;
+    case Token::kw_keep:
+    case Token::star:
+    case Token::colon:
+    case Token::kw_sort_by_name:
+    case Token::kw_sort_by_alignment:
+    case Token::kw_sort_by_init_priority:
+    case Token::kw_sort_none:
+      if (const Command *cmd = parseInputSectionsCmd())
+        outputSectionCommands.push_back(cmd);
+      else
+        return nullptr;
+      break;
+    case Token::kw_hidden:
+    case Token::kw_provide:
+    case Token::kw_provide_hidden:
+      if (const Command *cmd = parseSymbolAssignment())
+        outputSectionCommands.push_back(cmd);
+      else
+        return nullptr;
+      break;
+    default:
+      error(_tok, "expected symbol assignment or input file name.");
+      return nullptr;
+    }
+  }
+
+  if (!expectAndConsume(Token::r_brace, "expected }"))
+    return nullptr;
+
+  // Optional fill: "=expression" or "=0x<hex digits>". A lone hex literal is
+  // stored as a byte stream; a hex literal followed by an operator, or one
+  // that parseHexToByteStream rejects, is parsed as an ordinary expression.
+  if (_tok._kind == Token::equal) {
+    consumeToken();
+    if (_tok._kind != Token::number || !_tok._range.startswith_lower("0x")) {
+      fillExpr = parseExpression();
+      if (!fillExpr)
+        return nullptr;
+    } else {
+      std::string strBuf;
+      if (isExpressionOperator(peek()) ||
+          !parseHexToByteStream(_tok._range.drop_front(2), strBuf)) {
+        fillExpr = parseExpression();
+        if (!fillExpr)
+          return nullptr;
+      } else {
+        // Copy the bytes into _alloc so that the StringRef stays valid after
+        // strBuf is destroyed.
+        char *rawBuf = (char *) _alloc.Allocate(strBuf.size(), 1);
+        memcpy(rawBuf, strBuf.c_str(), strBuf.size());
+        fillStream = StringRef(rawBuf, strBuf.size());
+        consumeToken();
+      }
+    }
+  }
+
+  return new (_alloc) OutputSectionDescription(
+      *this, sectionName, address, align, subAlign, at, fillExpr, fillStream,
+      alignWithInput, discard, constraint, outputSectionCommands);
+}
+
+// OVERLAY is recognized but not implemented: always reports an error and
+// returns nullptr.
+const Overlay *Parser::parseOverlay() {
+  assert(_tok._kind == Token::kw_overlay && "Expected OVERLAY!");
+  error(_tok, "Overlay description is not yet supported.");
+  return nullptr;
+}
+
+// Parse SECTIONS { sections-commands }
+//
+// Returns the allocated Sections command, or nullptr after reporting an
+// error.
+Sections *Parser::parseSections() {
+  assert(_tok._kind == Token::kw_sections && "Expected SECTIONS!");
+  consumeToken();
+  if (!expectAndConsume(Token::l_brace, "expected {"))
+    return nullptr;
+  SmallVector<const Command *, 8> sectionsCommands;
+
+  bool unrecognizedToken = false;
+  // Parse zero or more sections-commands
+  while (!unrecognizedToken) {
+    switch (_tok._kind) {
+    case Token::semicolon:
+      consumeToken();
+      break;
+
+    case Token::identifier:
+      // An identifier followed by an assignment operator starts a symbol
+      // assignment; anything else starts an output section description.
+      switch (peek()._kind) {
+      case Token::equal:
+      case Token::plusequal:
+      case Token::minusequal:
+      case Token::starequal:
+      case Token::slashequal:
+      case Token::ampequal:
+      case Token::pipeequal:
+      case Token::lesslessequal:
+      case Token::greatergreaterequal:
+        if (const Command *cmd = parseSymbolAssignment())
+          sectionsCommands.push_back(cmd);
+        else
+          return nullptr;
+        break;
+      default:
+        if (const Command *cmd = parseOutputSectionDescription())
+          sectionsCommands.push_back(cmd);
+        else
+          return nullptr;
+        break;
+      }
+      break;
+
+    case Token::kw_discard:
+    case Token::star:
+      if (const Command *cmd = parseOutputSectionDescription())
+        sectionsCommands.push_back(cmd);
+      else
+        return nullptr;
+      break;
+
+    case Token::kw_entry:
+      if (const Command *cmd = parseEntry())
+        sectionsCommands.push_back(cmd);
+      else
+        return nullptr;
+      break;
+
+    case Token::kw_hidden:
+    case Token::kw_provide:
+    case Token::kw_provide_hidden:
+      if (const Command *cmd = parseSymbolAssignment())
+        sectionsCommands.push_back(cmd);
+      else
+        return nullptr;
+      break;
+
+    case Token::kw_overlay:
+      if (const Command *cmd = parseOverlay())
+        sectionsCommands.push_back(cmd);
+      else
+        return nullptr;
+      break;
+
+    default:
+      unrecognizedToken = true;
+      break;
+    }
+  }
+
+  // The first unrecognized token must be the closing brace.
+  if (!expectAndConsume(
+          Token::r_brace,
+          "expected symbol assignment, entry, overlay or output section name."))
+    return nullptr;
+
+  return new (_alloc) Sections(*this, sectionsCommands);
+}
+
+// Parse MEMORY { name [(attrs)] : ORIGIN = expr, LENGTH = expr ... }
+//
+// Returns the allocated Memory command, or nullptr after reporting an error.
+Memory *Parser::parseMemory() {
+  assert(_tok._kind == Token::kw_memory && "Expected MEMORY!");
+  consumeToken();
+  if (!expectAndConsume(Token::l_brace, "expected {"))
+    return nullptr;
+  SmallVector<const MemoryBlock *, 8> blocks;
+
+  bool unrecognizedToken = false;
+  // Parse zero or more memory block descriptors.
+  while (!unrecognizedToken) {
+    if (_tok._kind == Token::identifier) {
+      StringRef name;
+      StringRef attrs;
+      const Expression *origin = nullptr;
+      const Expression *length = nullptr;
+
+      name = _tok._range;
+      consumeToken();
+
+      // Parse optional memory region attributes.
+      if (_tok._kind == Token::l_paren) {
+        consumeToken();
+
+        if (_tok._kind != Token::identifier) {
+          error(_tok, "Expected memory attribute string.");
+          return nullptr;
+        }
+        attrs = _tok._range;
+        consumeToken();
+
+        if (!expectAndConsume(Token::r_paren, "expected )"))
+          return nullptr;
+      }
+
+      if (!expectAndConsume(Token::colon, "expected :"))
+        return nullptr;
+
+      // Parse the ORIGIN (base address of memory block).
+      if (!expectAndConsume(Token::kw_origin, "expected ORIGIN"))
+        return nullptr;
+
+      if (!expectAndConsume(Token::equal, "expected ="))
+        return nullptr;
+
+      origin = parseExpression();
+      if (!origin)
+        return nullptr;
+
+      if (!expectAndConsume(Token::comma, "expected ,"))
+        return nullptr;
+
+      // Parse the LENGTH (length of memory block).
+      if (!expectAndConsume(Token::kw_length, "expected LENGTH"))
+        return nullptr;
+
+      if (!expectAndConsume(Token::equal, "expected ="))
+        return nullptr;
+
+      length = parseExpression();
+      if (!length)
+        return nullptr;
+
+      MemoryBlock *block =
+          new (_alloc) MemoryBlock(name, attrs, origin, length);
+      blocks.push_back(block);
+    } else {
+      unrecognizedToken = true;
+    }
+  }
+  if (!expectAndConsume(
+          Token::r_brace,
+          "expected memory block definition."))
+    return nullptr;
+
+  return new (_alloc) Memory(*this, blocks);
+}
+
+// Parse EXTERN(symbol symbol ...); at least one symbol is required.
+//
+// Returns the allocated Extern command, or nullptr after reporting an error.
+Extern *Parser::parseExtern() {
+  assert(_tok._kind == Token::kw_extern && "Expected EXTERN!");
+  consumeToken();
+  if (!expectAndConsume(Token::l_paren, "expected ("))
+    return nullptr;
+
+  // Parse one or more symbols.
+  SmallVector<StringRef, 8> symbols;
+  if (_tok._kind != Token::identifier) {
+    error(_tok, "expected one or more symbols in EXTERN.");
+    return nullptr;
+  }
+  symbols.push_back(_tok._range);
+  consumeToken();
+  while (_tok._kind == Token::identifier) {
+    symbols.push_back(_tok._range);
+    consumeToken();
+  }
+
+  if (!expectAndConsume(Token::r_paren, "expected symbol in EXTERN."))
+    return nullptr;
+
+  return new (_alloc) Extern(*this, symbols);
+}
+
+// Sema member functions
+Sema::Sema()
+    : _scripts(), _layoutCommands(), _memberToLayoutOrder(),
+      _memberNameWildcards(), _cacheSectionOrder(), _cacheExpressionOrder(),
+      _deliveredExprs(), _symbolTable() {}
+
+// Linearize the SECTIONS commands of every registered script.
+void Sema::perform() {
+  for (auto &parser : _scripts)
+    perform(parser->get());
+}
+
+// Ordering predicate for input sections: returns true when lhs must be laid
+// out before rhs. Sections matched by no rule (negative layout order) sort
+// after every matched section and compare equal to each other.
+bool Sema::less(const SectionKey &lhs, const SectionKey &rhs) const {
+  int a = getLayoutOrder(lhs, true);
+  int b = getLayoutOrder(rhs, true);
+
+  if (a != b) {
+    if (a < 0)
+      return false;
+    if (b < 0)
+      return true;
+    return a < b;
+  }
+
+  // If both sections are not mapped anywhere, they have the same order
+  if (a < 0)
+    return false;
+
+  // If both sections fall into the same layout order, we need to find their
+  // relative position as written in the (InputSectionsCmd).
+  return localCompare(a, lhs, rhs);
+}
+
+// Return the name of the output section description that precedes the rule
+// matching key, or an empty StringRef when no rule matches.
+StringRef Sema::getOutputSection(const SectionKey &key) const {
+  int layoutOrder = getLayoutOrder(key, true);
+  if (layoutOrder < 0)
+    return StringRef();
+
+  // Walk backwards to the closest enclosing output section description.
+  for (int i = layoutOrder - 1; i >= 0; --i)
+    if (auto *out = dyn_cast<OutputSectionDescription>(_layoutCommands[i]))
+      return out->name();
+
+  return StringRef();
+}
+
+// Return the symbol assignments written immediately before the input section
+// name that matches key, in script order. Each layout position delivers its
+// expressions only once; later calls for the same position return an empty
+// vector.
+std::vector<const SymbolAssignment *>
+Sema::getExprs(const SectionKey &key) {
+  int layoutOrder = getLayoutOrder(key, false);
+  auto ans = std::vector<const SymbolAssignment *>();
+
+  if (layoutOrder < 0 || _deliveredExprs.count(layoutOrder) > 0)
+    return ans;
+
+  // Collect assignments backwards until the previous input section name.
+  for (int i = layoutOrder - 1; i >= 0; --i) {
+    if (isa<InputSection>(_layoutCommands[i]))
+      break;
+    if (auto assgn = dyn_cast<SymbolAssignment>(_layoutCommands[i]))
+      ans.push_back(assgn);
+  }
+
+  // Reverse this order so we evaluate the expressions in the original order
+  // of the linker script
+  std::reverse(ans.begin(), ans.end());
+
+  // Mark this layout number as delivered
+  _deliveredExprs.insert(layoutOrder);
+  return ans;
+}
+
+// Evaluate one symbol assignment with "." bound to curPos. An assignment to
+// "." updates curPos; any other symbol is stored in the symbol table.
+// Returns the evaluation error, if any.
+std::error_code Sema::evalExpr(const SymbolAssignment *assgn,
+                               uint64_t &curPos) {
+  _symbolTable[StringRef(".")] = curPos;
+
+  auto ans = assgn->expr()->evalExpr(_symbolTable);
+  if (ans.getError())
+    return ans.getError();
+  uint64_t result = *ans;
+
+  if (assgn->symbol() == ".") {
+    curPos = result;
+    return std::error_code();
+  }
+
+  _symbolTable[assgn->symbol()] = result;
+  return std::error_code();
+}
+
+// Return the set of symbols (other than ".") assigned by the linearized
+// layout commands. The set is computed on first use and cached; an empty
+// result is recomputed on every call.
+const llvm::StringSet<> &Sema::getScriptDefinedSymbols() const {
+  // Do we have cached results?
+  if (!_definedSymbols.empty())
+    return _definedSymbols;
+
+  // Populate our defined set and return it
+  for (auto cmd : _layoutCommands)
+    if (auto sa = dyn_cast<SymbolAssignment>(cmd)) {
+      StringRef symbol = sa->symbol();
+      if (!symbol.empty() && symbol != ".")
+        _definedSymbols.insert(symbol);
+    }
+
+  return _definedSymbols;
+}
+
+// Return the value recorded for a symbol by evalExpr(). The symbol must be
+// present in the symbol table; this is only checked by an assertion.
+uint64_t Sema::getLinkerScriptExprValue(StringRef name) const {
+  auto it = _symbolTable.find(name);
+  assert (it != _symbolTable.end() && "Invalid symbol name!");
+  return it->second;
+}
+
+// Print every registered script followed by the linearized layout commands
+// to llvm::outs().
+void Sema::dump() const {
+  raw_ostream &os = llvm::outs();
+  os << "Linker script semantics dump\n";
+  int num = 0;
+  for (auto &parser : _scripts) {
+    os << "Dumping script #" << ++num << ":\n";
+    parser->get()->dump(os);
+    os << "\n";
+  }
+  os << "Dumping rule ids:\n";
+  for (unsigned i = 0; i < _layoutCommands.size(); ++i) {
+    os << "LayoutOrder " << i << ":\n";
+    _layoutCommands[i]->dump(os);
+    os << "\n\n";
+  }
+}
+
+/// Given a string "pattern" with wildcard characters, return true if it
+/// matches "name". This function is useful when checking if a given name
+/// pattern written in the linker script, i.e. ".text*", should match
+/// ".text.anytext".
+///
+/// Supported syntax:
+///   *      any run of characters, including an empty one
+///   ?      any single character
+///   [abc]  any single character listed between the brackets (the characters
+///          are taken literally; ranges such as a-z are not interpreted)
+///   \c     the character c itself
+/// A '[' without a closing ']' or a trailing '\' never matches.
+static bool wildcardMatch(StringRef pattern, StringRef name) {
+  auto i = name.begin();
+  const auto nameEnd = name.end();
+
+  // Check if each char in pattern also appears in our input name, handling
+  // special wildcard characters.
+  for (auto j = pattern.begin(), e = pattern.end(); j != e; ++j) {
+    if (*j == '*') {
+      // Try to match the rest of the pattern against every suffix of the
+      // remaining name, starting with the whole remainder (empty run) and
+      // ending with the empty suffix. drop_front() is never asked to drop
+      // more characters than the name holds.
+      StringRef rest = pattern.drop_front(j - pattern.begin() + 1);
+      for (;; ++i) {
+        if (wildcardMatch(rest, name.drop_front(i - name.begin())))
+          return true;
+        if (i == nameEnd)
+          return false;
+      }
+    }
+
+    // Every other pattern element consumes exactly one character of name.
+    if (i == nameEnd)
+      return false;
+
+    switch (*j) {
+    case '?':
+      // Matches any character
+      break;
+    case '[': {
+      // Matches any character of the set specified between brackets
+      size_t begin = j - pattern.begin() + 1;
+      size_t end = pattern.find(']', begin);
+      if (end == StringRef::npos)
+        return false;
+
+      StringRef chars = pattern.slice(begin, end);
+      if (chars.find(*i) == StringRef::npos)
+        return false;
+
+      // Continue after the closing bracket (the loop increment skips it).
+      j = pattern.begin() + end;
+      break;
+    }
+    case '\\':
+      ++j;
+      // A backslash at the very end of the pattern escapes nothing.
+      if (j == e || *j != *i)
+        return false;
+      break;
+    default:
+      // No wildcard character means we must match exactly the same char
+      if (*j != *i)
+        return false;
+      break;
+    }
+    ++i;
+  }
+
+  // If our pattern hasn't consumed the entire string, it is not a match
+  return i == nameEnd;
+}
+
+// Check whether the InputSectionsCmd stored at layout position id applies to
+// key. Returns the layout position of the first input section name of that
+// command whose pattern matches key.sectionName, or -1 when the archive name
+// or every section pattern fails to match.
+int Sema::matchSectionName(int id, const SectionKey &key) const {
+  const InputSectionsCmd *cmd = dyn_cast<InputSectionsCmd>(_layoutCommands[id]);
+
+  if (!cmd || !wildcardMatch(cmd->archiveName(), key.archivePath))
+    return -1;
+
+  // The input section names of a command directly follow it in
+  // _layoutCommands.
+  while ((size_t)++id < _layoutCommands.size() &&
+         (isa<InputSection>(_layoutCommands[id]))) {
+    if (isa<InputSectionSortedGroup>(_layoutCommands[id]))
+      continue;
+
+    const InputSectionName *in =
+        dyn_cast<InputSectionName>(_layoutCommands[id]);
+    if (wildcardMatch(in->name(), key.sectionName))
+      return id;
+  }
+  return -1;
+}
+
+// Return the layout position of the rule matching key, or -1 if none does.
+// With coarse == true the position of the matching InputSectionsCmd is
+// returned; otherwise the position of the matching input section name within
+// it. Results, including misses, are cached per key.
+int Sema::getLayoutOrder(const SectionKey &key, bool coarse) const {
+  // First check if we already answered this layout question
+  if (coarse) {
+    auto entry = _cacheSectionOrder.find(key);
+    if (entry != _cacheSectionOrder.end())
+      return entry->second;
+  } else {
+    auto entry = _cacheExpressionOrder.find(key);
+    if (entry != _cacheExpressionOrder.end())
+      return entry->second;
+  }
+
+  // Try to match exact file name
+  auto range = _memberToLayoutOrder.equal_range(key.memberPath);
+  for (auto I = range.first, E = range.second; I != E; ++I) {
+    int order = I->second;
+    int exprOrder = -1;
+
+    if ((exprOrder = matchSectionName(order, key)) >= 0) {
+      if (coarse) {
+        _cacheSectionOrder.insert(std::make_pair(key, order));
+        return order;
+      }
+      _cacheExpressionOrder.insert(std::make_pair(key, exprOrder));
+      return exprOrder;
+    }
+  }
+
+  // If we still couldn't find a rule for this input section, try to match
+  // wildcards
+  for (auto I = _memberNameWildcards.begin(), E = _memberNameWildcards.end();
+       I != E; ++I) {
+    if (!wildcardMatch(I->first, key.memberPath))
+      continue;
+    int order = I->second;
+    int exprOrder = -1;
+
+    if ((exprOrder = matchSectionName(order, key)) >= 0) {
+      if (coarse) {
+        _cacheSectionOrder.insert(std::make_pair(key, order));
+        return order;
+      }
+      _cacheExpressionOrder.insert(std::make_pair(key, exprOrder));
+      return exprOrder;
+    }
+  }
+
+  // No rule matches: remember the miss in both caches.
+  _cacheSectionOrder.insert(std::make_pair(key, -1));
+  _cacheExpressionOrder.insert(std::make_pair(key, -1));
+  return -1;
+}
+
+// Compare two names according to sortMode; returns true when lhs sorts before
+// rhs. Only the name-based modes are implemented: None and NA impose no
+// order, and the alignment/init-priority modes hit an assertion.
+static bool compareSortedNames(WildcardSortMode sortMode, StringRef lhs,
+                               StringRef rhs) {
+  switch (sortMode) {
+  case WildcardSortMode::None:
+  case WildcardSortMode::NA:
+    return false;
+  case WildcardSortMode::ByAlignment:
+  case WildcardSortMode::ByInitPriority:
+  case WildcardSortMode::ByAlignmentAndName:
+    assert(false && "Unimplemented sort order");
+    break;
+  case WildcardSortMode::ByName:
+    return lhs.compare(rhs) < 0;
+  case WildcardSortMode::ByNameAndAlignment:
+    int compare = lhs.compare(rhs);
+    if (compare != 0)
+      return compare < 0;
+    return compareSortedNames(WildcardSortMode::ByAlignment, lhs, rhs);
+  }
+  return false;
+}
+
+// Return true if any input section name inside the sorted group, including
+// nested groups, matches key.sectionName.
+static bool sortedGroupContains(const InputSectionSortedGroup *cmd,
+                                const Sema::SectionKey &key) {
+  for (const InputSection *child : *cmd) {
+    if (auto i = dyn_cast<InputSectionName>(child)) {
+      if (wildcardMatch(i->name(), key.sectionName))
+        return true;
+      continue;
+    }
+
+    auto *sortedGroup = dyn_cast<InputSectionSortedGroup>(child);
+    assert(sortedGroup && "Expected InputSectionSortedGroup object");
+
+    if (sortedGroupContains(sortedGroup, key))
+      return true;
+  }
+
+  return false;
+}
+
+// Order two sections that were matched by the same InputSectionsCmd (stored
+// at layout position order). Returns true when lhs must come before rhs.
+bool Sema::localCompare(int order, const SectionKey &lhs,
+                        const SectionKey &rhs) const {
+  const InputSectionsCmd *cmd =
+      dyn_cast<InputSectionsCmd>(_layoutCommands[order]);
+
+  assert(cmd && "Invalid InputSectionsCmd index");
+
+  if (lhs.archivePath != rhs.archivePath)
+    return compareSortedNames(cmd->archiveSortMode(), lhs.archivePath,
+                              rhs.archivePath);
+
+  if (lhs.memberPath != rhs.memberPath)
+    return compareSortedNames(cmd->fileSortMode(), lhs.memberPath,
+                              rhs.memberPath);
+
+  // Both sections come from the same exact same file and rule. Start walking
+  // through input section names as written in the linker script and the
+  // first one to match will have higher priority.
+  for (const InputSection *inputSection : *cmd) {
+    if (auto i = dyn_cast<InputSectionName>(inputSection)) {
+      // If both match, return false (both have equal priority)
+      // If rhs match, return false (rhs has higher priority)
+      if (wildcardMatch(i->name(), rhs.sectionName))
+        return false;
+      // If lhs matches first, it has priority over rhs
+      if (wildcardMatch(i->name(), lhs.sectionName))
+        return true;
+      continue;
+    }
+
+    // Handle sorted subgroups specially
+    auto *sortedGroup = dyn_cast<InputSectionSortedGroup>(inputSection);
+    assert(sortedGroup && "Expected InputSectionSortedGroup object");
+
+    bool a = sortedGroupContains(sortedGroup, lhs);
+    bool b = sortedGroupContains(sortedGroup, rhs);
+    // NOTE(review): these two results look inverted with respect to the
+    // InputSectionName case above (a section matched only by lhs returns
+    // false here). Behavior is kept as is; confirm the intended priority.
+    if (a && !b)
+      return false;
+    if (b && !a)
+      return true;
+    // Neither section belongs to this group: try the next entry.
+    if (!a && !b)
+      continue;
+
+    return compareSortedNames(sortedGroup->sortMode(), lhs.sectionName,
+                              rhs.sectionName);
+  }
+
+  llvm_unreachable("");
+  return false;
+}
+
+// Return true if name contains any character that wildcardMatch() treats
+// specially.
+static bool hasWildcard(StringRef name) {
+  for (auto ch : name)
+    if (ch == '*' || ch == '?' || ch == '[' || ch == '\\')
+      return true;
+  return false;
+}
+
+// Append an input section to _layoutCommands; sorted groups are flattened
+// recursively so that only InputSectionName nodes are stored.
+void Sema::linearizeAST(const InputSection *inputSection) {
+  if (isa<InputSectionName>(inputSection)) {
+    _layoutCommands.push_back(inputSection);
+    return;
+  }
+
+  auto *sortedGroup = dyn_cast<InputSectionSortedGroup>(inputSection);
+  assert(sortedGroup && "Expected InputSectionSortedGroup object");
+
+  for (const InputSection *child : *sortedGroup) {
+    linearizeAST(child);
+  }
+}
+
+// Append an input sections command followed by its (flattened) input section
+// names, and index its layout position by member name.
+void Sema::linearizeAST(const InputSectionsCmd *inputSections) {
+  StringRef memberName = inputSections->memberName();
+  // Populate our maps for fast lookup of InputSectionsCmd
+  if (hasWildcard(memberName))
+    _memberNameWildcards.push_back(
+        std::make_pair(memberName, (int)_layoutCommands.size()));
+  else if (!memberName.empty())
+    _memberToLayoutOrder.insert(
+        std::make_pair(memberName.str(), (int)_layoutCommands.size()));
+
+  _layoutCommands.push_back(inputSections);
+  for (const InputSection *inputSection : *inputSections)
+    linearizeAST(inputSection);
+}
+
+// Flatten a SECTIONS command into _layoutCommands. Only symbol assignments,
+// output section descriptions and the input sections commands inside them
+// are recorded; every other command is ignored.
+void Sema::linearizeAST(const Sections *sections) {
+  for (const Command *sectionCommand : *sections) {
+    if (isa<SymbolAssignment>(sectionCommand)) {
+      _layoutCommands.push_back(sectionCommand);
+      continue;
+    }
+
+    if (!isa<OutputSectionDescription>(sectionCommand))
+      continue;
+
+    _layoutCommands.push_back(sectionCommand);
+    auto *outSection = dyn_cast<OutputSectionDescription>(sectionCommand);
+
+    for (const Command *outSecCommand : *outSection) {
+      if (isa<SymbolAssignment>(outSecCommand)) {
+        _layoutCommands.push_back(outSecCommand);
+        continue;
+      }
+
+      if (!isa<InputSectionsCmd>(outSecCommand))
+        continue;
+
+      linearizeAST(dyn_cast<InputSectionsCmd>(outSecCommand));
+    }
+  }
+}
+
+// Linearize every SECTIONS command of a parsed linker script.
+void Sema::perform(const LinkerScript *ls) {
+  for (const Command *c : ls->_commands) {
+    if (const Sections *sec = dyn_cast<Sections>(c))
+      linearizeAST(sec);
+  }
+}
+
+} // End namespace script
+} // end namespace lld
diff --git a/lib/ReaderWriter/MachO/ArchHandler.cpp
b/lib/ReaderWriter/MachO/ArchHandler.cpp
new file mode 100644
index 000000000000..cb20907b3e30
--- /dev/null
+++ b/lib/ReaderWriter/MachO/ArchHandler.cpp
@@ -0,0 +1,172 @@
+//===- lib/FileFormat/MachO/ArchHandler.cpp -------------------------------===//
+//
+//                             The LLVM Linker
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "ArchHandler.h"
+#include "Atoms.h"
+#include "MachONormalizedFileBinaryUtils.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm::MachO;
+using namespace lld::mach_o::normalized;
+
+namespace lld {
+namespace mach_o {
+
+
+ArchHandler::ArchHandler() {
+}
+
+ArchHandler::~ArchHandler() {
+}
+
+/// Instantiate the handler for the given architecture. All three armv6/7/7s
+/// variants share the same handler.
+std::unique_ptr<mach_o::ArchHandler> ArchHandler::create(
+                                               MachOLinkingContext::Arch arch) {
+  switch (arch) {
+  case MachOLinkingContext::arch_x86_64:
+    return create_x86_64();
+  case MachOLinkingContext::arch_x86:
+    return create_x86();
+  case MachOLinkingContext::arch_armv6:
+  case MachOLinkingContext::arch_armv7:
+  case MachOLinkingContext::arch_armv7s:
+    return create_arm();
+  case MachOLinkingContext::arch_arm64:
+    return create_arm64();
+  default:
+    llvm_unreachable("Unknown arch");
+  }
+}
+
+
+/// Return true if ref is a mach-o reference whose arch and kind equal the
+/// lazyPointerReferenceToFinal entry of this architecture's StubInfo.
+bool ArchHandler::isLazyPointer(const Reference &ref) {
+  // A lazy bind entry is needed for a lazy pointer.
+  const StubInfo &info = stubInfo();
+  if (ref.kindNamespace() != Reference::KindNamespace::mach_o)
+    return false;
+  if (ref.kindArch() != info.lazyPointerReferenceToFinal.arch)
+    return false;
+  return (ref.kindValue() == info.lazyPointerReferenceToFinal.kind);
+}
+
+
+/// Fold the type, scattered, pcRel, isExtern and length fields of a
+/// relocation into a single RelocPattern value. The relocation type must fit
+/// in the low four bits.
+ArchHandler::RelocPattern ArchHandler::relocPattern(const Relocation &reloc) {
+  assert((reloc.type & 0xFFF0) == 0);
+  uint16_t result = reloc.type;
+  if (reloc.scattered)
+    result |= rScattered;
+  if (reloc.pcRel)
+    result |= rPcRel;
+  if (reloc.isExtern)
+    result |= rExtern;
+  // r_length is the log2 of the size in bytes; length 0 contributes no bits.
+  switch(reloc.length) {
+  case 0:
+    break;
+  case 1:
+    result |= rLength2;
+    break;
+  case 2:
+    result |= rLength4;
+    break;
+  case 3:
+    result |= rLength8;
+    break;
+  default:
+    llvm_unreachable("bad r_length");
+  }
+  return result;
+}
+
+/// Inverse of relocPattern(): build a relocation with the flags, type and
+/// length encoded in pattern. offset, value and symbol are zeroed.
+normalized::Relocation
+ArchHandler::relocFromPattern(ArchHandler::RelocPattern pattern) {
+  normalized::Relocation result;
+  result.offset    = 0;
+  result.scattered = (pattern & rScattered);
+  result.type     = (RelocationInfoType)(pattern & 0xF);
+  result.pcRel    = (pattern & rPcRel);
+  result.isExtern = (pattern & rExtern);
+  result.value    = 0;
+  result.symbol    = 0;
+  switch (pattern & 0x300) {
+  case rLength1:
+    result.length = 0;
+    break;
+  case rLength2:
+    result.length = 1;
+    break;
+  case rLength4:
+    result.length = 2;
+    break;
+  case rLength8:
+    result.length = 3;
+    break;
+  }
+  return result;
+}
+
+/// Append to relocs a relocation built from pattern with the given offset,
+/// symbol and value.
+void ArchHandler::appendReloc(normalized::Relocations &relocs, uint32_t offset,
+                              uint32_t symbol, uint32_t value,
+                              RelocPattern pattern) {
+  normalized::Relocation reloc = relocFromPattern(pattern);
+  reloc.offset = offset;
+  reloc.symbol = symbol;
+  reloc.value  = value;
+  relocs.push_back(reloc);
+}
+
+
+// Fixed-width reads at addr; isBig is forwarded to read16/read32/read64. The
+// signed variants rely on the implicit conversion of the returned value.
+int16_t ArchHandler::readS16(const uint8_t *addr, bool isBig) {
+  return read16(addr, isBig);
+}
+
+int32_t ArchHandler::readS32(const uint8_t *addr, bool isBig) {
+  return read32(addr, isBig);
+}
+
+uint32_t ArchHandler::readU32(const uint8_t *addr, bool isBig) {
+  return read32(addr, isBig);
+}
+
+int64_t ArchHandler::readS64(const uint8_t *addr, bool isBig) {
+  return read64(addr, isBig);
+}
+
+/// Return true if the CFI atom holds a CIE, i.e. its ID field is zero. The ID
+/// follows the 32-bit length field, or the 32-bit escape value 0xffffffff
+/// plus a 64-bit extended length. Content too short to hold the ID field is
+/// reported as "not a CIE" instead of being read out of bounds.
+bool ArchHandler::isDwarfCIE(bool isBig, const DefinedAtom *atom) {
+  assert(atom->contentType() == DefinedAtom::typeCFI);
+  const auto content = atom->rawContent();
+  if (content.size() < sizeof(uint32_t))
+    return false;
+  uint32_t size = read32(content.data(), isBig);
+
+  uint32_t idOffset = sizeof(uint32_t);
+  if (size == 0xffffffffU)
+    idOffset += sizeof(uint64_t);
+
+  // The ID field itself must also lie inside the atom's content.
+  if (content.size() < idOffset + sizeof(uint32_t))
+    return false;
+
+  return read32(content.data() + idOffset, isBig) == 0;
+}
+
+/// Return the target of the first mach-o reference in fde whose kind is
+/// unwindRefToFunctionKind(), or nullptr if there is none.
+const Atom *ArchHandler::fdeTargetFunction(const DefinedAtom *fde) {
+  for (auto ref : *fde) {
+    if (ref->kindNamespace() == Reference::KindNamespace::mach_o &&
+        ref->kindValue() == unwindRefToFunctionKind()) {
+      assert(ref->kindArch() == kindArch() && "unexpected Reference arch");
+      return ref->target();
+    }
+  }
+
+  return nullptr;
+}
+
+} // namespace mach_o
+} // namespace lld
+
+
+
diff --git a/lib/ReaderWriter/MachO/ArchHandler.h b/lib/ReaderWriter/MachO/ArchHandler.h
new file mode 100644
index 000000000000..7f0961ebc807
--- /dev/null
+++ b/lib/ReaderWriter/MachO/ArchHandler.h
@@ -0,0 +1,300 @@
+//===- lib/FileFormat/MachO/ArchHandler.h ---------------------------------===//
+//
+//                             The LLVM Linker
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Atoms.h"
+#include "File.h"
+#include "MachONormalizedFile.h"
+#include "lld/Core/LLVM.h"
+#include "lld/Core/Reference.h"
+#include "lld/Core/Simple.h"
+#include "lld/ReaderWriter/MachOLinkingContext.h"
+#include "llvm/ADT/Triple.h"
+
+#ifndef LLD_READER_WRITER_MACHO_ARCH_HANDLER_H
+#define LLD_READER_WRITER_MACHO_ARCH_HANDLER_H
+
+namespace lld {
+namespace mach_o {
+
+///
+/// The ArchHandler class handles all architecture specific aspects of
+/// mach-o linking.
+/// +class ArchHandler { +public: + virtual ~ArchHandler(); + + /// There is no public interface to subclasses of ArchHandler, so this + /// is the only way to instantiate an ArchHandler. + static std::unique_ptr<ArchHandler> create(MachOLinkingContext::Arch arch); + + /// Get (arch specific) kind strings used by Registry. + virtual const Registry::KindStrings *kindStrings() = 0; + + /// Convert mach-o Arch to Reference::KindArch. + virtual Reference::KindArch kindArch() = 0; + + /// Used by StubPass to update References to shared library functions + /// to be references to a stub. + virtual bool isCallSite(const Reference &) = 0; + + /// Used by GOTPass to locate GOT References + virtual bool isGOTAccess(const Reference &, bool &canBypassGOT) { + return false; + } + + /// Used by ShimPass to insert shims in branches that switch mode. + virtual bool isNonCallBranch(const Reference &) = 0; + + /// Used by GOTPass to update GOT References + virtual void updateReferenceToGOT(const Reference *, bool targetIsNowGOT) {} + + /// Does this architecture make use of __unwind_info sections for exception + /// handling? If so, it will need a separate pass to create them. + virtual bool needsCompactUnwind() = 0; + + /// Returns the kind of reference to use to synthesize a 32-bit image-offset + /// value, used in the __unwind_info section. + virtual Reference::KindValue imageOffsetKind() = 0; + + /// Returns the kind of reference to use to synthesize a 32-bit image-offset + /// indirect value. Used for personality functions in the __unwind_info + /// section. + virtual Reference::KindValue imageOffsetKindIndirect() = 0; + + /// Architecture specific compact unwind type that signals __eh_frame should + /// actually be used. + virtual uint32_t dwarfCompactUnwindType() = 0; + + /// Reference from an __eh_frame FDE to the CIE it's based on. + virtual Reference::KindValue unwindRefToCIEKind() = 0; + + /// Reference from an __eh_frame FDE atom to the function it's + /// describing. 
Usually pointer-sized and PC-relative, but differs in whether + /// it needs to be in relocatable objects. + virtual Reference::KindValue unwindRefToFunctionKind() = 0; + + /// Reference from an __unwind_info entry of dwarfCompactUnwindType to the + /// required __eh_frame entry. On current architectures, the low 24 bits + /// represent the offset of the function's FDE entry from the start of + /// __eh_frame. + virtual Reference::KindValue unwindRefToEhFrameKind() = 0; + + virtual const Atom *fdeTargetFunction(const DefinedAtom *fde); + + /// Used by normalizedFromAtoms() to know where to generated rebasing and + /// binding info in final executables. + virtual bool isPointer(const Reference &) = 0; + + /// Used by normalizedFromAtoms() to know where to generated lazy binding + /// info in final executables. + virtual bool isLazyPointer(const Reference &); + + /// Returns true if the specified relocation is paired to the next relocation. + virtual bool isPairedReloc(const normalized::Relocation &) = 0; + + /// Prototype for a helper function. Given a sectionIndex and address, + /// finds the atom and offset with that atom of that address. + typedef std::function<std::error_code (uint32_t sectionIndex, uint64_t addr, + const lld::Atom **, Reference::Addend *)> + FindAtomBySectionAndAddress; + + /// Prototype for a helper function. Given a symbolIndex, finds the atom + /// representing that symbol. + typedef std::function<std::error_code (uint32_t symbolIndex, + const lld::Atom **)> FindAtomBySymbolIndex; + + /// Analyzes a relocation from a .o file and returns the info + /// (kind, target, addend) needed to instantiate a Reference. + /// Two helper functions are passed as parameters to find the target atom + /// given a symbol index or address. 
+ virtual std::error_code + getReferenceInfo(const normalized::Relocation &reloc, + const DefinedAtom *inAtom, + uint32_t offsetInAtom, + uint64_t fixupAddress, bool isBigEndian, + FindAtomBySectionAndAddress atomFromAddress, + FindAtomBySymbolIndex atomFromSymbolIndex, + Reference::KindValue *kind, + const lld::Atom **target, + Reference::Addend *addend) = 0; + + /// Analyzes a pair of relocations from a .o file and returns the info + /// (kind, target, addend) needed to instantiate a Reference. + /// Two helper functions are passed as parameters to find the target atom + /// given a symbol index or address. + virtual std::error_code + getPairReferenceInfo(const normalized::Relocation &reloc1, + const normalized::Relocation &reloc2, + const DefinedAtom *inAtom, + uint32_t offsetInAtom, + uint64_t fixupAddress, bool isBig, bool scatterable, + FindAtomBySectionAndAddress atomFromAddress, + FindAtomBySymbolIndex atomFromSymbolIndex, + Reference::KindValue *kind, + const lld::Atom **target, + Reference::Addend *addend) = 0; + + /// Prototype for a helper function. Given an atom, finds the symbol table + /// index for it in the output file. + typedef std::function<uint32_t (const Atom &atom)> FindSymbolIndexForAtom; + + /// Prototype for a helper function. Given an atom, finds the index + /// of the section that will contain the atom. + typedef std::function<uint32_t (const Atom &atom)> FindSectionIndexForAtom; + + /// Prototype for a helper function. Given an atom, finds the address + /// assigned to it in the output file. + typedef std::function<uint64_t (const Atom &atom)> FindAddressForAtom; + + /// Some architectures require local symbols on anonymous atoms. + virtual bool needsLocalSymbolInRelocatableFile(const DefinedAtom *atom) { + return false; + } + + /// Copy raw content then apply all fixup References on an Atom. 
+ virtual void generateAtomContent(const DefinedAtom &atom, bool relocatable, + FindAddressForAtom findAddress, + FindAddressForAtom findSectionAddress, + uint64_t imageBaseAddress, + uint8_t *atomContentBuffer) = 0; + + /// Used in -r mode to convert a Reference to a mach-o relocation. + virtual void appendSectionRelocations(const DefinedAtom &atom, + uint64_t atomSectionOffset, + const Reference &ref, + FindSymbolIndexForAtom, + FindSectionIndexForAtom, + FindAddressForAtom, + normalized::Relocations&) = 0; + + /// Add arch-specific References. + virtual void addAdditionalReferences(MachODefinedAtom &atom) { } + + // Add Reference for data-in-code marker. + virtual void addDataInCodeReference(MachODefinedAtom &atom, uint32_t atomOff, + uint16_t length, uint16_t kind) { } + + /// Returns true if the specificed Reference value marks the start or end + /// of a data-in-code range in an atom. + virtual bool isDataInCodeTransition(Reference::KindValue refKind) { + return false; + } + + /// Returns the Reference value for a Reference that marks that start of + /// a data-in-code range. + virtual Reference::KindValue dataInCodeTransitionStart( + const MachODefinedAtom &atom) { + return 0; + } + + /// Returns the Reference value for a Reference that marks that end of + /// a data-in-code range. + virtual Reference::KindValue dataInCodeTransitionEnd( + const MachODefinedAtom &atom) { + return 0; + } + + /// Only relevant for 32-bit arm archs. + virtual bool isThumbFunction(const DefinedAtom &atom) { return false; } + + /// Only relevant for 32-bit arm archs. + virtual const DefinedAtom *createShim(MachOFile &file, bool thumbToArm, + const DefinedAtom &) { + llvm_unreachable("shims only support on arm"); + } + + /// Does a given unwind-cfi atom represent a CIE (as opposed to an FDE). 
+ static bool isDwarfCIE(bool isBig, const DefinedAtom *atom); + + struct ReferenceInfo { + Reference::KindArch arch; + uint16_t kind; + uint32_t offset; + int32_t addend; + }; + + struct OptionalRefInfo { + bool used; + uint16_t kind; + uint32_t offset; + int32_t addend; + }; + + /// Table of architecture specific information for creating stubs. + struct StubInfo { + const char* binderSymbolName; + ReferenceInfo lazyPointerReferenceToHelper; + ReferenceInfo lazyPointerReferenceToFinal; + ReferenceInfo nonLazyPointerReferenceToBinder; + uint8_t codeAlignment; + + uint32_t stubSize; + uint8_t stubBytes[16]; + ReferenceInfo stubReferenceToLP; + OptionalRefInfo optStubReferenceToLP; + + uint32_t stubHelperSize; + uint8_t stubHelperBytes[16]; + ReferenceInfo stubHelperReferenceToImm; + ReferenceInfo stubHelperReferenceToHelperCommon; + + uint32_t stubHelperCommonSize; + uint8_t stubHelperCommonBytes[36]; + ReferenceInfo stubHelperCommonReferenceToCache; + OptionalRefInfo optStubHelperCommonReferenceToCache; + ReferenceInfo stubHelperCommonReferenceToBinder; + OptionalRefInfo optStubHelperCommonReferenceToBinder; + }; + + virtual const StubInfo &stubInfo() = 0; + +protected: + ArchHandler(); + + static std::unique_ptr<mach_o::ArchHandler> create_x86_64(); + static std::unique_ptr<mach_o::ArchHandler> create_x86(); + static std::unique_ptr<mach_o::ArchHandler> create_arm(); + static std::unique_ptr<mach_o::ArchHandler> create_arm64(); + + // Handy way to pack mach-o r_type and other bit fields into one 16-bit value. + typedef uint16_t RelocPattern; + enum { + rScattered = 0x8000, + rPcRel = 0x4000, + rExtern = 0x2000, + rLength1 = 0x0000, + rLength2 = 0x0100, + rLength4 = 0x0200, + rLength8 = 0x0300, + rLenArmLo = rLength1, + rLenArmHi = rLength2, + rLenThmbLo = rLength4, + rLenThmbHi = rLength8 + }; + /// Extract RelocPattern from normalized mach-o relocation. 
+ static RelocPattern relocPattern(const normalized::Relocation &reloc); + /// Create normalized Relocation initialized from pattern. + static normalized::Relocation relocFromPattern(RelocPattern pattern); + /// One liner to add a relocation. + static void appendReloc(normalized::Relocations &relocs, uint32_t offset, + uint32_t symbol, uint32_t value, + RelocPattern pattern); + + + static int16_t readS16(const uint8_t *addr, bool isBig); + static int32_t readS32(const uint8_t *addr, bool isBig); + static uint32_t readU32(const uint8_t *addr, bool isBig); + static int64_t readS64(const uint8_t *addr, bool isBig); +}; + +} // namespace mach_o +} // namespace lld + +#endif // LLD_READER_WRITER_MACHO_ARCH_HANDLER_H diff --git a/lib/ReaderWriter/MachO/ArchHandler_arm.cpp b/lib/ReaderWriter/MachO/ArchHandler_arm.cpp new file mode 100644 index 000000000000..43f88a1d30d8 --- /dev/null +++ b/lib/ReaderWriter/MachO/ArchHandler_arm.cpp @@ -0,0 +1,1524 @@ +//===- lib/FileFormat/MachO/ArchHandler_arm.cpp ---------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "ArchHandler.h" +#include "Atoms.h" +#include "MachONormalizedFileBinaryUtils.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/ErrorHandling.h" + +using namespace llvm::MachO; +using namespace lld::mach_o::normalized; + +namespace lld { +namespace mach_o { + +using llvm::support::ulittle32_t; +using llvm::support::little32_t; + + +class ArchHandler_arm : public ArchHandler { +public: + ArchHandler_arm(); + virtual ~ArchHandler_arm(); + + const Registry::KindStrings *kindStrings() override { return _sKindStrings; } + + Reference::KindArch kindArch() override { return Reference::KindArch::ARM; } + + const ArchHandler::StubInfo &stubInfo() override; + bool isCallSite(const Reference &) override; + bool isPointer(const Reference &) override; + bool isPairedReloc(const normalized::Relocation &) override; + bool isNonCallBranch(const Reference &) override; + + bool needsCompactUnwind() override { + return false; + } + Reference::KindValue imageOffsetKind() override { + return invalid; + } + Reference::KindValue imageOffsetKindIndirect() override { + return invalid; + } + + Reference::KindValue unwindRefToCIEKind() override { + return invalid; + } + + Reference::KindValue unwindRefToFunctionKind() override { + return invalid; + } + + Reference::KindValue unwindRefToEhFrameKind() override { + return invalid; + } + + uint32_t dwarfCompactUnwindType() override { + // FIXME + return -1; + } + + std::error_code getReferenceInfo(const normalized::Relocation &reloc, + const DefinedAtom *inAtom, + uint32_t offsetInAtom, + uint64_t fixupAddress, bool swap, + FindAtomBySectionAndAddress atomFromAddress, + FindAtomBySymbolIndex atomFromSymbolIndex, + Reference::KindValue *kind, + const lld::Atom **target, + Reference::Addend *addend) override; + std::error_code + 
getPairReferenceInfo(const normalized::Relocation &reloc1, + const normalized::Relocation &reloc2, + const DefinedAtom *inAtom, + uint32_t offsetInAtom, + uint64_t fixupAddress, bool swap, bool scatterable, + FindAtomBySectionAndAddress atomFromAddress, + FindAtomBySymbolIndex atomFromSymbolIndex, + Reference::KindValue *kind, + const lld::Atom **target, + Reference::Addend *addend) override; + + void generateAtomContent(const DefinedAtom &atom, bool relocatable, + FindAddressForAtom findAddress, + FindAddressForAtom findSectionAddress, + uint64_t imageBaseAddress, + uint8_t *atomContentBuffer) override; + + void appendSectionRelocations(const DefinedAtom &atom, + uint64_t atomSectionOffset, + const Reference &ref, + FindSymbolIndexForAtom, + FindSectionIndexForAtom, + FindAddressForAtom, + normalized::Relocations &) override; + + void addAdditionalReferences(MachODefinedAtom &atom) override; + + bool isDataInCodeTransition(Reference::KindValue refKind) override { + switch (refKind) { + case modeThumbCode: + case modeArmCode: + case modeData: + return true; + default: + return false; + break; + } + } + + Reference::KindValue dataInCodeTransitionStart( + const MachODefinedAtom &atom) override { + return modeData; + } + + Reference::KindValue dataInCodeTransitionEnd( + const MachODefinedAtom &atom) override { + return atom.isThumb() ? modeThumbCode : modeArmCode; + } + + bool isThumbFunction(const DefinedAtom &atom) override; + const DefinedAtom *createShim(MachOFile &file, bool thumbToArm, + const DefinedAtom &) override; + +private: + friend class Thumb2ToArmShimAtom; + friend class ArmToThumbShimAtom; + + static const Registry::KindStrings _sKindStrings[]; + static const StubInfo _sStubInfoArmPIC; + + enum ArmKind : Reference::KindValue { + invalid, /// for error condition + + modeThumbCode, /// Content starting at this offset is thumb. + modeArmCode, /// Content starting at this offset is arm. + modeData, /// Content starting at this offset is data. 
+ + // Kinds found in mach-o .o files: + thumb_bl22, /// ex: bl _foo + thumb_b22, /// ex: b _foo + thumb_movw, /// ex: movw r1, :lower16:_foo + thumb_movt, /// ex: movt r1, :lower16:_foo + thumb_movw_funcRel, /// ex: movw r1, :lower16:(_foo-(L1+4)) + thumb_movt_funcRel, /// ex: movt r1, :upper16:(_foo-(L1+4)) + arm_bl24, /// ex: bl _foo + arm_b24, /// ex: b _foo + arm_movw, /// ex: movw r1, :lower16:_foo + arm_movt, /// ex: movt r1, :lower16:_foo + arm_movw_funcRel, /// ex: movw r1, :lower16:(_foo-(L1+4)) + arm_movt_funcRel, /// ex: movt r1, :upper16:(_foo-(L1+4)) + pointer32, /// ex: .long _foo + delta32, /// ex: .long _foo - . + + // Kinds introduced by Passes: + lazyPointer, /// Location contains a lazy pointer. + lazyImmediateLocation, /// Location contains immediate value used in stub. + }; + + // Utility functions for inspecting/updating instructions. + static bool isThumbMovw(uint32_t instruction); + static bool isThumbMovt(uint32_t instruction); + static bool isArmMovw(uint32_t instruction); + static bool isArmMovt(uint32_t instruction); + static int32_t getDisplacementFromThumbBranch(uint32_t instruction, uint32_t); + static int32_t getDisplacementFromArmBranch(uint32_t instruction); + static uint16_t getWordFromThumbMov(uint32_t instruction); + static uint16_t getWordFromArmMov(uint32_t instruction); + static uint32_t clearThumbBit(uint32_t value, const Atom *target); + static uint32_t setDisplacementInArmBranch(uint32_t instr, int32_t disp, + bool targetIsThumb); + static uint32_t setDisplacementInThumbBranch(uint32_t instr, uint32_t ia, + int32_t disp, bool targetThumb); + static uint32_t setWordFromThumbMov(uint32_t instruction, uint16_t word); + static uint32_t setWordFromArmMov(uint32_t instruction, uint16_t word); + + StringRef stubName(const DefinedAtom &); + bool useExternalRelocationTo(const Atom &target); + + void applyFixupFinal(const Reference &ref, uint8_t *location, + uint64_t fixupAddress, uint64_t targetAddress, + uint64_t inAtomAddress, 
bool &thumbMode, + bool targetIsThumb); + + void applyFixupRelocatable(const Reference &ref, uint8_t *location, + uint64_t fixupAddress, + uint64_t targetAddress, + uint64_t inAtomAddress, bool &thumbMode, + bool targetIsThumb); +}; + +//===----------------------------------------------------------------------===// +// ArchHandler_arm +//===----------------------------------------------------------------------===// + +ArchHandler_arm::ArchHandler_arm() { } + +ArchHandler_arm::~ArchHandler_arm() { } + +const Registry::KindStrings ArchHandler_arm::_sKindStrings[] = { + LLD_KIND_STRING_ENTRY(invalid), + LLD_KIND_STRING_ENTRY(modeThumbCode), + LLD_KIND_STRING_ENTRY(modeArmCode), + LLD_KIND_STRING_ENTRY(modeData), + LLD_KIND_STRING_ENTRY(thumb_bl22), + LLD_KIND_STRING_ENTRY(thumb_b22), + LLD_KIND_STRING_ENTRY(thumb_movw), + LLD_KIND_STRING_ENTRY(thumb_movt), + LLD_KIND_STRING_ENTRY(thumb_movw_funcRel), + LLD_KIND_STRING_ENTRY(thumb_movt_funcRel), + LLD_KIND_STRING_ENTRY(arm_bl24), + LLD_KIND_STRING_ENTRY(arm_b24), + LLD_KIND_STRING_ENTRY(arm_movw), + LLD_KIND_STRING_ENTRY(arm_movt), + LLD_KIND_STRING_ENTRY(arm_movw_funcRel), + LLD_KIND_STRING_ENTRY(arm_movt_funcRel), + LLD_KIND_STRING_ENTRY(pointer32), + LLD_KIND_STRING_ENTRY(delta32), + LLD_KIND_STRING_ENTRY(lazyPointer), + LLD_KIND_STRING_ENTRY(lazyImmediateLocation), + LLD_KIND_STRING_END +}; + +const ArchHandler::StubInfo ArchHandler_arm::_sStubInfoArmPIC = { + "dyld_stub_binder", + + // References in lazy pointer + { Reference::KindArch::ARM, pointer32, 0, 0 }, + { Reference::KindArch::ARM, lazyPointer, 0, 0 }, + + // GOT pointer to dyld_stub_binder + { Reference::KindArch::ARM, pointer32, 0, 0 }, + + // arm code alignment 2^2 + 2, + + // Stub size and code + 16, + { 0x04, 0xC0, 0x9F, 0xE5, // ldr ip, pc + 12 + 0x0C, 0xC0, 0x8F, 0xE0, // add ip, pc, ip + 0x00, 0xF0, 0x9C, 0xE5, // ldr pc, [ip] + 0x00, 0x00, 0x00, 0x00 }, // .long L_foo$lazy_ptr - (L1$scv + 8) + { Reference::KindArch::ARM, delta32, 12, 0 }, + { 
false, 0, 0, 0 }, + + // Stub Helper size and code + 12, + { 0x00, 0xC0, 0x9F, 0xE5, // ldr ip, [pc, #0] + 0x00, 0x00, 0x00, 0xEA, // b _helperhelper + 0x00, 0x00, 0x00, 0x00 }, // .long lazy-info-offset + { Reference::KindArch::ARM, lazyImmediateLocation, 8, 0 }, + { Reference::KindArch::ARM, arm_b24, 4, 0 }, + + // Stub Helper-Common size and code + 36, + { // push lazy-info-offset + 0x04, 0xC0, 0x2D, 0xE5, // str ip, [sp, #-4]! + // push address of dyld_mageLoaderCache + 0x10, 0xC0, 0x9F, 0xE5, // ldr ip, L1 + 0x0C, 0xC0, 0x8F, 0xE0, // add ip, pc, ip + 0x04, 0xC0, 0x2D, 0xE5, // str ip, [sp, #-4]! + // jump through dyld_stub_binder + 0x08, 0xC0, 0x9F, 0xE5, // ldr ip, L2 + 0x0C, 0xC0, 0x8F, 0xE0, // add ip, pc, ip + 0x00, 0xF0, 0x9C, 0xE5, // ldr pc, [ip] + 0x00, 0x00, 0x00, 0x00, // L1: .long fFastStubGOTAtom - (helper+16) + 0x00, 0x00, 0x00, 0x00 }, // L2: .long dyld_stub_binder - (helper+28) + { Reference::KindArch::ARM, delta32, 28, 0xC }, + { false, 0, 0, 0 }, + { Reference::KindArch::ARM, delta32, 32, 0x04 }, + { false, 0, 0, 0 } +}; + +const ArchHandler::StubInfo &ArchHandler_arm::stubInfo() { + // If multiple kinds of stubs are supported, select which StubInfo here. 
+ return _sStubInfoArmPIC; +} + +bool ArchHandler_arm::isCallSite(const Reference &ref) { + switch (ref.kindValue()) { + case thumb_b22: + case thumb_bl22: + case arm_b24: + case arm_bl24: + return true; + default: + return false; + } +} + +bool ArchHandler_arm::isPointer(const Reference &ref) { + return (ref.kindValue() == pointer32); +} + +bool ArchHandler_arm::isNonCallBranch(const Reference &ref) { + switch (ref.kindValue()) { + case thumb_b22: + case arm_b24: + return true; + default: + return false; + } +} + +bool ArchHandler_arm::isPairedReloc(const Relocation &reloc) { + switch (reloc.type) { + case ARM_RELOC_SECTDIFF: + case ARM_RELOC_LOCAL_SECTDIFF: + case ARM_RELOC_HALF_SECTDIFF: + case ARM_RELOC_HALF: + return true; + default: + return false; + } +} + +/// Trace references from stub atom to lazy pointer to target and get its name. +StringRef ArchHandler_arm::stubName(const DefinedAtom &stubAtom) { + assert(stubAtom.contentType() == DefinedAtom::typeStub); + for (const Reference *ref : stubAtom) { + if (const DefinedAtom* lp = dyn_cast<DefinedAtom>(ref->target())) { + if (lp->contentType() != DefinedAtom::typeLazyPointer) + continue; + for (const Reference *ref2 : *lp) { + if (ref2->kindValue() != lazyPointer) + continue; + return ref2->target()->name(); + } + } + } + return "stub"; +} + +/// Extract displacement from an ARM b/bl/blx instruction. +int32_t ArchHandler_arm::getDisplacementFromArmBranch(uint32_t instruction) { + // Sign-extend imm24 + int32_t displacement = (instruction & 0x00FFFFFF) << 2; + if ((displacement & 0x02000000) != 0) + displacement |= 0xFC000000; + // If this is BLX and H bit set, add 2. + if ((instruction & 0xFF000000) == 0xFB000000) + displacement += 2; + return displacement; +} + +/// Update an ARM b/bl/blx instruction, switching bl <-> blx as needed. 
+uint32_t ArchHandler_arm::setDisplacementInArmBranch(uint32_t instruction, + int32_t displacement, + bool targetIsThumb) { + assert((displacement <= 33554428) && (displacement > (-33554432)) + && "arm branch out of range"); + bool is_blx = ((instruction & 0xF0000000) == 0xF0000000); + uint32_t newInstruction = (instruction & 0xFF000000); + uint32_t h = 0; + if (targetIsThumb) { + // Force use of BLX. + newInstruction = 0xFA000000; + if (!is_blx) { + assert(((instruction & 0xF0000000) == 0xE0000000) + && "no conditional arm blx"); + assert(((instruction & 0xFF000000) == 0xEB000000) + && "no arm pc-rel BX instruction"); + } + if (displacement & 2) + h = 1; + } + else { + // Force use of B/BL. + if (is_blx) + newInstruction = 0xEB000000; + } + newInstruction |= (h << 24) | ((displacement >> 2) & 0x00FFFFFF); + return newInstruction; +} + +/// Extract displacement from a thumb b/bl/blx instruction. +int32_t ArchHandler_arm::getDisplacementFromThumbBranch(uint32_t instruction, + uint32_t instrAddr) { + bool is_blx = ((instruction & 0xD000F800) == 0xC000F000); + uint32_t s = (instruction >> 10) & 0x1; + uint32_t j1 = (instruction >> 29) & 0x1; + uint32_t j2 = (instruction >> 27) & 0x1; + uint32_t imm10 = instruction & 0x3FF; + uint32_t imm11 = (instruction >> 16) & 0x7FF; + uint32_t i1 = (j1 == s); + uint32_t i2 = (j2 == s); + uint32_t dis = + (s << 24) | (i1 << 23) | (i2 << 22) | (imm10 << 12) | (imm11 << 1); + int32_t sdis = dis; + int32_t result = s ? (sdis | 0xFE000000) : sdis; + if (is_blx && (instrAddr & 0x2)) { + // The thumb blx instruction always has low bit of imm11 as zero. The way + // a 2-byte aligned blx can branch to a 4-byte aligned ARM target is that + // the blx instruction always 4-byte aligns the pc before adding the + // displacement from the blx. We must emulate that when decoding this. + result -= 2; + } + return result; +} + +/// Update a thumb b/bl/blx instruction, switching bl <-> blx as needed. 
+uint32_t ArchHandler_arm::setDisplacementInThumbBranch(uint32_t instruction, + uint32_t instrAddr, + int32_t displacement, + bool targetIsThumb) { + assert((displacement <= 16777214) && (displacement > (-16777216)) + && "thumb branch out of range"); + bool is_bl = ((instruction & 0xD000F800) == 0xD000F000); + bool is_blx = ((instruction & 0xD000F800) == 0xC000F000); + bool is_b = ((instruction & 0xD000F800) == 0x9000F000); + uint32_t newInstruction = (instruction & 0xD000F800); + if (is_bl || is_blx) { + if (targetIsThumb) { + newInstruction = 0xD000F000; // Use bl + } else { + newInstruction = 0xC000F000; // Use blx + // See note in getDisplacementFromThumbBranch() about blx. + if (instrAddr & 0x2) + displacement += 2; + } + } else if (is_b) { + assert(targetIsThumb && "no pc-rel thumb branch instruction that " + "switches to arm mode"); + } + else { + llvm_unreachable("thumb branch22 reloc on a non-branch instruction"); + } + uint32_t s = (uint32_t)(displacement >> 24) & 0x1; + uint32_t i1 = (uint32_t)(displacement >> 23) & 0x1; + uint32_t i2 = (uint32_t)(displacement >> 22) & 0x1; + uint32_t imm10 = (uint32_t)(displacement >> 12) & 0x3FF; + uint32_t imm11 = (uint32_t)(displacement >> 1) & 0x7FF; + uint32_t j1 = (i1 == s); + uint32_t j2 = (i2 == s); + uint32_t nextDisp = (j1 << 13) | (j2 << 11) | imm11; + uint32_t firstDisp = (s << 10) | imm10; + newInstruction |= (nextDisp << 16) | firstDisp; + return newInstruction; +} + +bool ArchHandler_arm::isThumbMovw(uint32_t instruction) { + return (instruction & 0x8000FBF0) == 0x0000F240; +} + +bool ArchHandler_arm::isThumbMovt(uint32_t instruction) { + return (instruction & 0x8000FBF0) == 0x0000F2C0; +} + +bool ArchHandler_arm::isArmMovw(uint32_t instruction) { + return (instruction & 0x0FF00000) == 0x03000000; +} + +bool ArchHandler_arm::isArmMovt(uint32_t instruction) { + return (instruction & 0x0FF00000) == 0x03400000; +} + + +uint16_t ArchHandler_arm::getWordFromThumbMov(uint32_t instruction) { + 
assert(isThumbMovw(instruction) || isThumbMovt(instruction)); + uint32_t i = ((instruction & 0x00000400) >> 10); + uint32_t imm4 = (instruction & 0x0000000F); + uint32_t imm3 = ((instruction & 0x70000000) >> 28); + uint32_t imm8 = ((instruction & 0x00FF0000) >> 16); + return (imm4 << 12) | (i << 11) | (imm3 << 8) | imm8; +} + +uint16_t ArchHandler_arm::getWordFromArmMov(uint32_t instruction) { + assert(isArmMovw(instruction) || isArmMovt(instruction)); + uint32_t imm4 = ((instruction & 0x000F0000) >> 16); + uint32_t imm12 = (instruction & 0x00000FFF); + return (imm4 << 12) | imm12; +} + + +uint32_t ArchHandler_arm::setWordFromThumbMov(uint32_t instr, uint16_t word) { + assert(isThumbMovw(instr) || isThumbMovt(instr)); + uint32_t imm4 = (word & 0xF000) >> 12; + uint32_t i = (word & 0x0800) >> 11; + uint32_t imm3 = (word & 0x0700) >> 8; + uint32_t imm8 = word & 0x00FF; + return (instr & 0x8F00FBF0) | imm4 | (i << 10) | (imm3 << 28) | (imm8 << 16); +} + +uint32_t ArchHandler_arm::setWordFromArmMov(uint32_t instr, uint16_t word) { + assert(isArmMovw(instr) || isArmMovt(instr)); + uint32_t imm4 = (word & 0xF000) >> 12; + uint32_t imm12 = word & 0x0FFF; + return (instr & 0xFFF0F000) | (imm4 << 16) | imm12; +} + + +uint32_t ArchHandler_arm::clearThumbBit(uint32_t value, const Atom *target) { + // The assembler often adds one to the address of a thumb function. + // We need to undo that so it does not look like an addend. 
+ if (value & 1) { + if (isa<DefinedAtom>(target)) { + const MachODefinedAtom *machoTarget = + reinterpret_cast<const MachODefinedAtom *>(target); + if (machoTarget->isThumb()) + value &= -2; // mask off thumb-bit + } + } + return value; +} + +std::error_code ArchHandler_arm::getReferenceInfo( + const Relocation &reloc, const DefinedAtom *inAtom, uint32_t offsetInAtom, + uint64_t fixupAddress, bool isBig, + FindAtomBySectionAndAddress atomFromAddress, + FindAtomBySymbolIndex atomFromSymbolIndex, Reference::KindValue *kind, + const lld::Atom **target, Reference::Addend *addend) { + typedef std::error_code E; + const uint8_t *fixupContent = &inAtom->rawContent()[offsetInAtom]; + uint64_t targetAddress; + uint32_t instruction = *(const ulittle32_t *)fixupContent; + int32_t displacement; + switch (relocPattern(reloc)) { + case ARM_THUMB_RELOC_BR22 | rPcRel | rExtern | rLength4: + // ex: bl _foo (and _foo is undefined) + if ((instruction & 0xD000F800) == 0x9000F000) + *kind = thumb_b22; + else + *kind = thumb_bl22; + if (E ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + // Instruction contains branch to addend. 
+ displacement = getDisplacementFromThumbBranch(instruction, fixupAddress); + *addend = fixupAddress + 4 + displacement; + return std::error_code(); + case ARM_THUMB_RELOC_BR22 | rPcRel | rLength4: + // ex: bl _foo (and _foo is defined) + if ((instruction & 0xD000F800) == 0x9000F000) + *kind = thumb_b22; + else + *kind = thumb_bl22; + displacement = getDisplacementFromThumbBranch(instruction, fixupAddress); + targetAddress = fixupAddress + 4 + displacement; + return atomFromAddress(reloc.symbol, targetAddress, target, addend); + case ARM_THUMB_RELOC_BR22 | rScattered | rPcRel | rLength4: + // ex: bl _foo+4 (and _foo is defined) + if ((instruction & 0xD000F800) == 0x9000F000) + *kind = thumb_b22; + else + *kind = thumb_bl22; + displacement = getDisplacementFromThumbBranch(instruction, fixupAddress); + targetAddress = fixupAddress + 4 + displacement; + if (E ec = atomFromAddress(0, reloc.value, target, addend)) + return ec; + // reloc.value is target atom's address. Instruction contains branch + // to atom+addend. + *addend += (targetAddress - reloc.value); + return std::error_code(); + case ARM_RELOC_BR24 | rPcRel | rExtern | rLength4: + // ex: bl _foo (and _foo is undefined) + if (((instruction & 0x0F000000) == 0x0A000000) + && ((instruction & 0xF0000000) != 0xF0000000)) + *kind = arm_b24; + else + *kind = arm_bl24; + if (E ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + // Instruction contains branch to addend. 
+ displacement = getDisplacementFromArmBranch(instruction); + *addend = fixupAddress + 8 + displacement; + return std::error_code(); + case ARM_RELOC_BR24 | rPcRel | rLength4: + // ex: bl _foo (and _foo is defined) + if (((instruction & 0x0F000000) == 0x0A000000) + && ((instruction & 0xF0000000) != 0xF0000000)) + *kind = arm_b24; + else + *kind = arm_bl24; + displacement = getDisplacementFromArmBranch(instruction); + targetAddress = fixupAddress + 8 + displacement; + return atomFromAddress(reloc.symbol, targetAddress, target, addend); + case ARM_RELOC_BR24 | rScattered | rPcRel | rLength4: + // ex: bl _foo+4 (and _foo is defined) + if (((instruction & 0x0F000000) == 0x0A000000) + && ((instruction & 0xF0000000) != 0xF0000000)) + *kind = arm_b24; + else + *kind = arm_bl24; + displacement = getDisplacementFromArmBranch(instruction); + targetAddress = fixupAddress + 8 + displacement; + if (E ec = atomFromAddress(0, reloc.value, target, addend)) + return ec; + // reloc.value is target atom's address. Instruction contains branch + // to atom+addend. 
+ *addend += (targetAddress - reloc.value); + return std::error_code(); + case ARM_RELOC_VANILLA | rExtern | rLength4: + // ex: .long _foo (and _foo is undefined) + *kind = pointer32; + if (E ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + *addend = instruction; + return std::error_code(); + case ARM_RELOC_VANILLA | rLength4: + // ex: .long _foo (and _foo is defined) + *kind = pointer32; + if (E ec = atomFromAddress(reloc.symbol, instruction, target, addend)) + return ec; + *addend = clearThumbBit((uint32_t) * addend, *target); + return std::error_code(); + case ARM_RELOC_VANILLA | rScattered | rLength4: + // ex: .long _foo+a (and _foo is defined) + *kind = pointer32; + if (E ec = atomFromAddress(0, reloc.value, target, addend)) + return ec; + *addend += (clearThumbBit(instruction, *target) - reloc.value); + return std::error_code(); + default: + return make_dynamic_error_code(Twine("unsupported arm relocation type")); + } + return std::error_code(); +} + +std::error_code +ArchHandler_arm::getPairReferenceInfo(const normalized::Relocation &reloc1, + const normalized::Relocation &reloc2, + const DefinedAtom *inAtom, + uint32_t offsetInAtom, + uint64_t fixupAddress, bool isBig, + bool scatterable, + FindAtomBySectionAndAddress atomFromAddr, + FindAtomBySymbolIndex atomFromSymbolIndex, + Reference::KindValue *kind, + const lld::Atom **target, + Reference::Addend *addend) { + bool pointerDiff = false; + bool funcRel; + bool top; + bool thumbReloc; + switch(relocPattern(reloc1) << 16 | relocPattern(reloc2)) { + case ((ARM_RELOC_HALF_SECTDIFF | rScattered | rLenThmbLo) << 16 | + ARM_RELOC_PAIR | rScattered | rLenThmbLo): + // ex: movw r1, :lower16:(_x-L1) [thumb mode] + *kind = thumb_movw_funcRel; + funcRel = true; + top = false; + thumbReloc = true; + break; + case ((ARM_RELOC_HALF_SECTDIFF | rScattered | rLenThmbHi) << 16 | + ARM_RELOC_PAIR | rScattered | rLenThmbHi): + // ex: movt r1, :upper16:(_x-L1) [thumb mode] + *kind = thumb_movt_funcRel; + funcRel 
= true; + top = true; + thumbReloc = true; + break; + case ((ARM_RELOC_HALF_SECTDIFF | rScattered | rLenArmLo) << 16 | + ARM_RELOC_PAIR | rScattered | rLenArmLo): + // ex: movw r1, :lower16:(_x-L1) [arm mode] + *kind = arm_movw_funcRel; + funcRel = true; + top = false; + thumbReloc = false; + break; + case ((ARM_RELOC_HALF_SECTDIFF | rScattered | rLenArmHi) << 16 | + ARM_RELOC_PAIR | rScattered | rLenArmHi): + // ex: movt r1, :upper16:(_x-L1) [arm mode] + *kind = arm_movt_funcRel; + funcRel = true; + top = true; + thumbReloc = false; + break; + case ((ARM_RELOC_HALF | rLenThmbLo) << 16 | + ARM_RELOC_PAIR | rLenThmbLo): + // ex: movw r1, :lower16:_x [thumb mode] + *kind = thumb_movw; + funcRel = false; + top = false; + thumbReloc = true; + break; + case ((ARM_RELOC_HALF | rLenThmbHi) << 16 | + ARM_RELOC_PAIR | rLenThmbHi): + // ex: movt r1, :upper16:_x [thumb mode] + *kind = thumb_movt; + funcRel = false; + top = true; + thumbReloc = true; + break; + case ((ARM_RELOC_HALF | rLenArmLo) << 16 | + ARM_RELOC_PAIR | rLenArmLo): + // ex: movw r1, :lower16:_x [arm mode] + *kind = arm_movw; + funcRel = false; + top = false; + thumbReloc = false; + break; + case ((ARM_RELOC_HALF | rLenArmHi) << 16 | + ARM_RELOC_PAIR | rLenArmHi): + // ex: movt r1, :upper16:_x [arm mode] + *kind = arm_movt; + funcRel = false; + top = true; + thumbReloc = false; + break; + case ((ARM_RELOC_HALF | rScattered | rLenThmbLo) << 16 | + ARM_RELOC_PAIR | rLenThmbLo): + // ex: movw r1, :lower16:_x+a [thumb mode] + *kind = thumb_movw; + funcRel = false; + top = false; + thumbReloc = true; + break; + case ((ARM_RELOC_HALF | rScattered | rLenThmbHi) << 16 | + ARM_RELOC_PAIR | rLenThmbHi): + // ex: movt r1, :upper16:_x+a [thumb mode] + *kind = thumb_movt; + funcRel = false; + top = true; + thumbReloc = true; + break; + case ((ARM_RELOC_HALF | rScattered | rLenArmLo) << 16 | + ARM_RELOC_PAIR | rLenArmLo): + // ex: movw r1, :lower16:_x+a [arm mode] + *kind = arm_movw; + funcRel = false; + top = false; + 
thumbReloc = false; + break; + case ((ARM_RELOC_HALF | rScattered | rLenArmHi) << 16 | + ARM_RELOC_PAIR | rLenArmHi): + // ex: movt r1, :upper16:_x+a [arm mode] + *kind = arm_movt; + funcRel = false; + top = true; + thumbReloc = false; + break; + case ((ARM_RELOC_HALF | rExtern | rLenThmbLo) << 16 | + ARM_RELOC_PAIR | rLenThmbLo): + // ex: movw r1, :lower16:_undef [thumb mode] + *kind = thumb_movw; + funcRel = false; + top = false; + thumbReloc = true; + break; + case ((ARM_RELOC_HALF | rExtern | rLenThmbHi) << 16 | + ARM_RELOC_PAIR | rLenThmbHi): + // ex: movt r1, :upper16:_undef [thumb mode] + *kind = thumb_movt; + funcRel = false; + top = true; + thumbReloc = true; + break; + case ((ARM_RELOC_HALF | rExtern | rLenArmLo) << 16 | + ARM_RELOC_PAIR | rLenArmLo): + // ex: movw r1, :lower16:_undef [arm mode] + *kind = arm_movw; + funcRel = false; + top = false; + thumbReloc = false; + break; + case ((ARM_RELOC_HALF | rExtern | rLenArmHi) << 16 | + ARM_RELOC_PAIR | rLenArmHi): + // ex: movt r1, :upper16:_undef [arm mode] + *kind = arm_movt; + funcRel = false; + top = true; + thumbReloc = false; + break; + case ((ARM_RELOC_SECTDIFF | rScattered | rLength4) << 16 | + ARM_RELOC_PAIR | rScattered | rLength4): + case ((ARM_RELOC_LOCAL_SECTDIFF | rScattered | rLength4) << 16 | + ARM_RELOC_PAIR | rScattered | rLength4): + // ex: .long _foo - . 
+ pointerDiff = true; + break; + default: + return make_dynamic_error_code(Twine("unsupported arm relocation pair")); + } + const uint8_t *fixupContent = &inAtom->rawContent()[offsetInAtom]; + std::error_code ec; + uint32_t instruction = *(const ulittle32_t *)fixupContent; + uint32_t value; + uint32_t fromAddress; + uint32_t toAddress; + uint16_t instruction16; + uint16_t other16; + const lld::Atom *fromTarget; + Reference::Addend offsetInTo; + Reference::Addend offsetInFrom; + if (pointerDiff) { + toAddress = reloc1.value; + fromAddress = reloc2.value; + ec = atomFromAddr(0, toAddress, target, &offsetInTo); + if (ec) + return ec; + ec = atomFromAddr(0, fromAddress, &fromTarget, &offsetInFrom); + if (ec) + return ec; + if (scatterable && (fromTarget != inAtom)) + return make_dynamic_error_code(Twine("SECTDIFF relocation where " + "subtrahend label is not in atom")); + *kind = delta32; + value = clearThumbBit(instruction, *target); + *addend = (int32_t)(value - (toAddress - fixupAddress)); + } else if (funcRel) { + toAddress = reloc1.value; + fromAddress = reloc2.value; + ec = atomFromAddr(0, toAddress, target, &offsetInTo); + if (ec) + return ec; + ec = atomFromAddr(0, fromAddress, &fromTarget, &offsetInFrom); + if (ec) + return ec; + if (fromTarget != inAtom) + return make_dynamic_error_code( + Twine("ARM_RELOC_HALF_SECTDIFF relocation " + "where subtrahend label is not in atom")); + other16 = (reloc2.offset & 0xFFFF); + if (thumbReloc) { + if (top) { + if (!isThumbMovt(instruction)) + return make_dynamic_error_code(Twine("expected movt instruction")); + } + else { + if (!isThumbMovw(instruction)) + return make_dynamic_error_code(Twine("expected movw instruction")); + } + instruction16 = getWordFromThumbMov(instruction); + } + else { + if (top) { + if (!isArmMovt(instruction)) + return make_dynamic_error_code(Twine("expected movt instruction")); + } + else { + if (!isArmMovw(instruction)) + return make_dynamic_error_code(Twine("expected movw instruction")); + } + 
instruction16 = getWordFromArmMov(instruction); + } + if (top) + value = (instruction16 << 16) | other16; + else + value = (other16 << 16) | instruction16; + value = clearThumbBit(value, *target); + int64_t ta = (int64_t) value - (toAddress - fromAddress); + *addend = ta - offsetInFrom; + return std::error_code(); + } else { + uint32_t sectIndex; + if (thumbReloc) { + if (top) { + if (!isThumbMovt(instruction)) + return make_dynamic_error_code(Twine("expected movt instruction")); + } + else { + if (!isThumbMovw(instruction)) + return make_dynamic_error_code(Twine("expected movw instruction")); + } + instruction16 = getWordFromThumbMov(instruction); + } + else { + if (top) { + if (!isArmMovt(instruction)) + return make_dynamic_error_code(Twine("expected movt instruction")); + } + else { + if (!isArmMovw(instruction)) + return make_dynamic_error_code(Twine("expected movw instruction")); + } + instruction16 = getWordFromArmMov(instruction); + } + other16 = (reloc2.offset & 0xFFFF); + if (top) + value = (instruction16 << 16) | other16; + else + value = (other16 << 16) | instruction16; + if (reloc1.isExtern) { + ec = atomFromSymbolIndex(reloc1.symbol, target); + if (ec) + return ec; + *addend = value; + } else { + if (reloc1.scattered) { + toAddress = reloc1.value; + sectIndex = 0; + } else { + toAddress = value; + sectIndex = reloc1.symbol; + } + ec = atomFromAddr(sectIndex, toAddress, target, &offsetInTo); + if (ec) + return ec; + *addend = value - toAddress; + } + } + + return std::error_code(); +} + +void ArchHandler_arm::applyFixupFinal(const Reference &ref, uint8_t *loc, + uint64_t fixupAddress, + uint64_t targetAddress, + uint64_t inAtomAddress, + bool &thumbMode, bool targetIsThumb) { + if (ref.kindNamespace() != Reference::KindNamespace::mach_o) + return; + assert(ref.kindArch() == Reference::KindArch::ARM); + ulittle32_t *loc32 = reinterpret_cast<ulittle32_t *>(loc); + int32_t displacement; + uint16_t value16; + uint32_t value32; + switch 
(static_cast<ArmKind>(ref.kindValue())) { + case modeThumbCode: + thumbMode = true; + break; + case modeArmCode: + thumbMode = false; + break; + case modeData: + break; + case thumb_b22: + case thumb_bl22: + assert(thumbMode); + displacement = (targetAddress - (fixupAddress + 4)) + ref.addend(); + value32 = setDisplacementInThumbBranch(*loc32, fixupAddress, + displacement, targetIsThumb); + *loc32 = value32; + break; + case thumb_movw: + assert(thumbMode); + value16 = (targetAddress + ref.addend()) & 0xFFFF; + if (targetIsThumb) + value16 |= 1; + *loc32 = setWordFromThumbMov(*loc32, value16); + break; + case thumb_movt: + assert(thumbMode); + value16 = (targetAddress + ref.addend()) >> 16; + *loc32 = setWordFromThumbMov(*loc32, value16); + break; + case thumb_movw_funcRel: + assert(thumbMode); + value16 = (targetAddress - inAtomAddress + ref.addend()) & 0xFFFF; + if (targetIsThumb) + value16 |= 1; + *loc32 = setWordFromThumbMov(*loc32, value16); + break; + case thumb_movt_funcRel: + assert(thumbMode); + value16 = (targetAddress - inAtomAddress + ref.addend()) >> 16; + *loc32 = setWordFromThumbMov(*loc32, value16); + break; + case arm_b24: + case arm_bl24: + assert(!thumbMode); + displacement = (targetAddress - (fixupAddress + 8)) + ref.addend(); + value32 = setDisplacementInArmBranch(*loc32, displacement, targetIsThumb); + *loc32 = value32; + break; + case arm_movw: + assert(!thumbMode); + value16 = (targetAddress + ref.addend()) & 0xFFFF; + if (targetIsThumb) + value16 |= 1; + *loc32 = setWordFromArmMov(*loc32, value16); + break; + case arm_movt: + assert(!thumbMode); + value16 = (targetAddress + ref.addend()) >> 16; + *loc32 = setWordFromArmMov(*loc32, value16); + break; + case arm_movw_funcRel: + assert(!thumbMode); + value16 = (targetAddress - inAtomAddress + ref.addend()) & 0xFFFF; + if (targetIsThumb) + value16 |= 1; + *loc32 = setWordFromArmMov(*loc32, value16); + break; + case arm_movt_funcRel: + assert(!thumbMode); + value16 = (targetAddress - 
inAtomAddress + ref.addend()) >> 16; + *loc32 = setWordFromArmMov(*loc32, value16); + break; + case pointer32: + if (targetIsThumb) + *loc32 = targetAddress + ref.addend() + 1; + else + *loc32 = targetAddress + ref.addend(); + break; + case delta32: + if (targetIsThumb) + *loc32 = targetAddress - fixupAddress + ref.addend() + 1; + else + *loc32 = targetAddress - fixupAddress + ref.addend(); + break; + case lazyPointer: + // do nothing + break; + case lazyImmediateLocation: + *loc32 = ref.addend(); + break; + case invalid: + llvm_unreachable("invalid ARM Reference Kind"); + break; + } +} + +void ArchHandler_arm::generateAtomContent(const DefinedAtom &atom, + bool relocatable, + FindAddressForAtom findAddress, + FindAddressForAtom findSectionAddress, + uint64_t imageBaseAddress, + uint8_t *atomContentBuffer) { + // Copy raw bytes. + memcpy(atomContentBuffer, atom.rawContent().data(), atom.size()); + // Apply fix-ups. + bool thumbMode = false; + for (const Reference *ref : atom) { + uint32_t offset = ref->offsetInAtom(); + const Atom *target = ref->target(); + uint64_t targetAddress = 0; + bool targetIsThumb = false; + if (const DefinedAtom *defTarg = dyn_cast<DefinedAtom>(target)) { + targetAddress = findAddress(*target); + targetIsThumb = isThumbFunction(*defTarg); + } + uint64_t atomAddress = findAddress(atom); + uint64_t fixupAddress = atomAddress + offset; + if (relocatable) { + applyFixupRelocatable(*ref, &atomContentBuffer[offset], fixupAddress, + targetAddress, atomAddress, thumbMode, + targetIsThumb); + } else { + applyFixupFinal(*ref, &atomContentBuffer[offset], fixupAddress, + targetAddress, atomAddress, thumbMode, targetIsThumb); + } + } +} + + +bool ArchHandler_arm::useExternalRelocationTo(const Atom &target) { + // Undefined symbols are referenced via external relocations. 
+ if (isa<UndefinedAtom>(&target)) + return true; + if (const DefinedAtom *defAtom = dyn_cast<DefinedAtom>(&target)) { + switch (defAtom->merge()) { + case DefinedAtom::mergeAsTentative: + // Tentative definitions are referenced via external relocations. + return true; + case DefinedAtom::mergeAsWeak: + case DefinedAtom::mergeAsWeakAndAddressUsed: + // Global weak-defs are referenced via external relocations. + return (defAtom->scope() == DefinedAtom::scopeGlobal); + default: + break; + } + } + // Everything else is referenced via an internal relocation. + return false; +} + +void ArchHandler_arm::applyFixupRelocatable(const Reference &ref, uint8_t *loc, + uint64_t fixupAddress, + uint64_t targetAddress, + uint64_t inAtomAddress, + bool &thumbMode, + bool targetIsThumb) { + if (ref.kindNamespace() != Reference::KindNamespace::mach_o) + return; + assert(ref.kindArch() == Reference::KindArch::ARM); + bool useExternalReloc = useExternalRelocationTo(*ref.target()); + ulittle32_t *loc32 = reinterpret_cast<ulittle32_t *>(loc); + int32_t displacement; + uint16_t value16; + uint32_t value32; + bool targetIsUndef = isa<UndefinedAtom>(ref.target()); + switch (static_cast<ArmKind>(ref.kindValue())) { + case modeThumbCode: + thumbMode = true; + break; + case modeArmCode: + thumbMode = false; + break; + case modeData: + break; + case thumb_b22: + case thumb_bl22: + assert(thumbMode); + if (useExternalReloc) + displacement = (ref.addend() - (fixupAddress + 4)); + else + displacement = (targetAddress - (fixupAddress + 4)) + ref.addend(); + value32 = setDisplacementInThumbBranch(*loc32, fixupAddress, + displacement, + targetIsUndef || targetIsThumb); + *loc32 = value32; + break; + case thumb_movw: + assert(thumbMode); + if (useExternalReloc) + value16 = ref.addend() & 0xFFFF; + else + value16 = (targetAddress + ref.addend()) & 0xFFFF; + *loc32 = setWordFromThumbMov(*loc32, value16); + break; + case thumb_movt: + assert(thumbMode); + if (useExternalReloc) + value16 = ref.addend() >> 
16; + else + value16 = (targetAddress + ref.addend()) >> 16; + *loc32 = setWordFromThumbMov(*loc32, value16); + break; + case thumb_movw_funcRel: + assert(thumbMode); + value16 = (targetAddress - inAtomAddress + ref.addend()) & 0xFFFF; + *loc32 = setWordFromThumbMov(*loc32, value16); + break; + case thumb_movt_funcRel: + assert(thumbMode); + value16 = (targetAddress - inAtomAddress + ref.addend()) >> 16; + *loc32 = setWordFromThumbMov(*loc32, value16); + break; + case arm_b24: + case arm_bl24: + assert(!thumbMode); + if (useExternalReloc) + displacement = (ref.addend() - (fixupAddress + 8)); + else + displacement = (targetAddress - (fixupAddress + 8)) + ref.addend(); + value32 = setDisplacementInArmBranch(*loc32, displacement, + targetIsThumb); + *loc32 = value32; + break; + case arm_movw: + assert(!thumbMode); + if (useExternalReloc) + value16 = ref.addend() & 0xFFFF; + else + value16 = (targetAddress + ref.addend()) & 0xFFFF; + *loc32 = setWordFromArmMov(*loc32, value16); + break; + case arm_movt: + assert(!thumbMode); + if (useExternalReloc) + value16 = ref.addend() >> 16; + else + value16 = (targetAddress + ref.addend()) >> 16; + *loc32 = setWordFromArmMov(*loc32, value16); + break; + case arm_movw_funcRel: + assert(!thumbMode); + value16 = (targetAddress - inAtomAddress + ref.addend()) & 0xFFFF; + *loc32 = setWordFromArmMov(*loc32, value16); + break; + case arm_movt_funcRel: + assert(!thumbMode); + value16 = (targetAddress - inAtomAddress + ref.addend()) >> 16; + *loc32 = setWordFromArmMov(*loc32, value16); + break; + case pointer32: + *loc32 = targetAddress + ref.addend(); + break; + case delta32: + *loc32 = targetAddress - fixupAddress + ref.addend(); + break; + case lazyPointer: + case lazyImmediateLocation: + // do nothing + break; + case invalid: + llvm_unreachable("invalid ARM Reference Kind"); + break; + } +} + +void ArchHandler_arm::appendSectionRelocations( + const DefinedAtom &atom, + uint64_t atomSectionOffset, + const Reference &ref, + 
FindSymbolIndexForAtom symbolIndexForAtom, + FindSectionIndexForAtom sectionIndexForAtom, + FindAddressForAtom addressForAtom, + normalized::Relocations &relocs) { + if (ref.kindNamespace() != Reference::KindNamespace::mach_o) + return; + assert(ref.kindArch() == Reference::KindArch::ARM); + uint32_t sectionOffset = atomSectionOffset + ref.offsetInAtom(); + bool useExternalReloc = useExternalRelocationTo(*ref.target()); + uint32_t targetAtomAddress; + uint32_t fromAtomAddress; + uint16_t other16; + switch (static_cast<ArmKind>(ref.kindValue())) { + case modeThumbCode: + case modeArmCode: + case modeData: + // Do nothing. + break; + case thumb_b22: + case thumb_bl22: + if (useExternalReloc) { + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM_THUMB_RELOC_BR22 | rExtern | rPcRel | rLength4); + } else { + if (ref.addend() != 0) + appendReloc(relocs, sectionOffset, 0, addressForAtom(*ref.target()), + ARM_THUMB_RELOC_BR22 | rScattered | rPcRel | rLength4); + else + appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()),0, + ARM_THUMB_RELOC_BR22 | rPcRel | rLength4); + } + break; + case thumb_movw: + if (useExternalReloc) { + other16 = ref.addend() >> 16; + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM_RELOC_HALF | rExtern | rLenThmbLo); + appendReloc(relocs, other16, 0, 0, + ARM_RELOC_PAIR | rLenThmbLo); + } else { + targetAtomAddress = addressForAtom(*ref.target()); + if (ref.addend() != 0) { + other16 = (targetAtomAddress + ref.addend()) >> 16; + appendReloc(relocs, sectionOffset, 0, targetAtomAddress, + ARM_RELOC_HALF | rScattered | rLenThmbLo); + appendReloc(relocs, other16, 0, 0, + ARM_RELOC_PAIR | rLenThmbLo); + } else { + other16 = (targetAtomAddress + ref.addend()) >> 16; + appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()),0, + ARM_RELOC_HALF | rLenThmbLo); + appendReloc(relocs, other16, 0, 0, + ARM_RELOC_PAIR | rLenThmbLo); + } + } + break; + case thumb_movt: + if 
(useExternalReloc) { + other16 = ref.addend() & 0xFFFF; + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM_RELOC_HALF | rExtern | rLenThmbHi); + appendReloc(relocs, other16, 0, 0, + ARM_RELOC_PAIR | rLenThmbHi); + } else { + targetAtomAddress = addressForAtom(*ref.target()); + if (ref.addend() != 0) { + other16 = (targetAtomAddress + ref.addend()) & 0xFFFF; + appendReloc(relocs, sectionOffset, 0, targetAtomAddress, + ARM_RELOC_HALF | rScattered | rLenThmbHi); + appendReloc(relocs, other16, 0, 0, + ARM_RELOC_PAIR | rLenThmbHi); + } else { + other16 = (targetAtomAddress + ref.addend()) & 0xFFFF; + appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()),0, + ARM_RELOC_HALF | rLenThmbHi); + appendReloc(relocs, other16, 0, 0, + ARM_RELOC_PAIR | rLenThmbHi); + } + } + break; + case thumb_movw_funcRel: + fromAtomAddress = addressForAtom(atom); + targetAtomAddress = addressForAtom(*ref.target()); + other16 = (targetAtomAddress - fromAtomAddress + ref.addend()) >> 16; + appendReloc(relocs, sectionOffset, 0, targetAtomAddress, + ARM_RELOC_HALF_SECTDIFF | rScattered | rLenThmbLo); + appendReloc(relocs, other16, 0, fromAtomAddress, + ARM_RELOC_PAIR | rScattered | rLenThmbLo); + break; + case thumb_movt_funcRel: + fromAtomAddress = addressForAtom(atom); + targetAtomAddress = addressForAtom(*ref.target()); + other16 = (targetAtomAddress - fromAtomAddress + ref.addend()) & 0xFFFF; + appendReloc(relocs, sectionOffset, 0, targetAtomAddress, + ARM_RELOC_HALF_SECTDIFF | rScattered | rLenThmbHi); + appendReloc(relocs, other16, 0, fromAtomAddress, + ARM_RELOC_PAIR | rScattered | rLenThmbHi); + break; + case arm_b24: + case arm_bl24: + if (useExternalReloc) { + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM_RELOC_BR24 | rExtern | rPcRel | rLength4); + } else { + if (ref.addend() != 0) + appendReloc(relocs, sectionOffset, 0, addressForAtom(*ref.target()), + ARM_RELOC_BR24 | rScattered | rPcRel | rLength4); + else 
+ appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()),0, + ARM_RELOC_BR24 | rPcRel | rLength4); + } + break; + case arm_movw: + if (useExternalReloc) { + other16 = ref.addend() >> 16; + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM_RELOC_HALF | rExtern | rLenArmLo); + appendReloc(relocs, other16, 0, 0, + ARM_RELOC_PAIR | rLenArmLo); + } else { + targetAtomAddress = addressForAtom(*ref.target()); + if (ref.addend() != 0) { + other16 = (targetAtomAddress + ref.addend()) >> 16; + appendReloc(relocs, sectionOffset, 0, targetAtomAddress, + ARM_RELOC_HALF | rScattered | rLenArmLo); + appendReloc(relocs, other16, 0, 0, + ARM_RELOC_PAIR | rLenArmLo); + } else { + other16 = (targetAtomAddress + ref.addend()) >> 16; + appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()),0, + ARM_RELOC_HALF | rLenArmLo); + appendReloc(relocs, other16, 0, 0, + ARM_RELOC_PAIR | rLenArmLo); + } + } + break; + case arm_movt: + if (useExternalReloc) { + other16 = ref.addend() & 0xFFFF; + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM_RELOC_HALF | rExtern | rLenArmHi); + appendReloc(relocs, other16, 0, 0, + ARM_RELOC_PAIR | rLenArmHi); + } else { + targetAtomAddress = addressForAtom(*ref.target()); + if (ref.addend() != 0) { + other16 = (targetAtomAddress + ref.addend()) & 0xFFFF; + appendReloc(relocs, sectionOffset, 0, targetAtomAddress, + ARM_RELOC_HALF | rScattered | rLenArmHi); + appendReloc(relocs, other16, 0, 0, + ARM_RELOC_PAIR | rLenArmHi); + } else { + other16 = (targetAtomAddress + ref.addend()) & 0xFFFF; + appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()),0, + ARM_RELOC_HALF | rLenArmHi); + appendReloc(relocs, other16, 0, 0, + ARM_RELOC_PAIR | rLenArmHi); + } + } + break; + case arm_movw_funcRel: + fromAtomAddress = addressForAtom(atom); + targetAtomAddress = addressForAtom(*ref.target()); + other16 = (targetAtomAddress - fromAtomAddress + ref.addend()) >> 16; + 
appendReloc(relocs, sectionOffset, 0, targetAtomAddress, + ARM_RELOC_HALF_SECTDIFF | rScattered | rLenArmLo); + appendReloc(relocs, other16, 0, fromAtomAddress, + ARM_RELOC_PAIR | rScattered | rLenArmLo); + break; + case arm_movt_funcRel: + fromAtomAddress = addressForAtom(atom); + targetAtomAddress = addressForAtom(*ref.target()); + other16 = (targetAtomAddress - fromAtomAddress + ref.addend()) & 0xFFFF; + appendReloc(relocs, sectionOffset, 0, targetAtomAddress, + ARM_RELOC_HALF_SECTDIFF | rScattered | rLenArmHi); + appendReloc(relocs, other16, 0, fromAtomAddress, + ARM_RELOC_PAIR | rScattered | rLenArmHi); + break; + case pointer32: + if (useExternalReloc) { + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM_RELOC_VANILLA | rExtern | rLength4); + } + else { + if (ref.addend() != 0) + appendReloc(relocs, sectionOffset, 0, addressForAtom(*ref.target()), + ARM_RELOC_VANILLA | rScattered | rLength4); + else + appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()),0, + ARM_RELOC_VANILLA | rLength4); + } + break; + case delta32: + appendReloc(relocs, sectionOffset, 0, addressForAtom(*ref.target()), + ARM_RELOC_SECTDIFF | rScattered | rLength4); + appendReloc(relocs, sectionOffset, 0, addressForAtom(atom) + + ref.offsetInAtom(), + ARM_RELOC_PAIR | rScattered | rLength4); + break; + case lazyPointer: + case lazyImmediateLocation: + // do nothing + break; + case invalid: + llvm_unreachable("invalid ARM Reference Kind"); + break; + } +} + +void ArchHandler_arm::addAdditionalReferences(MachODefinedAtom &atom) { + if (atom.isThumb()) { + atom.addReference(0, modeThumbCode, &atom, 0, Reference::KindArch::ARM); + } +} + +bool ArchHandler_arm::isThumbFunction(const DefinedAtom &atom) { + for (const Reference *ref : atom) { + if (ref->offsetInAtom() != 0) + return false; + if (ref->kindNamespace() != Reference::KindNamespace::mach_o) + continue; + assert(ref->kindArch() == Reference::KindArch::ARM); + if (ref->kindValue() == 
modeThumbCode) + return true; + } + return false; +} + + +class Thumb2ToArmShimAtom : public SimpleDefinedAtom { +public: + Thumb2ToArmShimAtom(MachOFile &file, StringRef targetName, + const DefinedAtom &target) + : SimpleDefinedAtom(file) { + addReference(Reference::KindNamespace::mach_o, Reference::KindArch::ARM, + ArchHandler_arm::modeThumbCode, 0, this, 0); + addReference(Reference::KindNamespace::mach_o, Reference::KindArch::ARM, + ArchHandler_arm::delta32, 8, &target, 0); + std::string name = std::string(targetName) + "$shim"; + StringRef tmp(name); + _name = tmp.copy(file.allocator()); + } + + StringRef name() const override { + return _name; + } + + ContentType contentType() const override { + return DefinedAtom::typeCode; + } + + Alignment alignment() const override { + return Alignment(2); + } + + uint64_t size() const override { + return 12; + } + + ContentPermissions permissions() const override { + return DefinedAtom::permR_X; + } + + ArrayRef<uint8_t> rawContent() const override { + static const uint8_t bytes[] = + { 0xDF, 0xF8, 0x04, 0xC0, // ldr ip, [pc, #4] + 0xFF, 0x44, // add ip, pc, ip (NOTE(review): 0xFF 0x44 decodes as "add pc, pc"; "add ip, pc, ip" would be 0xFC 0x44 - confirm) + 0x60, 0x47, // bx ip + 0x00, 0x00, 0x00, 0x00 }; // .long target - this + assert(sizeof(bytes) == size()); + return llvm::makeArrayRef(bytes, sizeof(bytes)); + } +private: + StringRef _name; +}; + + +class ArmToThumbShimAtom : public SimpleDefinedAtom { +public: + ArmToThumbShimAtom(MachOFile &file, StringRef targetName, + const DefinedAtom &target) + : SimpleDefinedAtom(file) { + addReference(Reference::KindNamespace::mach_o, Reference::KindArch::ARM, + ArchHandler_arm::delta32, 12, &target, 0); + std::string name = std::string(targetName) + "$shim"; + StringRef tmp(name); + _name = tmp.copy(file.allocator()); + } + + StringRef name() const override { + return _name; + } + + ContentType contentType() const override { + return DefinedAtom::typeCode; + } + + Alignment alignment() const override { + return Alignment(2); + } + + uint64_t size() const override 
{ + return 16; + } + + ContentPermissions permissions() const override { + return DefinedAtom::permR_X; + } + + ArrayRef<uint8_t> rawContent() const override { + static const uint8_t bytes[] = + { 0x04, 0xC0, 0x9F, 0xE5, // ldr ip, [pc, #4] + 0x0C, 0xC0, 0x8F, 0xE0, // add ip, pc, ip + 0x1C, 0xFF, 0x2F, 0xE1, // bx ip + 0x00, 0x00, 0x00, 0x00 }; // .long target - this + assert(sizeof(bytes) == size()); + return llvm::makeArrayRef(bytes, sizeof(bytes)); + } +private: + StringRef _name; +}; + +const DefinedAtom *ArchHandler_arm::createShim(MachOFile &file, + bool thumbToArm, + const DefinedAtom &target) { + bool isStub = (target.contentType() == DefinedAtom::typeStub); + StringRef targetName = isStub ? stubName(target) : target.name(); + if (thumbToArm) + return new (file.allocator()) Thumb2ToArmShimAtom(file, targetName, target); + else + return new (file.allocator()) ArmToThumbShimAtom(file, targetName, target); +} + + +std::unique_ptr<mach_o::ArchHandler> ArchHandler::create_arm() { + return std::unique_ptr<mach_o::ArchHandler>(new ArchHandler_arm()); +} + +} // namespace mach_o +} // namespace lld diff --git a/lib/ReaderWriter/MachO/ArchHandler_arm64.cpp b/lib/ReaderWriter/MachO/ArchHandler_arm64.cpp new file mode 100644 index 000000000000..fd9984b89ce6 --- /dev/null +++ b/lib/ReaderWriter/MachO/ArchHandler_arm64.cpp @@ -0,0 +1,822 @@ +//===- lib/FileFormat/MachO/ArchHandler_arm64.cpp -------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "ArchHandler.h" +#include "Atoms.h" +#include "MachONormalizedFileBinaryUtils.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" + +using namespace llvm::MachO; +using namespace lld::mach_o::normalized; + +namespace lld { +namespace mach_o { + +using llvm::support::ulittle32_t; +using llvm::support::ulittle64_t; + +using llvm::support::little32_t; +using llvm::support::little64_t; + +class ArchHandler_arm64 : public ArchHandler { +public: + ArchHandler_arm64(); + virtual ~ArchHandler_arm64(); + + const Registry::KindStrings *kindStrings() override { return _sKindStrings; } + + Reference::KindArch kindArch() override { + return Reference::KindArch::AArch64; + } + + /// Used by GOTPass to locate GOT References + bool isGOTAccess(const Reference &ref, bool &canBypassGOT) override { + if (ref.kindNamespace() != Reference::KindNamespace::mach_o) + return false; + assert(ref.kindArch() == Reference::KindArch::AArch64); + switch (ref.kindValue()) { + case gotPage21: + case gotOffset12: + canBypassGOT = true; + return true; + case imageOffsetGot: + canBypassGOT = false; + return true; + default: + return false; + } + } + + /// Used by GOTPass to update GOT References. 
+ void updateReferenceToGOT(const Reference *ref, bool targetNowGOT) override { + // If GOT slot was instantiated, transform: + // gotPage21/gotOffset12 -> page21/offset12scale8 + // If GOT slot optimized away, transform: + // gotPage21/gotOffset12 -> page21/addOffset12 + assert(ref->kindNamespace() == Reference::KindNamespace::mach_o); + assert(ref->kindArch() == Reference::KindArch::AArch64); + switch (ref->kindValue()) { + case gotPage21: + const_cast<Reference *>(ref)->setKindValue(page21); + break; + case gotOffset12: + const_cast<Reference *>(ref)->setKindValue(targetNowGOT ? + offset12scale8 : addOffset12); + break; + case imageOffsetGot: + const_cast<Reference *>(ref)->setKindValue(imageOffset); + break; + default: + llvm_unreachable("Not a GOT reference"); + } + } + + const StubInfo &stubInfo() override { return _sStubInfo; } + + bool isCallSite(const Reference &) override; + bool isNonCallBranch(const Reference &) override { + return false; + } + + bool isPointer(const Reference &) override; + bool isPairedReloc(const normalized::Relocation &) override; + + bool needsCompactUnwind() override { + return true; + } + Reference::KindValue imageOffsetKind() override { + return imageOffset; + } + Reference::KindValue imageOffsetKindIndirect() override { + return imageOffsetGot; + } + + Reference::KindValue unwindRefToCIEKind() override { + return negDelta32; + } + + Reference::KindValue unwindRefToFunctionKind() override { + return unwindFDEToFunction; + } + + Reference::KindValue unwindRefToEhFrameKind() override { + return unwindInfoToEhFrame; + } + + uint32_t dwarfCompactUnwindType() override { + return 0x03000000; + } + + std::error_code getReferenceInfo(const normalized::Relocation &reloc, + const DefinedAtom *inAtom, + uint32_t offsetInAtom, + uint64_t fixupAddress, bool isBig, + FindAtomBySectionAndAddress atomFromAddress, + FindAtomBySymbolIndex atomFromSymbolIndex, + Reference::KindValue *kind, + const lld::Atom **target, + Reference::Addend *addend) 
  /// Reference kinds used by the arm64 mach-o handler. The numeric value of
  /// each enumerator is what Reference::kindValue() carries (the fix-up code
  /// casts kindValue() straight back to Arm64Kind), and _sKindStrings lists
  /// the enumerators in this same order.
  enum Arm64Kind : Reference::KindValue {
    invalid,               /// for error condition

    // Kinds found in mach-o .o files:
    branch26,              /// ex: bl   _foo
    page21,                /// ex: adrp x1, _foo@PAGE
    offset12,              /// ex: ldrb w0, [x1, _foo@PAGEOFF]
    offset12scale2,        /// ex: ldrh w0, [x1, _foo@PAGEOFF]
    offset12scale4,        /// ex: ldr  w0, [x1, _foo@PAGEOFF]
    offset12scale8,        /// ex: ldr  x0, [x1, _foo@PAGEOFF]
    offset12scale16,       /// ex: ldr  q0, [x1, _foo@PAGEOFF]
    gotPage21,             /// ex: adrp x1, _foo@GOTPAGE
    gotOffset12,           /// ex: ldr  x0, [x1, _foo@GOTPAGEOFF]
    tlvPage21,             /// ex: adrp x1, _foo@TLVPAGE
    tlvOffset12,           /// ex: ldr  w0, [x1, _foo@TLVPAGEOFF]

    pointer64,             /// ex: .quad _foo
    delta64,               /// ex: .quad _foo - .
    delta32,               /// ex: .long _foo - .
    negDelta32,            /// ex: .long . - _foo
    pointer64ToGOT,        /// ex: .quad _foo@GOT
    delta32ToGOT,          /// ex: .long _foo@GOT - .

    // Kinds introduced by Passes:
    addOffset12,           /// Location contains LDR to change into ADD.
    lazyPointer,           /// Location contains a lazy pointer.
    lazyImmediateLocation, /// Location contains immediate value used in stub.
    imageOffset,           /// Location contains offset of atom in final image
    imageOffsetGot,        /// Location contains offset of GOT entry for atom in
                           /// final image (typically personality function).
    unwindFDEToFunction,   /// Nearly delta64, but cannot be rematerialized in
                           /// relocatable object (yay for implicit contracts!).
    unwindInfoToEhFrame,   /// Fix low 24 bits of compact unwind encoding to
                           /// refer to __eh_frame entry.
  };
/// Table with one entry per Arm64Kind enumerator, listed in declaration
/// order and closed by LLD_KIND_STRING_END.
/// NOTE(review): presumably LLD_KIND_STRING_ENTRY pairs each value with its
/// spelled-out name for textual dumps -- confirm against the macro definition.
/// A kind added to Arm64Kind needs a matching entry here.
const Registry::KindStrings ArchHandler_arm64::_sKindStrings[] = {
  LLD_KIND_STRING_ENTRY(invalid),
  LLD_KIND_STRING_ENTRY(branch26),
  LLD_KIND_STRING_ENTRY(page21),
  LLD_KIND_STRING_ENTRY(offset12),
  LLD_KIND_STRING_ENTRY(offset12scale2),
  LLD_KIND_STRING_ENTRY(offset12scale4),
  LLD_KIND_STRING_ENTRY(offset12scale8),
  LLD_KIND_STRING_ENTRY(offset12scale16),
  LLD_KIND_STRING_ENTRY(gotPage21),
  LLD_KIND_STRING_ENTRY(gotOffset12),
  LLD_KIND_STRING_ENTRY(tlvPage21),
  LLD_KIND_STRING_ENTRY(tlvOffset12),
  LLD_KIND_STRING_ENTRY(pointer64),
  LLD_KIND_STRING_ENTRY(delta64),
  LLD_KIND_STRING_ENTRY(delta32),
  LLD_KIND_STRING_ENTRY(negDelta32),
  LLD_KIND_STRING_ENTRY(pointer64ToGOT),
  LLD_KIND_STRING_ENTRY(delta32ToGOT),

  // Kinds introduced by passes.
  LLD_KIND_STRING_ENTRY(addOffset12),
  LLD_KIND_STRING_ENTRY(lazyPointer),
  LLD_KIND_STRING_ENTRY(lazyImmediateLocation),
  LLD_KIND_STRING_ENTRY(imageOffset),
  LLD_KIND_STRING_ENTRY(imageOffsetGot),
  LLD_KIND_STRING_ENTRY(unwindFDEToFunction),
  LLD_KIND_STRING_ENTRY(unwindInfoToEhFrame),

  LLD_KIND_STRING_END
};
Reference::KindArch::AArch64, page21, 0, 0 }, + { true, offset12scale8, 4, 0 }, + + // Stub Helper size and code + 12, + { 0x50, 0x00, 0x00, 0x18, // LDR W16, L0 + 0x00, 0x00, 0x00, 0x14, // LDR B helperhelper + 0x00, 0x00, 0x00, 0x00 }, // L0: .long 0 + { Reference::KindArch::AArch64, lazyImmediateLocation, 8, 0 }, + { Reference::KindArch::AArch64, branch26, 4, 0 }, + + // Stub Helper-Common size and code + 24, + { 0x11, 0x00, 0x00, 0x90, // ADRP X17, dyld_ImageLoaderCache@page + 0x31, 0x02, 0x00, 0x91, // ADD X17, X17, dyld_ImageLoaderCache@pageoff + 0xF0, 0x47, 0xBF, 0xA9, // STP X16/X17, [SP, #-16]! + 0x10, 0x00, 0x00, 0x90, // ADRP X16, _fast_lazy_bind@page + 0x10, 0x02, 0x40, 0xF9, // LDR X16, [X16,_fast_lazy_bind@pageoff] + 0x00, 0x02, 0x1F, 0xD6 }, // BR X16 + { Reference::KindArch::AArch64, page21, 0, 0 }, + { true, offset12, 4, 0 }, + { Reference::KindArch::AArch64, page21, 12, 0 }, + { true, offset12scale8, 16, 0 } +}; + +bool ArchHandler_arm64::isCallSite(const Reference &ref) { + if (ref.kindNamespace() != Reference::KindNamespace::mach_o) + return false; + assert(ref.kindArch() == Reference::KindArch::AArch64); + return (ref.kindValue() == branch26); +} + +bool ArchHandler_arm64::isPointer(const Reference &ref) { + if (ref.kindNamespace() != Reference::KindNamespace::mach_o) + return false; + assert(ref.kindArch() == Reference::KindArch::AArch64); + Reference::KindValue kind = ref.kindValue(); + return (kind == pointer64); +} + +bool ArchHandler_arm64::isPairedReloc(const Relocation &r) { + return ((r.type == ARM64_RELOC_ADDEND) || (r.type == ARM64_RELOC_SUBTRACTOR)); +} + +uint32_t ArchHandler_arm64::setDisplacementInBranch26(uint32_t instr, + int32_t displacement) { + assert((displacement <= 134217727) && (displacement > (-134217728)) && + "arm64 branch out of range"); + return (instr & 0xFC000000) | ((uint32_t)(displacement >> 2) & 0x03FFFFFF); +} + +uint32_t ArchHandler_arm64::setDisplacementInADRP(uint32_t instruction, + int64_t displacement) { + 
assert((displacement <= 0x100000000LL) && (displacement > (-0x100000000LL)) && + "arm64 ADRP out of range"); + assert(((instruction & 0x9F000000) == 0x90000000) && + "reloc not on ADRP instruction"); + uint32_t immhi = (displacement >> 9) & (0x00FFFFE0); + uint32_t immlo = (displacement << 17) & (0x60000000); + return (instruction & 0x9F00001F) | immlo | immhi; +} + +ArchHandler_arm64::Arm64Kind +ArchHandler_arm64::offset12KindFromInstruction(uint32_t instruction) { + if (instruction & 0x08000000) { + switch ((instruction >> 30) & 0x3) { + case 0: + if ((instruction & 0x04800000) == 0x04800000) + return offset12scale16; + return offset12; + case 1: + return offset12scale2; + case 2: + return offset12scale4; + case 3: + return offset12scale8; + } + } + return offset12; +} + +uint32_t ArchHandler_arm64::setImm12(uint32_t instruction, uint32_t offset) { + assert(((offset & 0xFFFFF000) == 0) && "imm12 offset out of range"); + uint32_t imm12 = offset << 10; + return (instruction & 0xFFC003FF) | imm12; +} + +std::error_code ArchHandler_arm64::getReferenceInfo( + const Relocation &reloc, const DefinedAtom *inAtom, uint32_t offsetInAtom, + uint64_t fixupAddress, bool isBig, + FindAtomBySectionAndAddress atomFromAddress, + FindAtomBySymbolIndex atomFromSymbolIndex, Reference::KindValue *kind, + const lld::Atom **target, Reference::Addend *addend) { + const uint8_t *fixupContent = &inAtom->rawContent()[offsetInAtom]; + switch (relocPattern(reloc)) { + case ARM64_RELOC_BRANCH26 | rPcRel | rExtern | rLength4: + // ex: bl _foo + *kind = branch26; + if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + *addend = 0; + return std::error_code(); + case ARM64_RELOC_PAGE21 | rPcRel | rExtern | rLength4: + // ex: adrp x1, _foo@PAGE + *kind = page21; + if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + *addend = 0; + return std::error_code(); + case ARM64_RELOC_PAGEOFF12 | rExtern | rLength4: + // ex: ldr x0, [x1, _foo@PAGEOFF] + *kind = 
offset12KindFromInstruction(*(const little32_t *)fixupContent); + if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + *addend = 0; + return std::error_code(); + case ARM64_RELOC_GOT_LOAD_PAGE21 | rPcRel | rExtern | rLength4: + // ex: adrp x1, _foo@GOTPAGE + *kind = gotPage21; + if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + *addend = 0; + return std::error_code(); + case ARM64_RELOC_GOT_LOAD_PAGEOFF12 | rExtern | rLength4: + // ex: ldr x0, [x1, _foo@GOTPAGEOFF] + *kind = gotOffset12; + if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + *addend = 0; + return std::error_code(); + case ARM64_RELOC_TLVP_LOAD_PAGE21 | rPcRel | rExtern | rLength4: + // ex: adrp x1, _foo@TLVPAGE + *kind = tlvPage21; + if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + *addend = 0; + return std::error_code(); + case ARM64_RELOC_TLVP_LOAD_PAGEOFF12 | rExtern | rLength4: + // ex: ldr x0, [x1, _foo@TLVPAGEOFF] + *kind = tlvOffset12; + if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + *addend = 0; + return std::error_code(); + case ARM64_RELOC_UNSIGNED | rExtern | rLength8: + // ex: .quad _foo + N + *kind = pointer64; + if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + *addend = *(const little64_t *)fixupContent; + return std::error_code(); + case ARM64_RELOC_UNSIGNED | rLength8: + // ex: .quad Lfoo + N + *kind = pointer64; + return atomFromAddress(reloc.symbol, *(const little64_t *)fixupContent, + target, addend); + case ARM64_RELOC_POINTER_TO_GOT | rExtern | rLength8: + // ex: .quad _foo@GOT + *kind = pointer64ToGOT; + if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + *addend = 0; + return std::error_code(); + case ARM64_RELOC_POINTER_TO_GOT | rPcRel | rExtern | rLength4: + // ex: .long _foo@GOT - . 
+ *kind = delta32ToGOT; + if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + *addend = 0; + return std::error_code(); + default: + return make_dynamic_error_code(Twine("unsupported arm64 relocation type")); + } +} + +std::error_code ArchHandler_arm64::getPairReferenceInfo( + const normalized::Relocation &reloc1, const normalized::Relocation &reloc2, + const DefinedAtom *inAtom, uint32_t offsetInAtom, uint64_t fixupAddress, + bool swap, bool scatterable, FindAtomBySectionAndAddress atomFromAddress, + FindAtomBySymbolIndex atomFromSymbolIndex, Reference::KindValue *kind, + const lld::Atom **target, Reference::Addend *addend) { + const uint8_t *fixupContent = &inAtom->rawContent()[offsetInAtom]; + const uint32_t *cont32 = reinterpret_cast<const uint32_t *>(fixupContent); + switch (relocPattern(reloc1) << 16 | relocPattern(reloc2)) { + case ((ARM64_RELOC_ADDEND | rLength4) << 16 | + ARM64_RELOC_BRANCH26 | rPcRel | rExtern | rLength4): + // ex: bl _foo+8 + *kind = branch26; + if (auto ec = atomFromSymbolIndex(reloc2.symbol, target)) + return ec; + *addend = reloc1.symbol; + return std::error_code(); + case ((ARM64_RELOC_ADDEND | rLength4) << 16 | + ARM64_RELOC_PAGE21 | rPcRel | rExtern | rLength4): + // ex: adrp x1, _foo@PAGE + *kind = page21; + if (auto ec = atomFromSymbolIndex(reloc2.symbol, target)) + return ec; + *addend = reloc1.symbol; + return std::error_code(); + case ((ARM64_RELOC_ADDEND | rLength4) << 16 | + ARM64_RELOC_PAGEOFF12 | rExtern | rLength4): + // ex: ldr w0, [x1, _foo@PAGEOFF] + *kind = offset12KindFromInstruction(*cont32); + if (auto ec = atomFromSymbolIndex(reloc2.symbol, target)) + return ec; + *addend = reloc1.symbol; + return std::error_code(); + case ((ARM64_RELOC_SUBTRACTOR | rExtern | rLength8) << 16 | + ARM64_RELOC_UNSIGNED | rExtern | rLength8): + // ex: .quad _foo - . 
+ *kind = delta64; + if (auto ec = atomFromSymbolIndex(reloc2.symbol, target)) + return ec; + *addend = (int64_t)*(const little64_t *)fixupContent + offsetInAtom; + return std::error_code(); + case ((ARM64_RELOC_SUBTRACTOR | rExtern | rLength4) << 16 | + ARM64_RELOC_UNSIGNED | rExtern | rLength4): + // ex: .quad _foo - . + *kind = delta32; + if (auto ec = atomFromSymbolIndex(reloc2.symbol, target)) + return ec; + *addend = (int32_t)*(const little32_t *)fixupContent + offsetInAtom; + return std::error_code(); + default: + return make_dynamic_error_code(Twine("unsupported arm64 relocation pair")); + } +} + +void ArchHandler_arm64::generateAtomContent( + const DefinedAtom &atom, bool relocatable, FindAddressForAtom findAddress, + FindAddressForAtom findSectionAddress, uint64_t imageBaseAddress, + uint8_t *atomContentBuffer) { + // Copy raw bytes. + memcpy(atomContentBuffer, atom.rawContent().data(), atom.size()); + // Apply fix-ups. + for (const Reference *ref : atom) { + uint32_t offset = ref->offsetInAtom(); + const Atom *target = ref->target(); + bool targetUnnamed = target->name().empty(); + uint64_t targetAddress = 0; + if (isa<DefinedAtom>(target)) + targetAddress = findAddress(*target); + uint64_t atomAddress = findAddress(atom); + uint64_t fixupAddress = atomAddress + offset; + if (relocatable) { + applyFixupRelocatable(*ref, &atomContentBuffer[offset], fixupAddress, + targetAddress, atomAddress, targetUnnamed); + } else { + applyFixupFinal(*ref, &atomContentBuffer[offset], fixupAddress, + targetAddress, atomAddress, imageBaseAddress, + findSectionAddress); + } + } +} + +void ArchHandler_arm64::applyFixupFinal(const Reference &ref, uint8_t *loc, + uint64_t fixupAddress, + uint64_t targetAddress, + uint64_t inAtomAddress, + uint64_t imageBaseAddress, + FindAddressForAtom findSectionAddress) { + if (ref.kindNamespace() != Reference::KindNamespace::mach_o) + return; + assert(ref.kindArch() == Reference::KindArch::AArch64); + ulittle32_t *loc32 = 
reinterpret_cast<ulittle32_t *>(loc); + ulittle64_t *loc64 = reinterpret_cast<ulittle64_t *>(loc); + int32_t displacement; + uint32_t instruction; + uint32_t value32; + uint32_t value64; + switch (static_cast<Arm64Kind>(ref.kindValue())) { + case branch26: + displacement = (targetAddress - fixupAddress) + ref.addend(); + *loc32 = setDisplacementInBranch26(*loc32, displacement); + return; + case page21: + case gotPage21: + case tlvPage21: + displacement = + ((targetAddress + ref.addend()) & (-4096)) - (fixupAddress & (-4096)); + *loc32 = setDisplacementInADRP(*loc32, displacement); + return; + case offset12: + case gotOffset12: + case tlvOffset12: + displacement = (targetAddress + ref.addend()) & 0x00000FFF; + *loc32 = setImm12(*loc32, displacement); + return; + case offset12scale2: + displacement = (targetAddress + ref.addend()) & 0x00000FFF; + assert(((displacement & 0x1) == 0) && + "scaled imm12 not accessing 2-byte aligneds"); + *loc32 = setImm12(*loc32, displacement >> 1); + return; + case offset12scale4: + displacement = (targetAddress + ref.addend()) & 0x00000FFF; + assert(((displacement & 0x3) == 0) && + "scaled imm12 not accessing 4-byte aligned"); + *loc32 = setImm12(*loc32, displacement >> 2); + return; + case offset12scale8: + displacement = (targetAddress + ref.addend()) & 0x00000FFF; + assert(((displacement & 0x7) == 0) && + "scaled imm12 not accessing 8-byte aligned"); + *loc32 = setImm12(*loc32, displacement >> 3); + return; + case offset12scale16: + displacement = (targetAddress + ref.addend()) & 0x00000FFF; + assert(((displacement & 0xF) == 0) && + "scaled imm12 not accessing 16-byte aligned"); + *loc32 = setImm12(*loc32, displacement >> 4); + return; + case addOffset12: + instruction = *loc32; + assert(((instruction & 0xFFC00000) == 0xF9400000) && + "GOT reloc is not an LDR instruction"); + displacement = (targetAddress + ref.addend()) & 0x00000FFF; + value32 = 0x91000000 | (instruction & 0x000003FF); + instruction = setImm12(value32, 
displacement); + *loc32 = instruction; + return; + case pointer64: + case pointer64ToGOT: + *loc64 = targetAddress + ref.addend(); + return; + case delta64: + case unwindFDEToFunction: + *loc64 = (targetAddress - fixupAddress) + ref.addend(); + return; + case delta32: + case delta32ToGOT: + *loc32 = (targetAddress - fixupAddress) + ref.addend(); + return; + case negDelta32: + *loc32 = fixupAddress - targetAddress + ref.addend(); + return; + case lazyPointer: + // Do nothing + return; + case lazyImmediateLocation: + *loc32 = ref.addend(); + return; + case imageOffset: + *loc32 = (targetAddress - imageBaseAddress) + ref.addend(); + return; + case imageOffsetGot: + llvm_unreachable("imageOffsetGot should have been changed to imageOffset"); + break; + case unwindInfoToEhFrame: + value64 = targetAddress - findSectionAddress(*ref.target()) + ref.addend(); + assert(value64 < 0xffffffU && "offset in __eh_frame too large"); + *loc32 = (*loc32 & 0xff000000U) | value64; + return; + case invalid: + // Fall into llvm_unreachable(). 
+ break; + } + llvm_unreachable("invalid arm64 Reference Kind"); +} + +void ArchHandler_arm64::applyFixupRelocatable(const Reference &ref, + uint8_t *loc, + uint64_t fixupAddress, + uint64_t targetAddress, + uint64_t inAtomAddress, + bool targetUnnamed) { + if (ref.kindNamespace() != Reference::KindNamespace::mach_o) + return; + assert(ref.kindArch() == Reference::KindArch::AArch64); + ulittle32_t *loc32 = reinterpret_cast<ulittle32_t *>(loc); + ulittle64_t *loc64 = reinterpret_cast<ulittle64_t *>(loc); + switch (static_cast<Arm64Kind>(ref.kindValue())) { + case branch26: + *loc32 = setDisplacementInBranch26(*loc32, 0); + return; + case page21: + case gotPage21: + case tlvPage21: + *loc32 = setDisplacementInADRP(*loc32, 0); + return; + case offset12: + case offset12scale2: + case offset12scale4: + case offset12scale8: + case offset12scale16: + case gotOffset12: + case tlvOffset12: + *loc32 = setImm12(*loc32, 0); + return; + case pointer64: + if (targetUnnamed) + *loc64 = targetAddress + ref.addend(); + else + *loc64 = ref.addend(); + return; + case delta64: + *loc64 = ref.addend() + inAtomAddress - fixupAddress; + return; + case delta32: + *loc32 = ref.addend() + inAtomAddress - fixupAddress; + return; + case negDelta32: + *loc32 = fixupAddress - inAtomAddress + ref.addend(); + return; + case pointer64ToGOT: + *loc64 = 0; + return; + case delta32ToGOT: + *loc32 = -fixupAddress; + return; + case addOffset12: + llvm_unreachable("lazy reference kind implies GOT pass was run"); + case lazyPointer: + case lazyImmediateLocation: + llvm_unreachable("lazy reference kind implies Stubs pass was run"); + case imageOffset: + case imageOffsetGot: + case unwindInfoToEhFrame: + llvm_unreachable("fixup implies __unwind_info"); + return; + case unwindFDEToFunction: + // Do nothing for now + return; + case invalid: + // Fall into llvm_unreachable(). 
+ break; + } + llvm_unreachable("unknown arm64 Reference Kind"); +} + +void ArchHandler_arm64::appendSectionRelocations( + const DefinedAtom &atom, uint64_t atomSectionOffset, const Reference &ref, + FindSymbolIndexForAtom symbolIndexForAtom, + FindSectionIndexForAtom sectionIndexForAtom, + FindAddressForAtom addressForAtom, normalized::Relocations &relocs) { + if (ref.kindNamespace() != Reference::KindNamespace::mach_o) + return; + assert(ref.kindArch() == Reference::KindArch::AArch64); + uint32_t sectionOffset = atomSectionOffset + ref.offsetInAtom(); + switch (static_cast<Arm64Kind>(ref.kindValue())) { + case branch26: + if (ref.addend()) { + appendReloc(relocs, sectionOffset, ref.addend(), 0, + ARM64_RELOC_ADDEND | rLength4); + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM64_RELOC_BRANCH26 | rPcRel | rExtern | rLength4); + } else { + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM64_RELOC_BRANCH26 | rPcRel | rExtern | rLength4); + } + return; + case page21: + if (ref.addend()) { + appendReloc(relocs, sectionOffset, ref.addend(), 0, + ARM64_RELOC_ADDEND | rLength4); + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM64_RELOC_PAGE21 | rPcRel | rExtern | rLength4); + } else { + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM64_RELOC_PAGE21 | rPcRel | rExtern | rLength4); + } + return; + case offset12: + case offset12scale2: + case offset12scale4: + case offset12scale8: + case offset12scale16: + if (ref.addend()) { + appendReloc(relocs, sectionOffset, ref.addend(), 0, + ARM64_RELOC_ADDEND | rLength4); + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM64_RELOC_PAGEOFF12 | rExtern | rLength4); + } else { + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM64_RELOC_PAGEOFF12 | rExtern | rLength4); + } + return; + case gotPage21: + assert(ref.addend() == 0); + appendReloc(relocs, 
sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM64_RELOC_GOT_LOAD_PAGE21 | rPcRel | rExtern | rLength4); + return; + case gotOffset12: + assert(ref.addend() == 0); + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM64_RELOC_GOT_LOAD_PAGEOFF12 | rExtern | rLength4); + return; + case tlvPage21: + assert(ref.addend() == 0); + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM64_RELOC_TLVP_LOAD_PAGE21 | rPcRel | rExtern | rLength4); + return; + case tlvOffset12: + assert(ref.addend() == 0); + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM64_RELOC_TLVP_LOAD_PAGEOFF12 | rExtern | rLength4); + return; + case pointer64: + if (ref.target()->name().empty()) + appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()), 0, + ARM64_RELOC_UNSIGNED | rLength8); + else + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM64_RELOC_UNSIGNED | rExtern | rLength8); + return; + case delta64: + appendReloc(relocs, sectionOffset, symbolIndexForAtom(atom), 0, + ARM64_RELOC_SUBTRACTOR | rExtern | rLength8); + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM64_RELOC_UNSIGNED | rExtern | rLength8); + return; + case delta32: + appendReloc(relocs, sectionOffset, symbolIndexForAtom(atom), 0, + ARM64_RELOC_SUBTRACTOR | rExtern | rLength4 ); + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM64_RELOC_UNSIGNED | rExtern | rLength4 ); + return; + case pointer64ToGOT: + assert(ref.addend() == 0); + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM64_RELOC_POINTER_TO_GOT | rExtern | rLength8); + return; + case delta32ToGOT: + assert(ref.addend() == 0); + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM64_RELOC_POINTER_TO_GOT | rPcRel | rExtern | rLength4); + return; + case addOffset12: + llvm_unreachable("lazy reference kind implies GOT pass was 
run"); + case lazyPointer: + case lazyImmediateLocation: + llvm_unreachable("lazy reference kind implies Stubs pass was run"); + case imageOffset: + case imageOffsetGot: + llvm_unreachable("deltas from mach_header can only be in final images"); + case unwindFDEToFunction: + case unwindInfoToEhFrame: + case negDelta32: + // Do nothing. + return; + case invalid: + // Fall into llvm_unreachable(). + break; + } + llvm_unreachable("unknown arm64 Reference Kind"); +} + +std::unique_ptr<mach_o::ArchHandler> ArchHandler::create_arm64() { + return std::unique_ptr<mach_o::ArchHandler>(new ArchHandler_arm64()); +} + +} // namespace mach_o +} // namespace lld diff --git a/lib/ReaderWriter/MachO/ArchHandler_x86.cpp b/lib/ReaderWriter/MachO/ArchHandler_x86.cpp new file mode 100644 index 000000000000..19c8780e707a --- /dev/null +++ b/lib/ReaderWriter/MachO/ArchHandler_x86.cpp @@ -0,0 +1,642 @@ +//===- lib/FileFormat/MachO/ArchHandler_x86.cpp ---------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "ArchHandler.h" +#include "Atoms.h" +#include "MachONormalizedFileBinaryUtils.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/ErrorHandling.h" + +using namespace llvm::MachO; +using namespace lld::mach_o::normalized; + +namespace lld { +namespace mach_o { + +using llvm::support::ulittle16_t; +using llvm::support::ulittle32_t; + +using llvm::support::little16_t; +using llvm::support::little32_t; + +class ArchHandler_x86 : public ArchHandler { +public: + ArchHandler_x86(); + virtual ~ArchHandler_x86(); + + const Registry::KindStrings *kindStrings() override { return _sKindStrings; } + + Reference::KindArch kindArch() override { return Reference::KindArch::x86; } + + const StubInfo &stubInfo() override { return _sStubInfo; } + bool isCallSite(const Reference &) override; + bool isNonCallBranch(const Reference &) override { + return false; + } + + bool isPointer(const Reference &) override; + bool isPairedReloc(const normalized::Relocation &) override; + + bool needsCompactUnwind() override { + return false; + } + Reference::KindValue imageOffsetKind() override { + return invalid; + } + Reference::KindValue imageOffsetKindIndirect() override { + return invalid; + } + + Reference::KindValue unwindRefToCIEKind() override { + return negDelta32; + } + + Reference::KindValue unwindRefToFunctionKind() override{ + return delta32; + } + + Reference::KindValue unwindRefToEhFrameKind() override { + return invalid; + } + + + uint32_t dwarfCompactUnwindType() override { + return 0x04000000U; + } + + std::error_code getReferenceInfo(const normalized::Relocation &reloc, + const DefinedAtom *inAtom, + uint32_t offsetInAtom, + uint64_t fixupAddress, bool swap, + FindAtomBySectionAndAddress atomFromAddress, + FindAtomBySymbolIndex atomFromSymbolIndex, + Reference::KindValue 
*kind, + const lld::Atom **target, + Reference::Addend *addend) override; + std::error_code + getPairReferenceInfo(const normalized::Relocation &reloc1, + const normalized::Relocation &reloc2, + const DefinedAtom *inAtom, + uint32_t offsetInAtom, + uint64_t fixupAddress, bool swap, bool scatterable, + FindAtomBySectionAndAddress atomFromAddress, + FindAtomBySymbolIndex atomFromSymbolIndex, + Reference::KindValue *kind, + const lld::Atom **target, + Reference::Addend *addend) override; + + void generateAtomContent(const DefinedAtom &atom, bool relocatable, + FindAddressForAtom findAddress, + FindAddressForAtom findSectionAddress, + uint64_t imageBaseAddress, + uint8_t *atomContentBuffer) override; + + void appendSectionRelocations(const DefinedAtom &atom, + uint64_t atomSectionOffset, + const Reference &ref, + FindSymbolIndexForAtom symbolIndexForAtom, + FindSectionIndexForAtom sectionIndexForAtom, + FindAddressForAtom addressForAtom, + normalized::Relocations &relocs) override; + + bool isDataInCodeTransition(Reference::KindValue refKind) override { + switch (refKind) { + case modeCode: + case modeData: + return true; + default: + return false; + break; + } + } + + Reference::KindValue dataInCodeTransitionStart( + const MachODefinedAtom &atom) override { + return modeData; + } + + Reference::KindValue dataInCodeTransitionEnd( + const MachODefinedAtom &atom) override { + return modeCode; + } + +private: + static const Registry::KindStrings _sKindStrings[]; + static const StubInfo _sStubInfo; + + enum X86Kind : Reference::KindValue { + invalid, /// for error condition + + modeCode, /// Content starting at this offset is code. + modeData, /// Content starting at this offset is data. + + // Kinds found in mach-o .o files: + branch32, /// ex: call _foo + branch16, /// ex: callw _foo + abs32, /// ex: movl _foo, %eax + funcRel32, /// ex: movl _foo-L1(%eax), %eax + pointer32, /// ex: .long _foo + delta32, /// ex: .long _foo - . + negDelta32, /// ex: .long . 
- _foo + + // Kinds introduced by Passes: + lazyPointer, /// Location contains a lazy pointer. + lazyImmediateLocation, /// Location contains immediate value used in stub. + }; + + static bool useExternalRelocationTo(const Atom &target); + + void applyFixupFinal(const Reference &ref, uint8_t *location, + uint64_t fixupAddress, uint64_t targetAddress, + uint64_t inAtomAddress); + + void applyFixupRelocatable(const Reference &ref, uint8_t *location, + uint64_t fixupAddress, + uint64_t targetAddress, + uint64_t inAtomAddress); +}; + +//===----------------------------------------------------------------------===// +// ArchHandler_x86 +//===----------------------------------------------------------------------===// + +ArchHandler_x86::ArchHandler_x86() {} + +ArchHandler_x86::~ArchHandler_x86() { } + +const Registry::KindStrings ArchHandler_x86::_sKindStrings[] = { + LLD_KIND_STRING_ENTRY(invalid), + LLD_KIND_STRING_ENTRY(modeCode), + LLD_KIND_STRING_ENTRY(modeData), + LLD_KIND_STRING_ENTRY(branch32), + LLD_KIND_STRING_ENTRY(branch16), + LLD_KIND_STRING_ENTRY(abs32), + LLD_KIND_STRING_ENTRY(funcRel32), + LLD_KIND_STRING_ENTRY(pointer32), + LLD_KIND_STRING_ENTRY(delta32), + LLD_KIND_STRING_ENTRY(negDelta32), + LLD_KIND_STRING_ENTRY(lazyPointer), + LLD_KIND_STRING_ENTRY(lazyImmediateLocation), + LLD_KIND_STRING_END +}; + +const ArchHandler::StubInfo ArchHandler_x86::_sStubInfo = { + "dyld_stub_binder", + + // Lazy pointer references + { Reference::KindArch::x86, pointer32, 0, 0 }, + { Reference::KindArch::x86, lazyPointer, 0, 0 }, + + // GOT pointer to dyld_stub_binder + { Reference::KindArch::x86, pointer32, 0, 0 }, + + // x86 code alignment + 1, + + // Stub size and code + 6, + { 0xff, 0x25, 0x00, 0x00, 0x00, 0x00 }, // jmp *lazyPointer + { Reference::KindArch::x86, abs32, 2, 0 }, + { false, 0, 0, 0 }, + + // Stub Helper size and code + 10, + { 0x68, 0x00, 0x00, 0x00, 0x00, // pushl $lazy-info-offset + 0xE9, 0x00, 0x00, 0x00, 0x00 }, // jmp helperhelper + { 
Reference::KindArch::x86, lazyImmediateLocation, 1, 0 }, + { Reference::KindArch::x86, branch32, 6, 0 }, + + // Stub Helper-Common size and code + 12, + { 0x68, 0x00, 0x00, 0x00, 0x00, // pushl $dyld_ImageLoaderCache + 0xFF, 0x25, 0x00, 0x00, 0x00, 0x00, // jmp *_fast_lazy_bind + 0x90 }, // nop + { Reference::KindArch::x86, abs32, 1, 0 }, + { false, 0, 0, 0 }, + { Reference::KindArch::x86, abs32, 7, 0 }, + { false, 0, 0, 0 } +}; + +bool ArchHandler_x86::isCallSite(const Reference &ref) { + return (ref.kindValue() == branch32); +} + +bool ArchHandler_x86::isPointer(const Reference &ref) { + return (ref.kindValue() == pointer32); +} + +bool ArchHandler_x86::isPairedReloc(const Relocation &reloc) { + if (!reloc.scattered) + return false; + return (reloc.type == GENERIC_RELOC_LOCAL_SECTDIFF) || + (reloc.type == GENERIC_RELOC_SECTDIFF); +} + +std::error_code +ArchHandler_x86::getReferenceInfo(const Relocation &reloc, + const DefinedAtom *inAtom, + uint32_t offsetInAtom, + uint64_t fixupAddress, bool swap, + FindAtomBySectionAndAddress atomFromAddress, + FindAtomBySymbolIndex atomFromSymbolIndex, + Reference::KindValue *kind, + const lld::Atom **target, + Reference::Addend *addend) { + typedef std::error_code E; + DefinedAtom::ContentPermissions perms; + const uint8_t *fixupContent = &inAtom->rawContent()[offsetInAtom]; + uint64_t targetAddress; + switch (relocPattern(reloc)) { + case GENERIC_RELOC_VANILLA | rPcRel | rExtern | rLength4: + // ex: call _foo (and _foo undefined) + *kind = branch32; + if (E ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + *addend = fixupAddress + 4 + (int32_t)*(const little32_t *)fixupContent; + break; + case GENERIC_RELOC_VANILLA | rPcRel | rLength4: + // ex: call _foo (and _foo defined) + *kind = branch32; + targetAddress = + fixupAddress + 4 + (int32_t) * (const little32_t *)fixupContent; + return atomFromAddress(reloc.symbol, targetAddress, target, addend); + break; + case GENERIC_RELOC_VANILLA | rScattered | rPcRel | 
rLength4: + // ex: call _foo+n (and _foo defined) + *kind = branch32; + targetAddress = + fixupAddress + 4 + (int32_t) * (const little32_t *)fixupContent; + if (E ec = atomFromAddress(0, reloc.value, target, addend)) + return ec; + *addend = targetAddress - reloc.value; + break; + case GENERIC_RELOC_VANILLA | rPcRel | rExtern | rLength2: + // ex: callw _foo (and _foo undefined) + *kind = branch16; + if (E ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + *addend = fixupAddress + 2 + (int16_t)*(const little16_t *)fixupContent; + break; + case GENERIC_RELOC_VANILLA | rPcRel | rLength2: + // ex: callw _foo (and _foo defined) + *kind = branch16; + targetAddress = + fixupAddress + 2 + (int16_t) * (const little16_t *)fixupContent; + return atomFromAddress(reloc.symbol, targetAddress, target, addend); + break; + case GENERIC_RELOC_VANILLA | rScattered | rPcRel | rLength2: + // ex: callw _foo+n (and _foo defined) + *kind = branch16; + targetAddress = + fixupAddress + 2 + (int16_t) * (const little16_t *)fixupContent; + if (E ec = atomFromAddress(0, reloc.value, target, addend)) + return ec; + *addend = targetAddress - reloc.value; + break; + case GENERIC_RELOC_VANILLA | rExtern | rLength4: + // ex: movl _foo, %eax (and _foo undefined) + // ex: .long _foo (and _foo undefined) + perms = inAtom->permissions(); + *kind = + ((perms & DefinedAtom::permR_X) == DefinedAtom::permR_X) ? abs32 + : pointer32; + if (E ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + *addend = *(const ulittle32_t *)fixupContent; + break; + case GENERIC_RELOC_VANILLA | rLength4: + // ex: movl _foo, %eax (and _foo defined) + // ex: .long _foo (and _foo defined) + perms = inAtom->permissions(); + *kind = + ((perms & DefinedAtom::permR_X) == DefinedAtom::permR_X) ? 
abs32
+                                                                 : pointer32;
+    // The 32-bit field holds the absolute target address.
+    targetAddress = *(const ulittle32_t *)fixupContent;
+    return atomFromAddress(reloc.symbol, targetAddress, target, addend);
+    break;
+  case GENERIC_RELOC_VANILLA | rScattered | rLength4:
+    // ex: .long _foo+n (and _foo defined)
+    perms = inAtom->permissions();
+    *kind =
+        ((perms & DefinedAtom::permR_X) == DefinedAtom::permR_X) ? abs32
+                                                                 : pointer32;
+    if (E ec = atomFromAddress(0, reloc.value, target, addend))
+      return ec;
+    *addend = *(const ulittle32_t *)fixupContent - reloc.value;
+    break;
+  default:
+    return make_dynamic_error_code(Twine("unsupported i386 relocation type"));
+  }
+  return std::error_code();
+}
+
+/// Decodes a two-relocation pair (scattered SECTDIFF or LOCAL_SECTDIFF
+/// followed by a scattered PAIR) into a reference kind, target and addend.
+/// reloc1.value is the "to" address and reloc2.value the "from" address; the
+/// 32-bit value stored at the fixup is read as well.  Any other combination
+/// of patterns yields a dynamic error code.
+std::error_code
+ArchHandler_x86::getPairReferenceInfo(const normalized::Relocation &reloc1,
+                                      const normalized::Relocation &reloc2,
+                                      const DefinedAtom *inAtom,
+                                      uint32_t offsetInAtom,
+                                      uint64_t fixupAddress, bool swap,
+                                      bool scatterable,
+                                      FindAtomBySectionAndAddress atomFromAddr,
+                                      FindAtomBySymbolIndex atomFromSymbolIndex,
+                                      Reference::KindValue *kind,
+                                      const lld::Atom **target,
+                                      Reference::Addend *addend) {
+  const uint8_t *fixupContent = &inAtom->rawContent()[offsetInAtom];
+  std::error_code ec;
+  DefinedAtom::ContentPermissions perms = inAtom->permissions();
+  uint32_t fromAddress;
+  uint32_t toAddress;
+  uint32_t value;
+  const lld::Atom *fromTarget;
+  Reference::Addend offsetInTo;
+  Reference::Addend offsetInFrom;
+  // Both patterns are folded into one switch key: reloc1 in the high 16 bits.
+  switch (relocPattern(reloc1) << 16 | relocPattern(reloc2)) {
+  case ((GENERIC_RELOC_SECTDIFF | rScattered | rLength4) << 16 |
+         GENERIC_RELOC_PAIR | rScattered | rLength4):
+  case ((GENERIC_RELOC_LOCAL_SECTDIFF | rScattered | rLength4) << 16 |
+         GENERIC_RELOC_PAIR | rScattered | rLength4):
+    toAddress = reloc1.value;
+    fromAddress = reloc2.value;
+    value = *(const little32_t *)fixupContent;
+    ec = atomFromAddr(0, toAddress, target, &offsetInTo);
+    if (ec)
+      return ec;
+    ec = atomFromAddr(0, fromAddress, &fromTarget, &offsetInFrom);
+    if (ec)
+      return ec;
+    if (fromTarget != inAtom) {
+      if (*target != 
inAtom)
+        return make_dynamic_error_code(Twine("SECTDIFF relocation where "
+                                             "neither target is in atom"));
+      // The "to" side is in this atom, so the reference is recorded against
+      // the "from" atom with the subtraction reversed.
+      *kind = negDelta32;
+      *addend = toAddress - value - fromAddress;
+      *target = fromTarget;
+    } else {
+      if ((perms & DefinedAtom::permR_X) == DefinedAtom::permR_X) {
+        // SECTDIFF relocations are used in i386 codegen where the function
+        // prolog does a CALL to the next instruction which POPs the return
+        // address into EBX which becomes the pic-base register.  The POP
+        // instruction is labeled, and that label is used as the subtrahend
+        // in expressions.
+        // The funcRel32 kind represents the 32-bit delta to some symbol from
+        // the start of the function (atom) containing the funcRel32.
+        *kind = funcRel32;
+        uint32_t ta = fromAddress + value - toAddress;
+        *addend = ta - offsetInFrom;
+      } else {
+        *kind = delta32;
+        *addend = fromAddress + value - toAddress;
+      }
+    }
+    return std::error_code();
+    break;
+  default:
+    return make_dynamic_error_code(Twine("unsupported i386 relocation type"));
+  }
+}
+
+/// Writes the final bytes of \p atom into \p atomContentBuffer: the raw
+/// content is copied first, then every reference of the atom is applied as a
+/// fix-up on top of it (relocatable or final flavour, chosen by
+/// \p relocatable).
+void ArchHandler_x86::generateAtomContent(const DefinedAtom &atom,
+                                          bool relocatable,
+                                          FindAddressForAtom findAddress,
+                                          FindAddressForAtom findSectionAddress,
+                                          uint64_t imageBaseAddress,
+                                          uint8_t *atomContentBuffer) {
+  // Copy raw bytes.
+  memcpy(atomContentBuffer, atom.rawContent().data(), atom.size());
+  // Apply fix-ups.
+  for (const Reference *ref : atom) {
+    uint32_t offset = ref->offsetInAtom();
+    const Atom *target = ref->target();
+    // Only defined atoms have an address; everything else resolves to 0 here.
+    uint64_t targetAddress = 0;
+    if (isa<DefinedAtom>(target))
+      targetAddress = findAddress(*target);
+    uint64_t atomAddress = findAddress(atom);
+    uint64_t fixupAddress = atomAddress + offset;
+    if (relocatable) {
+      applyFixupRelocatable(*ref, &atomContentBuffer[offset],
+                                        fixupAddress, targetAddress,
+                                        atomAddress);
+    } else {
+      applyFixupFinal(*ref, &atomContentBuffer[offset],
+                                  fixupAddress, targetAddress,
+                                  atomAddress);
+    }
+  }
+}
+
+/// Patches the bytes at \p loc for reference \p ref when producing a final
+/// linked image.
+///
+/// \param fixupAddress   address of the field being patched.
+/// \param targetAddress  address of the reference's target atom.
+/// \param inAtomAddress  address of the atom containing the fix-up.
+///
+/// References outside the mach_o kind namespace are ignored.
+void ArchHandler_x86::applyFixupFinal(const Reference &ref, uint8_t *loc,
+                                      uint64_t fixupAddress,
+                                      uint64_t targetAddress,
+                                      uint64_t inAtomAddress) {
+  if (ref.kindNamespace() != Reference::KindNamespace::mach_o)
+    return;
+  assert(ref.kindArch() == Reference::KindArch::x86);
+  ulittle16_t *loc16 = reinterpret_cast<ulittle16_t *>(loc);
+  ulittle32_t *loc32 = reinterpret_cast<ulittle32_t *>(loc);
+  switch (static_cast<X86Kind>(ref.kindValue())) {
+  case branch32:
+    *loc32 = (targetAddress - (fixupAddress + 4)) + ref.addend();
+    break;
+  case branch16:
+    // The field is only two bytes wide (displacement is relative to
+    // fixupAddress + 2), so store through loc16.  A 32-bit store here would
+    // overwrite the two bytes that follow the instruction.
+    *loc16 = (targetAddress - (fixupAddress + 2)) + ref.addend();
+    break;
+  case pointer32:
+  case abs32:
+    *loc32 = targetAddress + ref.addend();
+    break;
+  case funcRel32:
+    *loc32 = targetAddress - inAtomAddress + ref.addend();
+    break;
+  case delta32:
+    *loc32 = targetAddress - fixupAddress + ref.addend();
+    break;
+  case negDelta32:
+    *loc32 = fixupAddress - targetAddress + ref.addend();
+    break;
+  case modeCode:
+  case modeData:
+  case lazyPointer:
+    // do nothing
+    break;
+  case lazyImmediateLocation:
+    *loc32 = ref.addend();
+    break;
+  case invalid:
+    llvm_unreachable("invalid x86 Reference Kind");
+    break;
+  }
+}
+
+/// Patches the bytes at \p loc for reference \p ref when producing a
+/// relocatable object file.  References outside the mach_o kind namespace are
+/// ignored.
+void ArchHandler_x86::applyFixupRelocatable(const Reference &ref,
+                                             uint8_t *loc,
+                                             uint64_t fixupAddress,
+                                             uint64_t targetAddress,
+                                             uint64_t inAtomAddress) {
+  if (ref.kindNamespace() != Reference::KindNamespace::mach_o)
+    return;
+  assert(ref.kindArch() == Reference::KindArch::x86);
+  bool 
useExternalReloc = useExternalRelocationTo(*ref.target());
+  ulittle16_t *loc16 = reinterpret_cast<ulittle16_t *>(loc);
+  ulittle32_t *loc32 = reinterpret_cast<ulittle32_t *>(loc);
+  switch (static_cast<X86Kind>(ref.kindValue())) {
+  case branch32:
+    // With an external relocation the target address is not folded into the
+    // stored value; only the addend and the pc adjustment are.
+    if (useExternalReloc)
+      *loc32 = ref.addend() - (fixupAddress + 4);
+    else
+      *loc32  =(targetAddress - (fixupAddress+4)) + ref.addend();
+    break;
+  case branch16:
+    if (useExternalReloc)
+      *loc16 = ref.addend() - (fixupAddress + 2);
+    else
+      *loc16 = (targetAddress - (fixupAddress+2)) + ref.addend();
+    break;
+  case pointer32:
+  case abs32:
+    *loc32 = targetAddress + ref.addend();
+    break;
+  case funcRel32:
+    *loc32 = targetAddress - inAtomAddress + ref.addend(); // FIXME
+    break;
+  case delta32:
+    *loc32 = targetAddress - fixupAddress + ref.addend();
+    break;
+  case negDelta32:
+    *loc32 = fixupAddress - targetAddress + ref.addend();
+    break;
+  case modeCode:
+  case modeData:
+  case lazyPointer:
+  case lazyImmediateLocation:
+    // do nothing
+    break;
+  case invalid:
+    llvm_unreachable("invalid x86 Reference Kind");
+    break;
+  }
+}
+
+/// Decides whether a reference to \p target is emitted as an external
+/// (symbol-index) relocation rather than an internal (section-based) one.
+/// True for undefined atoms, tentative definitions, and weak definitions with
+/// global scope; false for everything else.
+bool ArchHandler_x86::useExternalRelocationTo(const Atom &target) {
+  // Undefined symbols are referenced via external relocations.
+  if (isa<UndefinedAtom>(&target))
+    return true;
+  if (const DefinedAtom *defAtom = dyn_cast<DefinedAtom>(&target)) {
+     switch (defAtom->merge()) {
+     case DefinedAtom::mergeAsTentative:
+       // Tentative definitions are referenced via external relocations.
+       return true;
+     case DefinedAtom::mergeAsWeak:
+     case DefinedAtom::mergeAsWeakAndAddressUsed:
+       // Global weak-defs are referenced via external relocations.
+       return (defAtom->scope() == DefinedAtom::scopeGlobal);
+     default:
+       break;
+    }
+  }
+  // Everything else is referenced via an internal relocation.
+  return false;
+}
+
+
+/// Appends to \p relocs the Mach-O relocation(s) that describe \p ref when
+/// writing a relocatable object.  References outside the mach_o kind
+/// namespace produce nothing.
+/// For branch and pointer kinds one of three encodings is chosen: external
+/// (symbol index) when useExternalRelocationTo() says so, scattered (target
+/// address) when the addend is non-zero, otherwise a plain section-index
+/// relocation.
+void ArchHandler_x86::appendSectionRelocations(
+                                   const DefinedAtom &atom,
+                                   uint64_t atomSectionOffset,
+                                   const Reference &ref,
+                                   FindSymbolIndexForAtom symbolIndexForAtom,
+                                   FindSectionIndexForAtom sectionIndexForAtom,
+                                   FindAddressForAtom addressForAtom,
+                                   normalized::Relocations &relocs) {
+  if (ref.kindNamespace() != Reference::KindNamespace::mach_o)
+    return;
+  assert(ref.kindArch() == Reference::KindArch::x86);
+  uint32_t sectionOffset = atomSectionOffset + ref.offsetInAtom();
+  bool useExternalReloc = useExternalRelocationTo(*ref.target());
+  switch (static_cast<X86Kind>(ref.kindValue())) {
+  case modeCode:
+  case modeData:
+    break;
+  case branch32:
+    if (useExternalReloc) {
+      appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0,
+                  GENERIC_RELOC_VANILLA | rExtern    | rPcRel | rLength4);
+    } else {
+      if (ref.addend() != 0)
+        appendReloc(relocs, sectionOffset, 0, addressForAtom(*ref.target()),
+                  GENERIC_RELOC_VANILLA | rScattered | rPcRel |  rLength4);
+      else
+        appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()),0,
+                  GENERIC_RELOC_VANILLA |              rPcRel | rLength4);
+    }
+    break;
+  case branch16:
+    if (useExternalReloc) {
+      appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0,
+                  GENERIC_RELOC_VANILLA | rExtern    | rPcRel | rLength2);
+    } else {
+      if (ref.addend() != 0)
+        appendReloc(relocs, sectionOffset, 0, addressForAtom(*ref.target()),
+                  GENERIC_RELOC_VANILLA | rScattered | rPcRel |  rLength2);
+      else
+        appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()),0,
+                  GENERIC_RELOC_VANILLA |              rPcRel | rLength2);
+    }
+    break;
+  case pointer32:
+  case abs32:
+    if (useExternalReloc)
+      appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()),  0,
+                GENERIC_RELOC_VANILLA |    rExtern     |  rLength4);
+    else {
+      if (ref.addend() != 0)
+        appendReloc(relocs, sectionOffset, 0, addressForAtom(*ref.target()),
+                GENERIC_RELOC_VANILLA |    rScattered  |  rLength4);
+      else
+        appendReloc(relocs, 
sectionOffset, sectionIndexForAtom(*ref.target()), 0,
+                GENERIC_RELOC_VANILLA | rLength4);
+    }
+    break;
+  // The three difference kinds below are each written as a scattered
+  // SECTDIFF relocation immediately followed by its scattered PAIR.
+  case funcRel32:
+    appendReloc(relocs, sectionOffset, 0, addressForAtom(*ref.target()),
+              GENERIC_RELOC_SECTDIFF |  rScattered    | rLength4);
+    appendReloc(relocs, sectionOffset, 0, addressForAtom(atom) - ref.addend(),
+              GENERIC_RELOC_PAIR     |  rScattered    | rLength4);
+    break;
+  case delta32:
+    appendReloc(relocs, sectionOffset, 0, addressForAtom(*ref.target()),
+              GENERIC_RELOC_SECTDIFF |  rScattered    | rLength4);
+    appendReloc(relocs, sectionOffset, 0, addressForAtom(atom) +
+                                          ref.offsetInAtom(),
+              GENERIC_RELOC_PAIR     |  rScattered    | rLength4);
+    break;
+  case negDelta32:
+    appendReloc(relocs, sectionOffset, 0, addressForAtom(atom) +
+                                          ref.offsetInAtom(),
+              GENERIC_RELOC_SECTDIFF |  rScattered    | rLength4);
+    appendReloc(relocs, sectionOffset, 0, addressForAtom(*ref.target()),
+              GENERIC_RELOC_PAIR     |  rScattered    | rLength4);
+    break;
+  case lazyPointer:
+  case lazyImmediateLocation:
+    llvm_unreachable("lazy reference kind implies Stubs pass was run");
+    break;
+  case invalid:
+    llvm_unreachable("unknown x86 Reference Kind");
+    break;
+  }
+}
+
+
+/// Factory: returns a newly allocated ArchHandler_x86 owned by the caller.
+std::unique_ptr<mach_o::ArchHandler> ArchHandler::create_x86() {
+  return std::unique_ptr<mach_o::ArchHandler>(new ArchHandler_x86());
+}
+
+} // namespace mach_o
+} // namespace lld
diff --git a/lib/ReaderWriter/MachO/ArchHandler_x86_64.cpp b/lib/ReaderWriter/MachO/ArchHandler_x86_64.cpp
new file mode 100644
index 000000000000..81fe1af42d7e
--- /dev/null
+++ b/lib/ReaderWriter/MachO/ArchHandler_x86_64.cpp
@@ -0,0 +1,723 @@
+//===- lib/FileFormat/MachO/ArchHandler_x86_64.cpp ------------------------===//
+//
+//                             The LLVM Linker
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ArchHandler.h"
+#include "Atoms.h"
+#include "MachONormalizedFileBinaryUtils.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm::MachO;
+using namespace lld::mach_o::normalized;
+
+namespace lld {
+namespace mach_o {
+
+using llvm::support::ulittle32_t;
+using llvm::support::ulittle64_t;
+
+using llvm::support::little32_t;
+using llvm::support::little64_t;
+
+/// ArchHandler implementation for x86_64 Mach-O: maps relocations to
+/// Reference kinds when reading, and applies fix-ups / emits relocations when
+/// writing.
+class ArchHandler_x86_64 : public ArchHandler {
+public:
+           ArchHandler_x86_64();
+  virtual ~ArchHandler_x86_64();
+
+  const Registry::KindStrings *kindStrings() override { return _sKindStrings; }
+
+  Reference::KindArch kindArch() override {
+    return Reference::KindArch::x86_64;
+  }
+
+  /// Used by GOTPass to locate GOT References
+  /// \p canBypassGOT is set only when true is returned; it is true solely for
+  /// ripRel32GotLoad.
+  bool isGOTAccess(const Reference &ref, bool &canBypassGOT) override {
+    if (ref.kindNamespace() != Reference::KindNamespace::mach_o)
+      return false;
+    assert(ref.kindArch() == Reference::KindArch::x86_64);
+    switch (ref.kindValue()) {
+    case ripRel32GotLoad:
+      canBypassGOT = true;
+      return true;
+    case ripRel32Got:
+      canBypassGOT = false;
+      return true;
+    case imageOffsetGot:
+      canBypassGOT = false;
+      return true;
+    default:
+      return false;
+    }
+  }
+
+  /// Used by GOTPass to update GOT References
+  /// Rewrites the kind of \p ref in place (the const is cast away).
+  void updateReferenceToGOT(const Reference *ref, bool targetNowGOT) override {
+    assert(ref->kindNamespace() == Reference::KindNamespace::mach_o);
+    assert(ref->kindArch() == Reference::KindArch::x86_64);
+
+    switch (ref->kindValue()) {
+    case ripRel32Got:
+      assert(targetNowGOT && "target must be GOT");
+      // Intentional fall through: handled like ripRel32GotLoad.
+    case ripRel32GotLoad:
+      const_cast<Reference *>(ref)
+        ->setKindValue(targetNowGOT ? 
ripRel32 : ripRel32GotLoadNowLea);
+      break;
+    case imageOffsetGot:
+      const_cast<Reference *>(ref)->setKindValue(imageOffset);
+      break;
+    default:
+      llvm_unreachable("unknown GOT reference kind");
+    }
+  }
+
+  // Simple property accessors: each returns a fixed value or kind.
+  bool needsCompactUnwind() override {
+    return true;
+  }
+  Reference::KindValue imageOffsetKind() override {
+    return imageOffset;
+  }
+  Reference::KindValue imageOffsetKindIndirect() override {
+    return imageOffsetGot;
+  }
+
+  Reference::KindValue unwindRefToCIEKind() override {
+    return negDelta32;
+  }
+
+  Reference::KindValue unwindRefToFunctionKind() override{
+    return unwindFDEToFunction;
+  }
+
+  Reference::KindValue unwindRefToEhFrameKind() override {
+    return unwindInfoToEhFrame;
+  }
+
+  uint32_t dwarfCompactUnwindType() override {
+    return 0x04000000U;
+  }
+
+  const StubInfo &stubInfo() override { return _sStubInfo; }
+
+  bool isNonCallBranch(const Reference &) override {
+    return false;
+  }
+
+  bool isCallSite(const Reference &) override;
+  bool isPointer(const Reference &) override;
+  bool isPairedReloc(const normalized::Relocation &) override;
+
+  /// Decodes one non-paired relocation into \p kind, \p target and \p addend.
+  std::error_code getReferenceInfo(const normalized::Relocation &reloc,
+                                   const DefinedAtom *inAtom,
+                                   uint32_t offsetInAtom,
+                                   uint64_t fixupAddress, bool swap,
+                                   FindAtomBySectionAndAddress atomFromAddress,
+                                   FindAtomBySymbolIndex atomFromSymbolIndex,
+                                   Reference::KindValue *kind,
+                                   const lld::Atom **target,
+                                   Reference::Addend *addend) override;
+  /// Decodes a relocation pair into \p kind, \p target and \p addend.
+  std::error_code
+      getPairReferenceInfo(const normalized::Relocation &reloc1,
+                           const normalized::Relocation &reloc2,
+                           const DefinedAtom *inAtom,
+                           uint32_t offsetInAtom,
+                           uint64_t fixupAddress, bool swap, bool scatterable,
+                           FindAtomBySectionAndAddress atomFromAddress,
+                           FindAtomBySymbolIndex atomFromSymbolIndex,
+                           Reference::KindValue *kind,
+                           const lld::Atom **target,
+                           Reference::Addend *addend) override;
+
+  /// True only for atoms whose content type is typeCString.
+  bool needsLocalSymbolInRelocatableFile(const DefinedAtom *atom) override {
+    return (atom->contentType() == DefinedAtom::typeCString);
+  }
+
+  void 
generateAtomContent(const DefinedAtom &atom, bool relocatable,
+                           FindAddressForAtom findAddress,
+                           FindAddressForAtom findSectionAddress,
+                           uint64_t imageBase,
+                           uint8_t *atomContentBuffer) override;
+
+  void appendSectionRelocations(const DefinedAtom &atom,
+                                uint64_t atomSectionOffset,
+                                const Reference &ref,
+                                FindSymbolIndexForAtom symbolIndexForAtom,
+                                FindSectionIndexForAtom sectionIndexForAtom,
+                                FindAddressForAtom addressForAtom,
+                                normalized::Relocations &relocs) override;
+
+private:
+  static const Registry::KindStrings _sKindStrings[];
+  static const StubInfo              _sStubInfo;
+
+  /// Reference kinds used for x86_64 mach-o references.
+  enum X86_64Kind: Reference::KindValue {
+    invalid,               /// for error condition
+
+    // Kinds found in mach-o .o files:
+    branch32,              /// ex: call _foo
+    ripRel32,              /// ex: movq _foo(%rip), %rax
+    ripRel32Minus1,        /// ex: movb $0x12, _foo(%rip)
+    ripRel32Minus2,        /// ex: movw $0x1234, _foo(%rip)
+    ripRel32Minus4,        /// ex: movl $0x12345678, _foo(%rip)
+    ripRel32Anon,          /// ex: movq L1(%rip), %rax
+    ripRel32GotLoad,       /// ex: movq  _foo@GOTPCREL(%rip), %rax
+    ripRel32Got,           /// ex: pushq _foo@GOTPCREL(%rip)
+    pointer64,             /// ex: .quad _foo
+    pointer64Anon,         /// ex: .quad L1
+    delta64,               /// ex: .quad _foo - .
+    delta32,               /// ex: .long _foo - .
+    delta64Anon,           /// ex: .quad L1 - .
+    delta32Anon,           /// ex: .long L1 - .
+    negDelta32,            /// ex: .long . - _foo
+
+    // Kinds introduced by Passes:
+    ripRel32GotLoadNowLea, /// Target of GOT load is in linkage unit so
+                           ///  "movq  _foo@GOTPCREL(%rip), %rax" can be changed
+                           /// to "leaq _foo(%rip), %rax".
+    lazyPointer,           /// Location contains a lazy pointer.
+    lazyImmediateLocation, /// Location contains immediate value used in stub.
+
+    imageOffset,           /// Location contains offset of atom in final image
+    imageOffsetGot,        /// Location contains offset of GOT entry for atom in
+                           /// final image (typically personality function).
+    unwindFDEToFunction,   /// Nearly delta64, but cannot be rematerialized in
+                           /// relocatable object (yay for implicit contracts!).
+    unwindInfoToEhFrame,   /// Fix low 24 bits of compact unwind encoding to
+                           /// refer to __eh_frame entry.
+  };
+
+  /// Map a single relocation / a relocation pair to an X86_64Kind.
+  Reference::KindValue kindFromReloc(const normalized::Relocation &reloc);
+  Reference::KindValue kindFromRelocPair(const normalized::Relocation &reloc1,
+                                         const normalized::Relocation &reloc2);
+
+  /// Patch one reference for a final linked image.
+  void applyFixupFinal(const Reference &ref, uint8_t *location,
+                       uint64_t fixupAddress, uint64_t targetAddress,
+                       uint64_t inAtomAddress, uint64_t imageBaseAddress,
+                       FindAddressForAtom findSectionAddress);
+
+  /// Patch one reference for a relocatable object file.
+  void applyFixupRelocatable(const Reference &ref, uint8_t *location,
+                             uint64_t fixupAddress,
+                             uint64_t targetAddress,
+                             uint64_t inAtomAddress);
+};
+
+
+ArchHandler_x86_64::ArchHandler_x86_64() { }
+
+ArchHandler_x86_64::~ArchHandler_x86_64() { }
+
+// Name table for the X86_64Kind values, terminated by LLD_KIND_STRING_END.
+const Registry::KindStrings ArchHandler_x86_64::_sKindStrings[] = {
+  LLD_KIND_STRING_ENTRY(invalid), LLD_KIND_STRING_ENTRY(branch32),
+  LLD_KIND_STRING_ENTRY(ripRel32), LLD_KIND_STRING_ENTRY(ripRel32Minus1),
+  LLD_KIND_STRING_ENTRY(ripRel32Minus2), LLD_KIND_STRING_ENTRY(ripRel32Minus4),
+  LLD_KIND_STRING_ENTRY(ripRel32Anon), LLD_KIND_STRING_ENTRY(ripRel32GotLoad),
+  LLD_KIND_STRING_ENTRY(ripRel32GotLoadNowLea),
+  LLD_KIND_STRING_ENTRY(ripRel32Got), LLD_KIND_STRING_ENTRY(lazyPointer),
+  LLD_KIND_STRING_ENTRY(lazyImmediateLocation),
+  LLD_KIND_STRING_ENTRY(pointer64), LLD_KIND_STRING_ENTRY(pointer64Anon),
+  LLD_KIND_STRING_ENTRY(delta32), LLD_KIND_STRING_ENTRY(delta64),
+  LLD_KIND_STRING_ENTRY(delta32Anon), LLD_KIND_STRING_ENTRY(delta64Anon),
+  LLD_KIND_STRING_ENTRY(negDelta32),
+  LLD_KIND_STRING_ENTRY(imageOffset), LLD_KIND_STRING_ENTRY(imageOffsetGot),
+  LLD_KIND_STRING_ENTRY(unwindFDEToFunction),
+  LLD_KIND_STRING_ENTRY(unwindInfoToEhFrame),
+  LLD_KIND_STRING_END
+};
+
+// Stub templates for x86_64: code bytes plus the references to apply to them.
+const ArchHandler::StubInfo ArchHandler_x86_64::_sStubInfo = {
+  "dyld_stub_binder",
+
+  // Lazy pointer references
+  { Reference::KindArch::x86_64, pointer64, 0, 0 },
+  { Reference::KindArch::x86_64, lazyPointer, 0, 0 },
+
+  // GOT 
pointer to dyld_stub_binder
+  { Reference::KindArch::x86_64, pointer64, 0, 0 },
+
+  // x86_64 code alignment 2^1
+  1,
+
+  // Stub size and code
+  6,
+  { 0xff, 0x25, 0x00, 0x00, 0x00, 0x00 },       // jmp *lazyPointer
+  { Reference::KindArch::x86_64, ripRel32, 2, 0 },
+  { false, 0, 0, 0 },
+
+  // Stub Helper size and code
+  10,
+  { 0x68, 0x00, 0x00, 0x00, 0x00,               // pushq $lazy-info-offset
+    0xE9, 0x00, 0x00, 0x00, 0x00 },             // jmp helperhelper
+  { Reference::KindArch::x86_64, lazyImmediateLocation, 1, 0 },
+  { Reference::KindArch::x86_64, branch32, 6, 0 },
+
+  // Stub Helper-Common size and code
+  16,
+  { 0x4C, 0x8D, 0x1D, 0x00, 0x00, 0x00, 0x00,   // leaq cache(%rip),%r11
+    0x41, 0x53,                                 // push %r11
+    0xFF, 0x25, 0x00, 0x00, 0x00, 0x00,         // jmp *binder(%rip)
+    0x90 },                                     // nop
+  { Reference::KindArch::x86_64, ripRel32, 3, 0 },
+  { false, 0, 0, 0 },
+  { Reference::KindArch::x86_64, ripRel32, 11, 0 },
+  { false, 0, 0, 0 }
+
+};
+
+/// True if \p ref is a mach-o reference of kind branch32.
+bool ArchHandler_x86_64::isCallSite(const Reference &ref) {
+  if (ref.kindNamespace() != Reference::KindNamespace::mach_o)
+    return false;
+  assert(ref.kindArch() == Reference::KindArch::x86_64);
+  return (ref.kindValue() == branch32);
+}
+
+/// True if \p ref is a mach-o reference of kind pointer64 or pointer64Anon.
+bool ArchHandler_x86_64::isPointer(const Reference &ref) {
+  if (ref.kindNamespace() != Reference::KindNamespace::mach_o)
+    return false;
+  assert(ref.kindArch() == Reference::KindArch::x86_64);
+  Reference::KindValue kind = ref.kindValue();
+  return (kind == pointer64 || kind == pointer64Anon);
+}
+
+/// True if \p reloc is a SUBTRACTOR, i.e. the first half of a pair.
+bool ArchHandler_x86_64::isPairedReloc(const Relocation &reloc) {
+  return (reloc.type == X86_64_RELOC_SUBTRACTOR);
+}
+
+/// Maps the pattern of a single relocation to an X86_64Kind; returns
+/// `invalid` for any pattern not listed.
+Reference::KindValue
+ArchHandler_x86_64::kindFromReloc(const Relocation &reloc) {
+  switch(relocPattern(reloc)) {
+  case X86_64_RELOC_BRANCH   | rPcRel | rExtern | rLength4:
+    return branch32;
+  case X86_64_RELOC_SIGNED   | rPcRel | rExtern | rLength4:
+    return ripRel32;
+  case X86_64_RELOC_SIGNED   | rPcRel |           rLength4:
+    return ripRel32Anon;
+  case X86_64_RELOC_SIGNED_1 | rPcRel | rExtern | rLength4:
+    
return ripRel32Minus1;
+  case X86_64_RELOC_SIGNED_2 | rPcRel | rExtern | rLength4:
+    return ripRel32Minus2;
+  case X86_64_RELOC_SIGNED_4 | rPcRel | rExtern | rLength4:
+    return ripRel32Minus4;
+  case X86_64_RELOC_GOT_LOAD | rPcRel | rExtern | rLength4:
+    return ripRel32GotLoad;
+  case X86_64_RELOC_GOT      | rPcRel | rExtern | rLength4:
+    return ripRel32Got;
+  case X86_64_RELOC_UNSIGNED          | rExtern | rLength8:
+    return pointer64;
+  case X86_64_RELOC_UNSIGNED                    | rLength8:
+    return pointer64Anon;
+  default:
+    return invalid;
+  }
+}
+
+/// Decodes a single (non-paired) x86_64 relocation at \p offsetInAtom of
+/// \p inAtom into a reference kind, target atom and addend.
+/// Returns a dynamic error code when kindFromReloc() does not recognise the
+/// relocation, or whatever error the atom lookup callbacks report.
+std::error_code
+ArchHandler_x86_64::getReferenceInfo(const Relocation &reloc,
+                                    const DefinedAtom *inAtom,
+                                    uint32_t offsetInAtom,
+                                    uint64_t fixupAddress, bool swap,
+                                    FindAtomBySectionAndAddress atomFromAddress,
+                                    FindAtomBySymbolIndex atomFromSymbolIndex,
+                                    Reference::KindValue *kind,
+                                    const lld::Atom **target,
+                                    Reference::Addend *addend) {
+  typedef std::error_code E;
+  *kind = kindFromReloc(reloc);
+  if (*kind == invalid)
+    return make_dynamic_error_code(Twine("unknown type"));
+  const uint8_t *fixupContent = &inAtom->rawContent()[offsetInAtom];
+  uint64_t targetAddress;
+  switch (*kind) {
+  case branch32:
+  case ripRel32:
+    if (E ec = atomFromSymbolIndex(reloc.symbol, target))
+      return ec;
+    *addend = *(const little32_t *)fixupContent;
+    return std::error_code();
+  // For the Minus1/2/4 kinds the stored value is adjusted by +1/+2/+4 to
+  // form the addend (applyFixupRelocatable subtracts it again).
+  case ripRel32Minus1:
+    if (E ec = atomFromSymbolIndex(reloc.symbol, target))
+      return ec;
+    *addend = (int32_t)*(const little32_t *)fixupContent + 1;
+    return std::error_code();
+  case ripRel32Minus2:
+    if (E ec = atomFromSymbolIndex(reloc.symbol, target))
+      return ec;
+    *addend = (int32_t)*(const little32_t *)fixupContent + 2;
+    return std::error_code();
+  case ripRel32Minus4:
+    if (E ec = atomFromSymbolIndex(reloc.symbol, target))
+      return ec;
+    *addend = (int32_t)*(const little32_t *)fixupContent + 4;
+    return std::error_code();
+  case ripRel32Anon:
+    targetAddress = fixupAddress + 4 + *(const little32_t *)fixupContent;
+    return atomFromAddress(reloc.symbol, 
targetAddress, target, addend); + case ripRel32GotLoad: + case ripRel32Got: + if (E ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + *addend = *(const little32_t *)fixupContent; + return std::error_code(); + case pointer64: + if (E ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + *addend = *(const little64_t *)fixupContent; + return std::error_code(); + case pointer64Anon: + targetAddress = *(const little64_t *)fixupContent; + return atomFromAddress(reloc.symbol, targetAddress, target, addend); + default: + llvm_unreachable("bad reloc kind"); + } +} + +Reference::KindValue +ArchHandler_x86_64::kindFromRelocPair(const normalized::Relocation &reloc1, + const normalized::Relocation &reloc2) { + switch(relocPattern(reloc1) << 16 | relocPattern(reloc2)) { + case ((X86_64_RELOC_SUBTRACTOR | rExtern | rLength8) << 16 | + X86_64_RELOC_UNSIGNED | rExtern | rLength8): + return delta64; + case ((X86_64_RELOC_SUBTRACTOR | rExtern | rLength4) << 16 | + X86_64_RELOC_UNSIGNED | rExtern | rLength4): + return delta32; + case ((X86_64_RELOC_SUBTRACTOR | rExtern | rLength8) << 16 | + X86_64_RELOC_UNSIGNED | rLength8): + return delta64Anon; + case ((X86_64_RELOC_SUBTRACTOR | rExtern | rLength4) << 16 | + X86_64_RELOC_UNSIGNED | rLength4): + return delta32Anon; + default: + llvm_unreachable("bad reloc pairs"); + } +} + +std::error_code +ArchHandler_x86_64::getPairReferenceInfo(const normalized::Relocation &reloc1, + const normalized::Relocation &reloc2, + const DefinedAtom *inAtom, + uint32_t offsetInAtom, + uint64_t fixupAddress, bool swap, + bool scatterable, + FindAtomBySectionAndAddress atomFromAddress, + FindAtomBySymbolIndex atomFromSymbolIndex, + Reference::KindValue *kind, + const lld::Atom **target, + Reference::Addend *addend) { + *kind = kindFromRelocPair(reloc1, reloc2); + if (*kind == invalid) + return make_dynamic_error_code(Twine("unknown pair")); + const uint8_t *fixupContent = &inAtom->rawContent()[offsetInAtom]; + typedef std::error_code E; 
+  uint64_t targetAddress;
+  const lld::Atom *fromTarget;
+  // The subtrahend (reloc1) must be the atom that contains the fix-up.
+  if (E ec = atomFromSymbolIndex(reloc1.symbol, &fromTarget))
+    return ec;
+  if (fromTarget != inAtom)
+    return make_dynamic_error_code(Twine("pointer diff not in base atom"));
+  switch (*kind) {
+  case delta64:
+    if (E ec = atomFromSymbolIndex(reloc2.symbol, target))
+      return ec;
+    *addend = (int64_t)*(const little64_t *)fixupContent + offsetInAtom;
+    return std::error_code();
+  case delta32:
+    if (E ec = atomFromSymbolIndex(reloc2.symbol, target))
+      return ec;
+    *addend = (int32_t)*(const little32_t *)fixupContent + offsetInAtom;
+    return std::error_code();
+  case delta64Anon:
+    targetAddress = offsetInAtom + (int64_t)*(const little64_t *)fixupContent;
+    return atomFromAddress(reloc2.symbol, targetAddress, target, addend);
+  case delta32Anon:
+    targetAddress = offsetInAtom + (int32_t)*(const little32_t *)fixupContent;
+    return atomFromAddress(reloc2.symbol, targetAddress, target, addend);
+  default:
+    llvm_unreachable("bad reloc pair kind");
+  }
+}
+
+/// Writes the final bytes of \p atom into \p atomContentBuffer: the raw
+/// content is copied first, then every reference of the atom is applied as a
+/// fix-up on top of it (relocatable or final flavour, chosen by
+/// \p relocatable).
+void ArchHandler_x86_64::generateAtomContent(
+    const DefinedAtom &atom, bool relocatable, FindAddressForAtom findAddress,
+    FindAddressForAtom findSectionAddress, uint64_t imageBaseAddress,
+    uint8_t *atomContentBuffer) {
+  // Copy raw bytes.
+  memcpy(atomContentBuffer, atom.rawContent().data(), atom.size());
+  // Apply fix-ups.
+  for (const Reference *ref : atom) {
+    uint32_t offset = ref->offsetInAtom();
+    const Atom *target = ref->target();
+    // Only defined atoms have an address; everything else resolves to 0 here.
+    uint64_t targetAddress = 0;
+    if (isa<DefinedAtom>(target))
+      targetAddress = findAddress(*target);
+    uint64_t atomAddress = findAddress(atom);
+    uint64_t fixupAddress = atomAddress + offset;
+    if (relocatable) {
+      applyFixupRelocatable(*ref, &atomContentBuffer[offset],
+                                        fixupAddress, targetAddress,
+                                        atomAddress);
+    } else {
+      applyFixupFinal(*ref, &atomContentBuffer[offset],
+                      fixupAddress, targetAddress,
+                      atomAddress, imageBaseAddress, findSectionAddress);
+    }
+  }
+}
+
+/// Patches the bytes at \p loc for reference \p ref when producing a final
+/// linked image.  References outside the mach_o kind namespace are ignored.
+/// Every handled kind returns from inside the switch; only `invalid` reaches
+/// the trailing llvm_unreachable.
+void ArchHandler_x86_64::applyFixupFinal(
+    const Reference &ref, uint8_t *loc, uint64_t fixupAddress,
+    uint64_t targetAddress, uint64_t inAtomAddress, uint64_t imageBaseAddress,
+    FindAddressForAtom findSectionAddress) {
+  if (ref.kindNamespace() != Reference::KindNamespace::mach_o)
+    return;
+  assert(ref.kindArch() == Reference::KindArch::x86_64);
+  ulittle32_t *loc32 = reinterpret_cast<ulittle32_t *>(loc);
+  ulittle64_t *loc64 = reinterpret_cast<ulittle64_t *>(loc);
+  switch (static_cast<X86_64Kind>(ref.kindValue())) {
+  case branch32:
+  case ripRel32:
+  case ripRel32Anon:
+  case ripRel32Got:
+  case ripRel32GotLoad:
+    *loc32 = targetAddress - (fixupAddress + 4) + ref.addend();
+    return;
+  case pointer64:
+  case pointer64Anon:
+    *loc64 = targetAddress + ref.addend();
+    return;
+  // Minus1/2/4: the displacement is measured from 1, 2 or 4 bytes beyond
+  // the end of the 4-byte field (fixupAddress + 5 / + 6 / + 8).
+  case ripRel32Minus1:
+    *loc32 = targetAddress - (fixupAddress + 5) + ref.addend();
+    return;
+  case ripRel32Minus2:
+    *loc32 = targetAddress - (fixupAddress + 6) + ref.addend();
+    return;
+  case ripRel32Minus4:
+    *loc32 = targetAddress - (fixupAddress + 8) + ref.addend();
+    return;
+  case delta32:
+  case delta32Anon:
+    *loc32 = targetAddress - fixupAddress + ref.addend();
+    return;
+  case delta64:
+  case delta64Anon:
+  case unwindFDEToFunction:
+    *loc64 = targetAddress - fixupAddress + ref.addend();
+    return;
+  case ripRel32GotLoadNowLea:
+    // Change MOVQ to LEA
+    // The opcode byte sits two bytes before the displacement field.
+    assert(loc[-2] == 0x8B);
+    loc[-2] = 
0x8D;
+    *loc32 = targetAddress - (fixupAddress + 4) + ref.addend();
+    return;
+  case negDelta32:
+    *loc32 = fixupAddress - targetAddress + ref.addend();
+    return;
+  case lazyPointer:
+    // Do nothing
+    return;
+  case lazyImmediateLocation:
+    *loc32 = ref.addend();
+    return;
+  case imageOffset:
+  case imageOffsetGot:
+    *loc32 = (targetAddress - imageBaseAddress) + ref.addend();
+    return;
+  case unwindInfoToEhFrame: {
+    // Only the low 24 bits of the word are replaced; the top byte is kept.
+    uint64_t val = targetAddress - findSectionAddress(*ref.target()) + ref.addend();
+    assert(val < 0xffffffU && "offset in __eh_frame too large");
+    *loc32 = (*loc32 & 0xff000000U) | val;
+    return;
+  }
+  case invalid:
+    // Fall into llvm_unreachable().
+    break;
+  }
+  llvm_unreachable("invalid x86_64 Reference Kind");
+}
+
+
+/// Patches the bytes at \p loc for reference \p ref when producing a
+/// relocatable object file.  References outside the mach_o kind namespace are
+/// ignored.  Kinds whose relocation names the target by symbol store only the
+/// addend; the *Anon kinds fold the target address into the stored value.
+void ArchHandler_x86_64::applyFixupRelocatable(const Reference &ref,
+                                               uint8_t *loc,
+                                               uint64_t fixupAddress,
+                                               uint64_t targetAddress,
+                                               uint64_t inAtomAddress)  {
+  if (ref.kindNamespace() != Reference::KindNamespace::mach_o)
+    return;
+  assert(ref.kindArch() == Reference::KindArch::x86_64);
+  ulittle32_t *loc32 = reinterpret_cast<ulittle32_t *>(loc);
+  ulittle64_t *loc64 = reinterpret_cast<ulittle64_t *>(loc);
+  switch (static_cast<X86_64Kind>(ref.kindValue())) {
+  case branch32:
+  case ripRel32:
+  case ripRel32Got:
+  case ripRel32GotLoad:
+    *loc32 = ref.addend();
+    return;
+  case ripRel32Anon:
+    *loc32 = (targetAddress - (fixupAddress + 4)) + ref.addend();
+    return;
+  case pointer64:
+    *loc64 = ref.addend();
+    return;
+  case pointer64Anon:
+    *loc64 = targetAddress + ref.addend();
+    return;
+  case ripRel32Minus1:
+    *loc32 = ref.addend() - 1;
+    return;
+  case ripRel32Minus2:
+    *loc32 = ref.addend() - 2;
+    return;
+  case ripRel32Minus4:
+    *loc32 = ref.addend() - 4;
+    return;
+  case delta32:
+    *loc32 = ref.addend() + inAtomAddress - fixupAddress;
+    return;
+  case delta32Anon:
+    *loc32 = (targetAddress - fixupAddress) + ref.addend();
+    return;
+  case delta64:
+    *loc64 = ref.addend() + inAtomAddress - fixupAddress;
+    return;
+  case 
delta64Anon:
+    *loc64 = (targetAddress - fixupAddress) + ref.addend();
+    return;
+  case negDelta32:
+    *loc32 = fixupAddress - targetAddress + ref.addend();
+    return;
+  case ripRel32GotLoadNowLea:
+    llvm_unreachable("ripRel32GotLoadNowLea implies GOT pass was run");
+    return;
+  case lazyPointer:
+  case lazyImmediateLocation:
+    llvm_unreachable("lazy reference kind implies Stubs pass was run");
+    return;
+  case imageOffset:
+  case imageOffsetGot:
+  case unwindInfoToEhFrame:
+    llvm_unreachable("fixup implies __unwind_info");
+    return;
+  case unwindFDEToFunction:
+    // Do nothing for now
+    return;
+  case invalid:
+    // Fall into llvm_unreachable().
+    break;
+  }
+  llvm_unreachable("unknown x86_64 Reference Kind");
+}
+
+/// Appends to \p relocs the Mach-O relocation(s) that describe \p ref when
+/// writing a relocatable object.  References outside the mach_o kind
+/// namespace produce nothing.  Kinds that name their target by symbol use
+/// symbolIndexForAtom() with rExtern; the *Anon kinds use
+/// sectionIndexForAtom() without it.
+void ArchHandler_x86_64::appendSectionRelocations(
+                                   const DefinedAtom &atom,
+                                   uint64_t atomSectionOffset,
+                                   const Reference &ref,
+                                   FindSymbolIndexForAtom symbolIndexForAtom,
+                                   FindSectionIndexForAtom sectionIndexForAtom,
+                                   FindAddressForAtom addressForAtom,
+                                   normalized::Relocations &relocs) {
+  if (ref.kindNamespace() != Reference::KindNamespace::mach_o)
+    return;
+  assert(ref.kindArch() == Reference::KindArch::x86_64);
+  uint32_t sectionOffset = atomSectionOffset + ref.offsetInAtom();
+  switch (static_cast<X86_64Kind>(ref.kindValue())) {
+  case branch32:
+    appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0,
+                X86_64_RELOC_BRANCH | rPcRel | rExtern | rLength4);
+    return;
+  case ripRel32:
+    appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0,
+                X86_64_RELOC_SIGNED | rPcRel | rExtern | rLength4 );
+    return;
+  case ripRel32Anon:
+    appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()), 0,
+                X86_64_RELOC_SIGNED | rPcRel           | rLength4 );
+    return;
+  case ripRel32Got:
+    appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0,
+                X86_64_RELOC_GOT | rPcRel | rExtern | rLength4 );
+    return;
+  case ripRel32GotLoad:
+    appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0,
+                
X86_64_RELOC_GOT_LOAD | rPcRel | rExtern | rLength4 ); + return; + case pointer64: + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + X86_64_RELOC_UNSIGNED | rExtern | rLength8); + return; + case pointer64Anon: + appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()), 0, + X86_64_RELOC_UNSIGNED | rLength8); + return; + case ripRel32Minus1: + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + X86_64_RELOC_SIGNED_1 | rPcRel | rExtern | rLength4 ); + return; + case ripRel32Minus2: + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + X86_64_RELOC_SIGNED_2 | rPcRel | rExtern | rLength4 ); + return; + case ripRel32Minus4: + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + X86_64_RELOC_SIGNED_4 | rPcRel | rExtern | rLength4 ); + return; + case delta32: + appendReloc(relocs, sectionOffset, symbolIndexForAtom(atom), 0, + X86_64_RELOC_SUBTRACTOR | rExtern | rLength4 ); + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + X86_64_RELOC_UNSIGNED | rExtern | rLength4 ); + return; + case delta32Anon: + appendReloc(relocs, sectionOffset, symbolIndexForAtom(atom), 0, + X86_64_RELOC_SUBTRACTOR | rExtern | rLength4 ); + appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()), 0, + X86_64_RELOC_UNSIGNED | rLength4 ); + return; + case delta64: + appendReloc(relocs, sectionOffset, symbolIndexForAtom(atom), 0, + X86_64_RELOC_SUBTRACTOR | rExtern | rLength8 ); + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + X86_64_RELOC_UNSIGNED | rExtern | rLength8 ); + return; + case delta64Anon: + appendReloc(relocs, sectionOffset, symbolIndexForAtom(atom), 0, + X86_64_RELOC_SUBTRACTOR | rExtern | rLength8 ); + appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()), 0, + X86_64_RELOC_UNSIGNED | rLength8 ); + return; + case unwindFDEToFunction: + case unwindInfoToEhFrame: + case negDelta32: + return; + case 
ripRel32GotLoadNowLea: + llvm_unreachable("ripRel32GotLoadNowLea implies GOT pass was run"); + return; + case lazyPointer: + case lazyImmediateLocation: + llvm_unreachable("lazy reference kind implies Stubs pass was run"); + return; + case imageOffset: + case imageOffsetGot: + llvm_unreachable("__unwind_info references should have been resolved"); + return; + case invalid: + // Fall into llvm_unreachable(). + break; + } + llvm_unreachable("unknown x86_64 Reference Kind"); +} + + +std::unique_ptr<mach_o::ArchHandler> ArchHandler::create_x86_64() { + return std::unique_ptr<mach_o::ArchHandler>(new ArchHandler_x86_64()); +} + +} // namespace mach_o +} // namespace lld diff --git a/lib/ReaderWriter/MachO/Atoms.h b/lib/ReaderWriter/MachO/Atoms.h new file mode 100644 index 000000000000..8d60c1a163a6 --- /dev/null +++ b/lib/ReaderWriter/MachO/Atoms.h @@ -0,0 +1,181 @@ +//===- lib/ReaderWriter/MachO/Atoms.h -------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_MACHO_ATOMS_H +#define LLD_READER_WRITER_MACHO_ATOMS_H + +#include "lld/Core/Simple.h" + +namespace lld { +namespace mach_o { +class MachODefinedAtom : public SimpleDefinedAtom { +public: + MachODefinedAtom(const File &f, const StringRef name, Scope scope, + ContentType type, Merge merge, bool thumb, bool noDeadStrip, + const ArrayRef<uint8_t> content, Alignment align) + : SimpleDefinedAtom(f), _name(name), _content(content), + _align(align), _contentType(type), _scope(scope), _merge(merge), + _thumb(thumb), _noDeadStrip(noDeadStrip) {} + + // Constructor for zero-fill content + MachODefinedAtom(const File &f, const StringRef name, Scope scope, + uint64_t size, bool noDeadStrip, Alignment align) + : SimpleDefinedAtom(f), _name(name), + _content(ArrayRef<uint8_t>(nullptr, size)), _align(align), + _contentType(DefinedAtom::typeZeroFill), + _scope(scope), _merge(mergeNo), _thumb(false), + _noDeadStrip(noDeadStrip) {} + + uint64_t size() const override { return _content.size(); } + + ContentType contentType() const override { return _contentType; } + + Alignment alignment() const override { return _align; } + + StringRef name() const override { return _name; } + + Scope scope() const override { return _scope; } + + Merge merge() const override { return _merge; } + + DeadStripKind deadStrip() const override { + if (_contentType == DefinedAtom::typeInitializerPtr) + return deadStripNever; + if (_contentType == DefinedAtom::typeTerminatorPtr) + return deadStripNever; + if (_noDeadStrip) + return deadStripNever; + return deadStripNormal; + } + + ArrayRef<uint8_t> rawContent() const override { + // Note: Zerofill atoms have a content pointer which is null. 
+ return _content; + } + + bool isThumb() const { return _thumb; } + + void addReference(uint32_t offsetInAtom, uint16_t relocType, + const Atom *target, Reference::Addend addend, + Reference::KindArch arch = Reference::KindArch::x86_64, + Reference::KindNamespace ns + = Reference::KindNamespace::mach_o) { + SimpleDefinedAtom::addReference(ns, arch, relocType, offsetInAtom, target, + addend); + } + +private: + const StringRef _name; + const ArrayRef<uint8_t> _content; + const DefinedAtom::Alignment _align; + const ContentType _contentType; + const Scope _scope; + const Merge _merge; + const bool _thumb; + const bool _noDeadStrip; +}; + +class MachODefinedCustomSectionAtom : public MachODefinedAtom { +public: + MachODefinedCustomSectionAtom(const File &f, const StringRef name, + Scope scope, ContentType type, Merge merge, + bool thumb, bool noDeadStrip, + const ArrayRef<uint8_t> content, + StringRef sectionName, Alignment align) + : MachODefinedAtom(f, name, scope, type, merge, thumb, noDeadStrip, + content, align), + _sectionName(sectionName) {} + + SectionChoice sectionChoice() const override { + return DefinedAtom::sectionCustomRequired; + } + + StringRef customSectionName() const override { + return _sectionName; + } +private: + StringRef _sectionName; +}; + + +class MachOTentativeDefAtom : public SimpleDefinedAtom { +public: + MachOTentativeDefAtom(const File &f, const StringRef name, Scope scope, + uint64_t size, DefinedAtom::Alignment align) + : SimpleDefinedAtom(f), _name(name), _scope(scope), _size(size), + _align(align) {} + + uint64_t size() const override { return _size; } + + Merge merge() const override { return DefinedAtom::mergeAsTentative; } + + ContentType contentType() const override { return DefinedAtom::typeZeroFill; } + + Alignment alignment() const override { return _align; } + + StringRef name() const override { return _name; } + + Scope scope() const override { return _scope; } + + ArrayRef<uint8_t> rawContent() const override { return 
ArrayRef<uint8_t>(); } + +private: + const StringRef _name; + const Scope _scope; + const uint64_t _size; + const DefinedAtom::Alignment _align; +}; + +class MachOSharedLibraryAtom : public SharedLibraryAtom { +public: + MachOSharedLibraryAtom(const File &file, StringRef name, + StringRef dylibInstallName, bool weakDef) + : SharedLibraryAtom(), _file(file), _name(name), + _dylibInstallName(dylibInstallName) {} + virtual ~MachOSharedLibraryAtom() {} + + virtual StringRef loadName() const override { + return _dylibInstallName; + } + + virtual bool canBeNullAtRuntime() const override { + // FIXME: this may actually be changeable. For now, all symbols are strongly + // defined though. + return false; + } + + virtual const File& file() const override { + return _file; + } + + virtual StringRef name() const override { + return _name; + } + + virtual Type type() const override { + // Unused in MachO (I think). + return Type::Unknown; + } + + virtual uint64_t size() const override { + // Unused in MachO (I think) + return 0; + } + +private: + const File &_file; + StringRef _name; + StringRef _dylibInstallName; +}; + + +} // mach_o +} // lld + +#endif diff --git a/lib/ReaderWriter/MachO/CMakeLists.txt b/lib/ReaderWriter/MachO/CMakeLists.txt new file mode 100644 index 000000000000..e396537c63c8 --- /dev/null +++ b/lib/ReaderWriter/MachO/CMakeLists.txt @@ -0,0 +1,26 @@ +add_llvm_library(lldMachO + ArchHandler.cpp + ArchHandler_arm.cpp + ArchHandler_arm64.cpp + ArchHandler_x86.cpp + ArchHandler_x86_64.cpp + CompactUnwindPass.cpp + GOTPass.cpp + LayoutPass.cpp + MachOLinkingContext.cpp + MachONormalizedFileBinaryReader.cpp + MachONormalizedFileBinaryWriter.cpp + MachONormalizedFileFromAtoms.cpp + MachONormalizedFileToAtoms.cpp + MachONormalizedFileYAML.cpp + ShimPass.cpp + StubsPass.cpp + WriterMachO.cpp + LINK_LIBS + lldCore + lldYAML + LLVMObject + LLVMSupport + ) + +include_directories(.) 
diff --git a/lib/ReaderWriter/MachO/CompactUnwindPass.cpp b/lib/ReaderWriter/MachO/CompactUnwindPass.cpp new file mode 100644 index 000000000000..fc8608383e5d --- /dev/null +++ b/lib/ReaderWriter/MachO/CompactUnwindPass.cpp @@ -0,0 +1,530 @@ +//===- lib/ReaderWriter/MachO/CompactUnwindPass.cpp -----------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file A pass to convert MachO's __compact_unwind sections into the final +/// __unwind_info format used during runtime. See +/// mach-o/compact_unwind_encoding.h for more details on the formats involved. +/// +//===----------------------------------------------------------------------===// + +#include "ArchHandler.h" +#include "File.h" +#include "MachONormalizedFileBinaryUtils.h" +#include "MachOPasses.h" +#include "lld/Core/DefinedAtom.h" +#include "lld/Core/File.h" +#include "lld/Core/LLVM.h" +#include "lld/Core/Reference.h" +#include "lld/Core/Simple.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Format.h" + +#define DEBUG_TYPE "macho-compact-unwind" + +namespace lld { +namespace mach_o { + +namespace { +struct CompactUnwindEntry { + const Atom *rangeStart; + const Atom *personalityFunction; + const Atom *lsdaLocation; + const Atom *ehFrame; + + uint32_t rangeLength; + + // There are 3 types of compact unwind entry, distinguished by the encoding + // value: 0 indicates a function with no unwind info; + // _archHandler.dwarfCompactUnwindType() indicates that the entry defers to + // __eh_frame, and that the ehFrame entry will be valid; any other value is a + // real compact unwind entry -- personalityFunction will be set and + // lsdaLocation may be. 
+ uint32_t encoding; + + CompactUnwindEntry(const DefinedAtom *function) + : rangeStart(function), personalityFunction(nullptr), + lsdaLocation(nullptr), ehFrame(nullptr), rangeLength(function->size()), + encoding(0) {} + + CompactUnwindEntry() + : rangeStart(nullptr), personalityFunction(nullptr), + lsdaLocation(nullptr), ehFrame(nullptr), rangeLength(0), encoding(0) {} +}; + +struct UnwindInfoPage { + std::vector<CompactUnwindEntry> entries; +}; +} + +class UnwindInfoAtom : public SimpleDefinedAtom { +public: + UnwindInfoAtom(ArchHandler &archHandler, const File &file, bool isBig, + std::vector<const Atom *> &personalities, + std::vector<uint32_t> &commonEncodings, + std::vector<UnwindInfoPage> &pages, uint32_t numLSDAs) + : SimpleDefinedAtom(file), _archHandler(archHandler), + _commonEncodingsOffset(7 * sizeof(uint32_t)), + _personalityArrayOffset(_commonEncodingsOffset + + commonEncodings.size() * sizeof(uint32_t)), + _topLevelIndexOffset(_personalityArrayOffset + + personalities.size() * sizeof(uint32_t)), + _lsdaIndexOffset(_topLevelIndexOffset + + 3 * (pages.size() + 1) * sizeof(uint32_t)), + _firstPageOffset(_lsdaIndexOffset + 2 * numLSDAs * sizeof(uint32_t)), + _isBig(isBig) { + + addHeader(commonEncodings.size(), personalities.size(), pages.size()); + addCommonEncodings(commonEncodings); + addPersonalityFunctions(personalities); + addTopLevelIndexes(pages); + addLSDAIndexes(pages, numLSDAs); + addSecondLevelPages(pages); + } + + ContentType contentType() const override { + return DefinedAtom::typeProcessedUnwindInfo; + } + + Alignment alignment() const override { return Alignment(2); } + + uint64_t size() const override { return _contents.size(); } + + ContentPermissions permissions() const override { + return DefinedAtom::permR__; + } + + ArrayRef<uint8_t> rawContent() const override { return _contents; } + + void addHeader(uint32_t numCommon, uint32_t numPersonalities, + uint32_t numPages) { + using normalized::write32; + + uint32_t headerSize = 7 * 
sizeof(uint32_t); + _contents.resize(headerSize); + + uint8_t *headerEntries = _contents.data(); + // version + write32(headerEntries, 1, _isBig); + // commonEncodingsArraySectionOffset + write32(headerEntries + sizeof(uint32_t), _commonEncodingsOffset, _isBig); + // commonEncodingsArrayCount + write32(headerEntries + 2 * sizeof(uint32_t), numCommon, _isBig); + // personalityArraySectionOffset + write32(headerEntries + 3 * sizeof(uint32_t), _personalityArrayOffset, + _isBig); + // personalityArrayCount + write32(headerEntries + 4 * sizeof(uint32_t), numPersonalities, _isBig); + // indexSectionOffset + write32(headerEntries + 5 * sizeof(uint32_t), _topLevelIndexOffset, _isBig); + // indexCount + write32(headerEntries + 6 * sizeof(uint32_t), numPages + 1, _isBig); + } + + /// Add the list of common encodings to the section; this is simply an array + /// of uint32_t compact values. Size has already been specified in the header. + void addCommonEncodings(std::vector<uint32_t> &commonEncodings) { + using normalized::write32; + + _contents.resize(_commonEncodingsOffset + + commonEncodings.size() * sizeof(uint32_t)); + uint8_t *commonEncodingsArea = + reinterpret_cast<uint8_t *>(_contents.data() + _commonEncodingsOffset); + + for (uint32_t encoding : commonEncodings) { + write32(commonEncodingsArea, encoding, _isBig); + commonEncodingsArea += sizeof(uint32_t); + } + } + + void addPersonalityFunctions(std::vector<const Atom *> personalities) { + _contents.resize(_personalityArrayOffset + + personalities.size() * sizeof(uint32_t)); + + for (unsigned i = 0; i < personalities.size(); ++i) + addImageReferenceIndirect(_personalityArrayOffset + i * sizeof(uint32_t), + personalities[i]); + } + + void addTopLevelIndexes(std::vector<UnwindInfoPage> &pages) { + using normalized::write32; + + uint32_t numIndexes = pages.size() + 1; + _contents.resize(_topLevelIndexOffset + numIndexes * 3 * sizeof(uint32_t)); + + uint32_t pageLoc = _firstPageOffset; + + // The most difficult job here 
is calculating the LSDAs; everything else + // follows fairly naturally, but we can't state where the first + uint8_t *indexData = &_contents[_topLevelIndexOffset]; + uint32_t numLSDAs = 0; + for (unsigned i = 0; i < pages.size(); ++i) { + // functionOffset + addImageReference(_topLevelIndexOffset + 3 * i * sizeof(uint32_t), + pages[i].entries[0].rangeStart); + // secondLevelPagesSectionOffset + write32(indexData + (3 * i + 1) * sizeof(uint32_t), pageLoc, _isBig); + write32(indexData + (3 * i + 2) * sizeof(uint32_t), + _lsdaIndexOffset + numLSDAs * 2 * sizeof(uint32_t), _isBig); + + for (auto &entry : pages[i].entries) + if (entry.lsdaLocation) + ++numLSDAs; + } + + // Finally, write out the final sentinel index + CompactUnwindEntry &finalEntry = pages[pages.size() - 1].entries.back(); + addImageReference(_topLevelIndexOffset + + 3 * pages.size() * sizeof(uint32_t), + finalEntry.rangeStart, finalEntry.rangeLength); + // secondLevelPagesSectionOffset => 0 + write32(indexData + (3 * pages.size() + 2) * sizeof(uint32_t), + _lsdaIndexOffset + numLSDAs * 2 * sizeof(uint32_t), _isBig); + } + + void addLSDAIndexes(std::vector<UnwindInfoPage> &pages, uint32_t numLSDAs) { + _contents.resize(_lsdaIndexOffset + numLSDAs * 2 * sizeof(uint32_t)); + + uint32_t curOffset = _lsdaIndexOffset; + for (auto &page : pages) { + for (auto &entry : page.entries) { + if (!entry.lsdaLocation) + continue; + + addImageReference(curOffset, entry.rangeStart); + addImageReference(curOffset + sizeof(uint32_t), entry.lsdaLocation); + curOffset += 2 * sizeof(uint32_t); + } + } + } + + void addSecondLevelPages(std::vector<UnwindInfoPage> &pages) { + for (auto &page : pages) { + addRegularSecondLevelPage(page); + } + } + + void addRegularSecondLevelPage(const UnwindInfoPage &page) { + uint32_t curPageOffset = _contents.size(); + const int16_t headerSize = sizeof(uint32_t) + 2 * sizeof(uint16_t); + uint32_t curPageSize = + headerSize + 2 * page.entries.size() * sizeof(uint32_t); + 
_contents.resize(curPageOffset + curPageSize); + + using normalized::write32; + using normalized::write16; + // 2 => regular page + write32(&_contents[curPageOffset], 2, _isBig); + // offset of 1st entry + write16(&_contents[curPageOffset + 4], headerSize, _isBig); + write16(&_contents[curPageOffset + 6], page.entries.size(), _isBig); + + uint32_t pagePos = curPageOffset + headerSize; + for (auto &entry : page.entries) { + addImageReference(pagePos, entry.rangeStart); + + write32(_contents.data() + pagePos + sizeof(uint32_t), entry.encoding, + _isBig); + if ((entry.encoding & 0x0f000000U) == + _archHandler.dwarfCompactUnwindType()) + addEhFrameReference(pagePos + sizeof(uint32_t), entry.ehFrame); + + pagePos += 2 * sizeof(uint32_t); + } + } + + void addEhFrameReference(uint32_t offset, const Atom *dest, + Reference::Addend addend = 0) { + addReference(Reference::KindNamespace::mach_o, _archHandler.kindArch(), + _archHandler.unwindRefToEhFrameKind(), offset, dest, addend); + } + + void addImageReference(uint32_t offset, const Atom *dest, + Reference::Addend addend = 0) { + addReference(Reference::KindNamespace::mach_o, _archHandler.kindArch(), + _archHandler.imageOffsetKind(), offset, dest, addend); + } + + void addImageReferenceIndirect(uint32_t offset, const Atom *dest) { + addReference(Reference::KindNamespace::mach_o, _archHandler.kindArch(), + _archHandler.imageOffsetKindIndirect(), offset, dest, 0); + } + +private: + mach_o::ArchHandler &_archHandler; + std::vector<uint8_t> _contents; + uint32_t _commonEncodingsOffset; + uint32_t _personalityArrayOffset; + uint32_t _topLevelIndexOffset; + uint32_t _lsdaIndexOffset; + uint32_t _firstPageOffset; + bool _isBig; +}; + +/// Pass for instantiating and optimizing GOT slots. 
+/// +class CompactUnwindPass : public Pass { +public: + CompactUnwindPass(const MachOLinkingContext &context) + : _context(context), _archHandler(_context.archHandler()), + _file("<mach-o Compact Unwind Pass>"), + _isBig(MachOLinkingContext::isBigEndian(_context.arch())) {} + +private: + void perform(std::unique_ptr<MutableFile> &mergedFile) override { + DEBUG(llvm::dbgs() << "MachO Compact Unwind pass\n"); + + std::map<const Atom *, CompactUnwindEntry> unwindLocs; + std::map<const Atom *, const Atom *> dwarfFrames; + std::vector<const Atom *> personalities; + uint32_t numLSDAs = 0; + + // First collect all __compact_unwind and __eh_frame entries, addressable by + // the function referred to. + collectCompactUnwindEntries(mergedFile, unwindLocs, personalities, + numLSDAs); + + collectDwarfFrameEntries(mergedFile, dwarfFrames); + + // Skip rest of pass if no unwind info. + if (unwindLocs.empty() && dwarfFrames.empty()) + return; + + // FIXME: if there are more than 4 personality functions then we need to + // defer to DWARF info for the ones we don't put in the list. They should + // also probably be sorted by frequency. + assert(personalities.size() <= 4); + + // TODO: Find commmon encodings for use by compressed pages. + std::vector<uint32_t> commonEncodings; + + // Now sort the entries by final address and fixup the compact encoding to + // its final form (i.e. set personality function bits & create DWARF + // references where needed). + std::vector<CompactUnwindEntry> unwindInfos = createUnwindInfoEntries( + mergedFile, unwindLocs, personalities, dwarfFrames); + + // Finally, we can start creating pages based on these entries. + + DEBUG(llvm::dbgs() << " Splitting entries into pages\n"); + // FIXME: we split the entries into pages naively: lots of 4k pages followed + // by a small one. ld64 tried to minimize space and align them to real 4k + // boundaries. 
That might be worth doing, or perhaps we could perform some + // minor balancing for expected number of lookups. + std::vector<UnwindInfoPage> pages; + unsigned pageStart = 0; + do { + pages.push_back(UnwindInfoPage()); + + // FIXME: we only create regular pages at the moment. These can hold up to + // 1021 entries according to the documentation. + unsigned entriesInPage = + std::min(1021U, (unsigned)unwindInfos.size() - pageStart); + + std::copy(unwindInfos.begin() + pageStart, + unwindInfos.begin() + pageStart + entriesInPage, + std::back_inserter(pages.back().entries)); + pageStart += entriesInPage; + + DEBUG(llvm::dbgs() + << " Page from " << pages.back().entries[0].rangeStart->name() + << " to " << pages.back().entries.back().rangeStart->name() << " + " + << llvm::format("0x%x", pages.back().entries.back().rangeLength) + << " has " << entriesInPage << " entries\n"); + } while (pageStart < unwindInfos.size()); + + UnwindInfoAtom *unwind = new (_file.allocator()) + UnwindInfoAtom(_archHandler, _file, _isBig, personalities, + commonEncodings, pages, numLSDAs); + mergedFile->addAtom(*unwind); + + // Finally, remove all __compact_unwind atoms now that we've processed them. 
+ mergedFile->removeDefinedAtomsIf([](const DefinedAtom *atom) { + return atom->contentType() == DefinedAtom::typeCompactUnwindInfo; + }); + } + + void collectCompactUnwindEntries( + std::unique_ptr<MutableFile> &mergedFile, + std::map<const Atom *, CompactUnwindEntry> &unwindLocs, + std::vector<const Atom *> &personalities, uint32_t &numLSDAs) { + DEBUG(llvm::dbgs() << " Collecting __compact_unwind entries\n"); + + for (const DefinedAtom *atom : mergedFile->defined()) { + if (atom->contentType() != DefinedAtom::typeCompactUnwindInfo) + continue; + + auto unwindEntry = extractCompactUnwindEntry(atom); + unwindLocs.insert(std::make_pair(unwindEntry.rangeStart, unwindEntry)); + + DEBUG(llvm::dbgs() << " Entry for " << unwindEntry.rangeStart->name() + << ", encoding=" + << llvm::format("0x%08x", unwindEntry.encoding)); + if (unwindEntry.personalityFunction) + DEBUG(llvm::dbgs() << ", personality=" + << unwindEntry.personalityFunction->name() + << ", lsdaLoc=" << unwindEntry.lsdaLocation->name()); + DEBUG(llvm::dbgs() << '\n'); + + // Count number of LSDAs we see, since we need to know how big the index + // will be while laying out the section. + if (unwindEntry.lsdaLocation) + ++numLSDAs; + + // Gather the personality functions now, so that they're in deterministic + // order (derived from the DefinedAtom order). + if (unwindEntry.personalityFunction) { + auto pFunc = std::find(personalities.begin(), personalities.end(), + unwindEntry.personalityFunction); + if (pFunc == personalities.end()) + personalities.push_back(unwindEntry.personalityFunction); + } + } + } + + CompactUnwindEntry extractCompactUnwindEntry(const DefinedAtom *atom) { + CompactUnwindEntry entry; + + for (const Reference *ref : *atom) { + switch (ref->offsetInAtom()) { + case 0: + // FIXME: there could legitimately be functions with multiple encoding + // entries. However, nothing produces them at the moment. 
+ assert(ref->addend() == 0 && "unexpected offset into function"); + entry.rangeStart = ref->target(); + break; + case 0x10: + assert(ref->addend() == 0 && "unexpected offset into personality fn"); + entry.personalityFunction = ref->target(); + break; + case 0x18: + assert(ref->addend() == 0 && "unexpected offset into LSDA atom"); + entry.lsdaLocation = ref->target(); + break; + } + } + + if (atom->rawContent().size() < 4 * sizeof(uint32_t)) + return entry; + + using normalized::read32; + entry.rangeLength = + read32(atom->rawContent().data() + 2 * sizeof(uint32_t), _isBig); + entry.encoding = + read32(atom->rawContent().data() + 3 * sizeof(uint32_t), _isBig); + return entry; + } + + void + collectDwarfFrameEntries(std::unique_ptr<MutableFile> &mergedFile, + std::map<const Atom *, const Atom *> &dwarfFrames) { + for (const DefinedAtom *ehFrameAtom : mergedFile->defined()) { + if (ehFrameAtom->contentType() != DefinedAtom::typeCFI) + continue; + if (ArchHandler::isDwarfCIE(_isBig, ehFrameAtom)) + continue; + + if (const Atom *function = _archHandler.fdeTargetFunction(ehFrameAtom)) + dwarfFrames[function] = ehFrameAtom; + } + } + + /// Every atom defined in __TEXT,__text needs an entry in the final + /// __unwind_info section (in order). These comes from two sources: + /// + Input __compact_unwind sections where possible (after adding the + /// personality function offset which is only known now). + /// + A synthesised reference to __eh_frame if there's no __compact_unwind + /// or too many personality functions to be accommodated. 
+ std::vector<CompactUnwindEntry> createUnwindInfoEntries( + const std::unique_ptr<MutableFile> &mergedFile, + const std::map<const Atom *, CompactUnwindEntry> &unwindLocs, + const std::vector<const Atom *> &personalities, + const std::map<const Atom *, const Atom *> &dwarfFrames) { + std::vector<CompactUnwindEntry> unwindInfos; + + DEBUG(llvm::dbgs() << " Creating __unwind_info entries\n"); + // The final order in the __unwind_info section must be derived from the + // order of typeCode atoms, since that's how they'll be put into the object + // file eventually (yuck!). + for (const DefinedAtom *atom : mergedFile->defined()) { + if (atom->contentType() != DefinedAtom::typeCode) + continue; + + unwindInfos.push_back(finalizeUnwindInfoEntryForAtom( + atom, unwindLocs, personalities, dwarfFrames)); + + DEBUG(llvm::dbgs() << " Entry for " << atom->name() + << ", final encoding=" + << llvm::format("0x%08x", unwindInfos.back().encoding) + << '\n'); + } + + return unwindInfos; + } + + CompactUnwindEntry finalizeUnwindInfoEntryForAtom( + const DefinedAtom *function, + const std::map<const Atom *, CompactUnwindEntry> &unwindLocs, + const std::vector<const Atom *> &personalities, + const std::map<const Atom *, const Atom *> &dwarfFrames) { + auto unwindLoc = unwindLocs.find(function); + + CompactUnwindEntry entry; + if (unwindLoc == unwindLocs.end()) { + // Default entry has correct encoding (0 => no unwind), but we need to + // synthesise the function. + entry.rangeStart = function; + entry.rangeLength = function->size(); + } else + entry = unwindLoc->second; + + + // If there's no __compact_unwind entry, or it explicitly says to use + // __eh_frame, we need to try and fill in the correct DWARF atom. 
+ if (entry.encoding == _archHandler.dwarfCompactUnwindType() || + entry.encoding == 0) { + auto dwarfFrame = dwarfFrames.find(function); + if (dwarfFrame != dwarfFrames.end()) { + entry.encoding = _archHandler.dwarfCompactUnwindType(); + entry.ehFrame = dwarfFrame->second; + } + } + + + auto personality = std::find(personalities.begin(), personalities.end(), + entry.personalityFunction); + uint32_t personalityIdx = personality == personalities.end() + ? 0 + : personality - personalities.begin() + 1; + + // FIXME: We should also use DWARF when there isn't enough room for the + // personality function in the compact encoding. + assert(personalityIdx < 4 && "too many personality functions"); + + entry.encoding |= personalityIdx << 28; + + if (entry.lsdaLocation) + entry.encoding |= 1U << 30; + + return entry; + } + + const MachOLinkingContext &_context; + mach_o::ArchHandler &_archHandler; + MachOFile _file; + bool _isBig; +}; + +void addCompactUnwindPass(PassManager &pm, const MachOLinkingContext &ctx) { + assert(ctx.needsCompactUnwindPass()); + pm.add(llvm::make_unique<CompactUnwindPass>(ctx)); +} + +} // end namesapce mach_o +} // end namesapce lld diff --git a/lib/ReaderWriter/MachO/ExecutableAtoms.hpp b/lib/ReaderWriter/MachO/ExecutableAtoms.hpp new file mode 100644 index 000000000000..cd562de216d9 --- /dev/null +++ b/lib/ReaderWriter/MachO/ExecutableAtoms.hpp @@ -0,0 +1,136 @@ +//===- lib/ReaderWriter/MachO/ExecutableAtoms.hpp -------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_MACHO_EXECUTABLE_ATOMS_H +#define LLD_READER_WRITER_MACHO_EXECUTABLE_ATOMS_H + +#include "Atoms.h" + +#include "llvm/Support/MachO.h" + +#include "lld/Core/ArchiveLibraryFile.h" +#include "lld/Core/DefinedAtom.h" +#include "lld/Core/File.h" +#include "lld/Core/LinkingContext.h" +#include "lld/Core/Reference.h" +#include "lld/Core/Simple.h" +#include "lld/Core/UndefinedAtom.h" +#include "lld/ReaderWriter/MachOLinkingContext.h" + +namespace lld { +namespace mach_o { + + +// +// CEntryFile adds an UndefinedAtom for "_main" so that the Resolving +// phase will fail if "_main" is undefined. +// +class CEntryFile : public SimpleFile { +public: + CEntryFile(const MachOLinkingContext &context) + : SimpleFile("C entry"), + _undefMain(*this, context.entrySymbolName()) { + this->addAtom(_undefMain); + } + +private: + SimpleUndefinedAtom _undefMain; +}; + + +// +// StubHelperFile adds an UndefinedAtom for "dyld_stub_binder" so that +// the Resolveing phase will fail if "dyld_stub_binder" is undefined. +// +class StubHelperFile : public SimpleFile { +public: + StubHelperFile(const MachOLinkingContext &context) + : SimpleFile("stub runtime"), + _undefBinder(*this, context.binderSymbolName()) { + this->addAtom(_undefBinder); + } + +private: + SimpleUndefinedAtom _undefBinder; +}; + + +// +// MachHeaderAliasFile lazily instantiates the magic symbols that mark the start +// of the mach_header for final linked images. 
+// +class MachHeaderAliasFile : public ArchiveLibraryFile { +public: + MachHeaderAliasFile(const MachOLinkingContext &context) + : ArchiveLibraryFile("mach_header symbols") { + switch (context.outputMachOType()) { + case llvm::MachO::MH_EXECUTE: + _machHeaderSymbolName = "__mh_execute_header"; + break; + case llvm::MachO::MH_DYLIB: + _machHeaderSymbolName = "__mh_dylib_header"; + break; + case llvm::MachO::MH_BUNDLE: + _machHeaderSymbolName = "__mh_bundle_header"; + break; + case llvm::MachO::MH_DYLINKER: + _machHeaderSymbolName = "__mh_dylinker_header"; + break; + case llvm::MachO::MH_PRELOAD: + _machHeaderSymbolName = "__mh_preload_header"; + break; + default: + llvm_unreachable("no mach_header symbol for file type"); + } + } + + std::error_code + parseAllMembers(std::vector<std::unique_ptr<File>> &result) override { + return std::error_code(); + } + + File *find(StringRef sym, bool dataSymbolOnly) override { + if (sym.equals("___dso_handle") || sym.equals(_machHeaderSymbolName)) { + _definedAtoms._atoms.push_back(new (allocator()) MachODefinedAtom( + *this, sym, DefinedAtom::scopeLinkageUnit, + DefinedAtom::typeMachHeader, DefinedAtom::mergeNo, false, false, + ArrayRef<uint8_t>(), DefinedAtom::Alignment(12,0))); + return this; + } + return nullptr; + } + + const atom_collection<DefinedAtom> &defined() const override { + return _definedAtoms; + } + const atom_collection<UndefinedAtom> &undefined() const override { + return _undefinedAtoms; + } + + const atom_collection<SharedLibraryAtom> &sharedLibrary() const override { + return _sharedLibraryAtoms; + } + + const atom_collection<AbsoluteAtom> &absolute() const override { + return _absoluteAtoms; + } + + +private: + mutable atom_collection_vector<DefinedAtom> _definedAtoms; + atom_collection_vector<UndefinedAtom> _undefinedAtoms; + atom_collection_vector<SharedLibraryAtom> _sharedLibraryAtoms; + atom_collection_vector<AbsoluteAtom> _absoluteAtoms; + StringRef _machHeaderSymbolName; +}; + +} // namespace mach_o 
+} // namespace lld + +#endif // LLD_READER_WRITER_MACHO_EXECUTABLE_ATOMS_H diff --git a/lib/ReaderWriter/MachO/File.h b/lib/ReaderWriter/MachO/File.h new file mode 100644 index 000000000000..913644ec1fc0 --- /dev/null +++ b/lib/ReaderWriter/MachO/File.h @@ -0,0 +1,327 @@ +//===- lib/ReaderWriter/MachO/File.h --------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_MACHO_FILE_H +#define LLD_READER_WRITER_MACHO_FILE_H + +#include "Atoms.h" +#include "MachONormalizedFile.h" +#include "lld/Core/SharedLibraryFile.h" +#include "lld/Core/Simple.h" +#include "llvm/ADT/StringMap.h" +#include <unordered_map> + +namespace lld { +namespace mach_o { + +using lld::mach_o::normalized::Section; + +class MachOFile : public SimpleFile { +public: + MachOFile(std::unique_ptr<MemoryBuffer> mb, MachOLinkingContext *ctx) + : SimpleFile(mb->getBufferIdentifier()), _mb(std::move(mb)), _ctx(ctx) {} + + MachOFile(StringRef path) : SimpleFile(path) {} + + void addDefinedAtom(StringRef name, Atom::Scope scope, + DefinedAtom::ContentType type, DefinedAtom::Merge merge, + uint64_t sectionOffset, uint64_t contentSize, bool thumb, + bool noDeadStrip, bool copyRefs, + const Section *inSection) { + assert(sectionOffset+contentSize <= inSection->content.size()); + ArrayRef<uint8_t> content = inSection->content.slice(sectionOffset, + contentSize); + if (copyRefs) { + // Make a copy of the atom's name and content that is owned by this file. 
+ name = name.copy(allocator()); + content = content.copy(allocator()); + } + DefinedAtom::Alignment align( + inSection->alignment, + sectionOffset % ((uint64_t)1 << inSection->alignment)); + MachODefinedAtom *atom = + new (allocator()) MachODefinedAtom(*this, name, scope, type, merge, + thumb, noDeadStrip, content, align); + addAtomForSection(inSection, atom, sectionOffset); + } + + void addDefinedAtomInCustomSection(StringRef name, Atom::Scope scope, + DefinedAtom::ContentType type, DefinedAtom::Merge merge, + bool thumb, bool noDeadStrip, uint64_t sectionOffset, + uint64_t contentSize, StringRef sectionName, + bool copyRefs, const Section *inSection) { + assert(sectionOffset+contentSize <= inSection->content.size()); + ArrayRef<uint8_t> content = inSection->content.slice(sectionOffset, + contentSize); + if (copyRefs) { + // Make a copy of the atom's name and content that is owned by this file. + name = name.copy(allocator()); + content = content.copy(allocator()); + sectionName = sectionName.copy(allocator()); + } + DefinedAtom::Alignment align( + inSection->alignment, + sectionOffset % ((uint64_t)1 << inSection->alignment)); + MachODefinedCustomSectionAtom *atom = + new (allocator()) MachODefinedCustomSectionAtom(*this, name, scope, type, + merge, thumb, + noDeadStrip, content, + sectionName, align); + addAtomForSection(inSection, atom, sectionOffset); + } + + void addZeroFillDefinedAtom(StringRef name, Atom::Scope scope, + uint64_t sectionOffset, uint64_t size, + bool noDeadStrip, bool copyRefs, + const Section *inSection) { + if (copyRefs) { + // Make a copy of the atom's name and content that is owned by this file. 
+ name = name.copy(allocator()); + } + DefinedAtom::Alignment align( + inSection->alignment, + sectionOffset % ((uint64_t)1 << inSection->alignment)); + MachODefinedAtom *atom = + new (allocator()) MachODefinedAtom(*this, name, scope, size, noDeadStrip, + align); + addAtomForSection(inSection, atom, sectionOffset); + } + + void addUndefinedAtom(StringRef name, bool copyRefs) { + if (copyRefs) { + // Make a copy of the atom's name that is owned by this file. + name = name.copy(allocator()); + } + SimpleUndefinedAtom *atom = + new (allocator()) SimpleUndefinedAtom(*this, name); + addAtom(*atom); + _undefAtoms[name] = atom; + } + + void addTentativeDefAtom(StringRef name, Atom::Scope scope, uint64_t size, + DefinedAtom::Alignment align, bool copyRefs) { + if (copyRefs) { + // Make a copy of the atom's name that is owned by this file. + name = name.copy(allocator()); + } + MachOTentativeDefAtom *atom = + new (allocator()) MachOTentativeDefAtom(*this, name, scope, size, align); + addAtom(*atom); + _undefAtoms[name] = atom; + } + + /// Search this file for an the atom from 'section' that covers + /// 'offsetInSect'. Returns nullptr is no atom found. + MachODefinedAtom *findAtomCoveringAddress(const Section §ion, + uint64_t offsetInSect, + uint32_t *foundOffsetAtom=nullptr) { + const auto &pos = _sectionAtoms.find(§ion); + if (pos == _sectionAtoms.end()) + return nullptr; + const auto &vec = pos->second; + assert(offsetInSect < section.content.size()); + // Vector of atoms for section are already sorted, so do binary search. + const auto &atomPos = std::lower_bound(vec.begin(), vec.end(), offsetInSect, + [offsetInSect](const SectionOffsetAndAtom &ao, + uint64_t targetAddr) -> bool { + // Each atom has a start offset of its slice of the + // section's content. This compare function must return true + // iff the atom's range is before the offset being searched for. 
+ uint64_t atomsEndOffset = ao.offset+ao.atom->rawContent().size(); + return (atomsEndOffset <= offsetInSect); + }); + if (atomPos == vec.end()) + return nullptr; + if (foundOffsetAtom) + *foundOffsetAtom = offsetInSect - atomPos->offset; + return atomPos->atom; + } + + /// Searches this file for an UndefinedAtom named 'name'. Returns + /// nullptr is no such atom found. + const lld::Atom *findUndefAtom(StringRef name) { + auto pos = _undefAtoms.find(name); + if (pos == _undefAtoms.end()) + return nullptr; + return pos->second; + } + + typedef std::function<void (MachODefinedAtom* atom)> DefinedAtomVisitor; + + void eachDefinedAtom(DefinedAtomVisitor vistor) { + for (auto §AndAtoms : _sectionAtoms) { + for (auto &offAndAtom : sectAndAtoms.second) { + vistor(offAndAtom.atom); + } + } + } + + typedef std::function<void(MachODefinedAtom *atom, uint64_t offset)> + SectionAtomVisitor; + + void eachAtomInSection(const Section §ion, SectionAtomVisitor visitor) { + auto pos = _sectionAtoms.find(§ion); + if (pos == _sectionAtoms.end()) + return; + auto vec = pos->second; + + for (auto &offAndAtom : vec) + visitor(offAndAtom.atom, offAndAtom.offset); + } + +protected: + std::error_code doParse() override { + // Convert binary file to normalized mach-o. + auto normFile = normalized::readBinary(_mb, _ctx->arch()); + if (std::error_code ec = normFile.getError()) + return ec; + // Convert normalized mach-o to atoms. 
+ if (std::error_code ec = normalized::normalizedObjectToAtoms( + this, **normFile, false)) + return ec; + return std::error_code(); + } + +private: + struct SectionOffsetAndAtom { uint64_t offset; MachODefinedAtom *atom; }; + + void addAtomForSection(const Section *inSection, MachODefinedAtom* atom, + uint64_t sectionOffset) { + SectionOffsetAndAtom offAndAtom; + offAndAtom.offset = sectionOffset; + offAndAtom.atom = atom; + _sectionAtoms[inSection].push_back(offAndAtom); + addAtom(*atom); + } + + + typedef llvm::DenseMap<const normalized::Section *, + std::vector<SectionOffsetAndAtom>> SectionToAtoms; + typedef llvm::StringMap<const lld::Atom *> NameToAtom; + + std::unique_ptr<MemoryBuffer> _mb; + MachOLinkingContext *_ctx; + SectionToAtoms _sectionAtoms; + NameToAtom _undefAtoms; +}; + +class MachODylibFile : public SharedLibraryFile { +public: + MachODylibFile(std::unique_ptr<MemoryBuffer> mb, MachOLinkingContext *ctx) + : SharedLibraryFile(mb->getBufferIdentifier()), + _mb(std::move(mb)), _ctx(ctx) {} + + MachODylibFile(StringRef path) : SharedLibraryFile(path) {} + + const SharedLibraryAtom *exports(StringRef name, bool isData) const override { + // Pass down _installName so that if this requested symbol + // is re-exported through this dylib, the SharedLibraryAtom's loadName() + // is this dylib installName and not the implementation dylib's. + // NOTE: isData is not needed for dylibs (it matters for static libs). + return exports(name, _installName); + } + + /// Adds symbol name that this dylib exports. The corresponding + /// SharedLibraryAtom is created lazily (since most symbols are not used). 
+ void addExportedSymbol(StringRef name, bool weakDef, bool copyRefs) { + if (copyRefs) { + name = name.copy(allocator()); + } + AtomAndFlags info(weakDef); + _nameToAtom[name] = info; + } + + void addReExportedDylib(StringRef dylibPath) { + _reExportedDylibs.emplace_back(dylibPath); + } + + StringRef installName() { return _installName; } + uint32_t currentVersion() { return _currentVersion; } + uint32_t compatVersion() { return _compatVersion; } + + void setInstallName(StringRef name) { _installName = name; } + void setCompatVersion(uint32_t version) { _compatVersion = version; } + void setCurrentVersion(uint32_t version) { _currentVersion = version; } + + typedef std::function<MachODylibFile *(StringRef)> FindDylib; + + void loadReExportedDylibs(FindDylib find) { + for (ReExportedDylib &entry : _reExportedDylibs) { + entry.file = find(entry.path); + } + } + + StringRef getDSOName() const override { return _installName; } + + std::error_code doParse() override { + // Convert binary file to normalized mach-o. + auto normFile = normalized::readBinary(_mb, _ctx->arch()); + if (std::error_code ec = normFile.getError()) + return ec; + // Convert normalized mach-o to atoms. + if (std::error_code ec = normalized::normalizedDylibToAtoms( + this, **normFile, false)) + return ec; + return std::error_code(); + } + +private: + const SharedLibraryAtom *exports(StringRef name, + StringRef installName) const { + // First, check if requested symbol is directly implemented by this dylib. + auto entry = _nameToAtom.find(name); + if (entry != _nameToAtom.end()) { + if (!entry->second.atom) { + // Lazily create SharedLibraryAtom. + entry->second.atom = + new (allocator()) MachOSharedLibraryAtom(*this, name, installName, + entry->second.weakDef); + } + return entry->second.atom; + } + + // Next, check if symbol is implemented in some re-exported dylib. 
+ for (const ReExportedDylib &dylib : _reExportedDylibs) { + assert(dylib.file); + auto atom = dylib.file->exports(name, installName); + if (atom) + return atom; + } + + // Symbol not exported or re-exported by this dylib. + return nullptr; + } + + + struct ReExportedDylib { + ReExportedDylib(StringRef p) : path(p), file(nullptr) { } + StringRef path; + MachODylibFile *file; + }; + + struct AtomAndFlags { + AtomAndFlags() : atom(nullptr), weakDef(false) { } + AtomAndFlags(bool weak) : atom(nullptr), weakDef(weak) { } + const SharedLibraryAtom *atom; + bool weakDef; + }; + + std::unique_ptr<MemoryBuffer> _mb; + MachOLinkingContext *_ctx; + StringRef _installName; + uint32_t _currentVersion; + uint32_t _compatVersion; + std::vector<ReExportedDylib> _reExportedDylibs; + mutable std::unordered_map<StringRef, AtomAndFlags> _nameToAtom; +}; + +} // end namespace mach_o +} // end namespace lld + +#endif diff --git a/lib/ReaderWriter/MachO/GOTPass.cpp b/lib/ReaderWriter/MachO/GOTPass.cpp new file mode 100644 index 000000000000..1ddec4003cbd --- /dev/null +++ b/lib/ReaderWriter/MachO/GOTPass.cpp @@ -0,0 +1,185 @@ +//===- lib/ReaderWriter/MachO/GOTPass.cpp ---------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This linker pass transforms all GOT kind references to real references. +/// That is, in assembly you can write something like: +/// movq foo@GOTPCREL(%rip), %rax +/// which means you want to load a pointer to "foo" out of the GOT (global +/// Offsets Table). In the object file, the Atom containing this instruction +/// has a Reference whose target is an Atom named "foo" and the Reference +/// kind is a GOT load. The linker needs to instantiate a pointer sized +/// GOT entry. 
This is done be creating a GOT Atom to represent that pointer +/// sized data in this pass, and altering the Atom graph so the Reference now +/// points to the GOT Atom entry (corresponding to "foo") and changing the +/// Reference Kind to reflect it is now pointing to a GOT entry (rather +/// then needing a GOT entry). +/// +/// There is one optimization the linker can do here. If the target of the GOT +/// is in the same linkage unit and does not need to be interposable, and +/// the GOT use is just a load (not some other operation), this pass can +/// transform that load into an LEA (add). This optimizes away one memory load +/// which at runtime that could stall the pipeline. This optimization only +/// works for architectures in which a (GOT) load instruction can be change to +/// an LEA instruction that is the same size. The method isGOTAccess() should +/// only return true for "canBypassGOT" if this optimization is supported. +/// +//===----------------------------------------------------------------------===// + +#include "ArchHandler.h" +#include "File.h" +#include "MachOPasses.h" +#include "lld/Core/DefinedAtom.h" +#include "lld/Core/File.h" +#include "lld/Core/LLVM.h" +#include "lld/Core/Reference.h" +#include "lld/Core/Simple.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" + +namespace lld { +namespace mach_o { + + +// +// GOT Entry Atom created by the GOT pass. +// +class GOTEntryAtom : public SimpleDefinedAtom { +public: + GOTEntryAtom(const File &file, bool is64, StringRef name) + : SimpleDefinedAtom(file), _is64(is64), _name(name) { } + + ContentType contentType() const override { + return DefinedAtom::typeGOT; + } + + Alignment alignment() const override { + return Alignment(_is64 ? 3 : 2); + } + + uint64_t size() const override { + return _is64 ? 
8 : 4; + } + + ContentPermissions permissions() const override { + return DefinedAtom::permRW_; + } + + ArrayRef<uint8_t> rawContent() const override { + static const uint8_t zeros[] = + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; + return llvm::makeArrayRef(zeros, size()); + } + + StringRef slotName() const { + return _name; + } + +private: + const bool _is64; + StringRef _name; +}; + + +/// Pass for instantiating and optimizing GOT slots. +/// +class GOTPass : public Pass { +public: + GOTPass(const MachOLinkingContext &context) + : _context(context), _archHandler(_context.archHandler()), + _file("<mach-o GOT Pass>") { } + +private: + + void perform(std::unique_ptr<MutableFile> &mergedFile) override { + // Scan all references in all atoms. + for (const DefinedAtom *atom : mergedFile->defined()) { + for (const Reference *ref : *atom) { + // Look at instructions accessing the GOT. + bool canBypassGOT; + if (!_archHandler.isGOTAccess(*ref, canBypassGOT)) + continue; + const Atom *target = ref->target(); + assert(target != nullptr); + + if (!shouldReplaceTargetWithGOTAtom(target, canBypassGOT)) { + // Update reference kind to reflect that target is a direct accesss. + _archHandler.updateReferenceToGOT(ref, false); + } else { + // Replace the target with a reference to a GOT entry. + const DefinedAtom *gotEntry = makeGOTEntry(target); + const_cast<Reference *>(ref)->setTarget(gotEntry); + // Update reference kind to reflect that target is now a GOT entry. 
+ _archHandler.updateReferenceToGOT(ref, true); + } + } + } + + // Sort and add all created GOT Atoms to master file + std::vector<const GOTEntryAtom *> entries; + entries.reserve(_targetToGOT.size()); + for (auto &it : _targetToGOT) + entries.push_back(it.second); + std::sort(entries.begin(), entries.end(), + [](const GOTEntryAtom *left, const GOTEntryAtom *right) { + return (left->slotName().compare(right->slotName()) < 0); + }); + for (const GOTEntryAtom *slot : entries) + mergedFile->addAtom(*slot); + } + + bool shouldReplaceTargetWithGOTAtom(const Atom *target, bool canBypassGOT) { + // Accesses to shared library symbols must go through GOT. + if (isa<SharedLibraryAtom>(target)) + return true; + // Accesses to interposable symbols in same linkage unit must also go + // through GOT. + const DefinedAtom *defTarget = dyn_cast<DefinedAtom>(target); + if (defTarget != nullptr && + defTarget->interposable() != DefinedAtom::interposeNo) { + assert(defTarget->scope() != DefinedAtom::scopeTranslationUnit); + return true; + } + // Target does not require indirection. So, if instruction allows GOT to be + // by-passed, do that optimization and don't create GOT entry. + return !canBypassGOT; + } + + const DefinedAtom *makeGOTEntry(const Atom *target) { + auto pos = _targetToGOT.find(target); + if (pos == _targetToGOT.end()) { + GOTEntryAtom *gotEntry = new (_file.allocator()) + GOTEntryAtom(_file, _context.is64Bit(), target->name()); + _targetToGOT[target] = gotEntry; + const ArchHandler::ReferenceInfo &nlInfo = _archHandler.stubInfo(). 
+ nonLazyPointerReferenceToBinder; + gotEntry->addReference(Reference::KindNamespace::mach_o, nlInfo.arch, + nlInfo.kind, 0, target, 0); + return gotEntry; + } + return pos->second; + } + + + const MachOLinkingContext &_context; + mach_o::ArchHandler &_archHandler; + MachOFile _file; + llvm::DenseMap<const Atom*, const GOTEntryAtom*> _targetToGOT; +}; + + + +void addGOTPass(PassManager &pm, const MachOLinkingContext &ctx) { + assert(ctx.needsGOTPass()); + pm.add(llvm::make_unique<GOTPass>(ctx)); +} + + +} // end namesapce mach_o +} // end namesapce lld diff --git a/lib/ReaderWriter/MachO/LayoutPass.cpp b/lib/ReaderWriter/MachO/LayoutPass.cpp new file mode 100644 index 000000000000..2d096e4c1a6a --- /dev/null +++ b/lib/ReaderWriter/MachO/LayoutPass.cpp @@ -0,0 +1,482 @@ +//===-- ReaderWriter/MachO/LayoutPass.cpp - Layout atoms ------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "LayoutPass.h" +#include "lld/Core/Instrumentation.h" +#include "lld/Core/Parallel.h" +#include "lld/Core/PassManager.h" +#include "lld/ReaderWriter/MachOLinkingContext.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/Debug.h" +#include <algorithm> +#include <set> + +using namespace lld; + +#define DEBUG_TYPE "LayoutPass" + +namespace lld { +namespace mach_o { + +static bool compareAtoms(const LayoutPass::SortKey &, + const LayoutPass::SortKey &, + LayoutPass::SortOverride customSorter); + +#ifndef NDEBUG +// Return "reason (leftval, rightval)" +static std::string formatReason(StringRef reason, int leftVal, int rightVal) { + return (Twine(reason) + " (" + Twine(leftVal) + ", " + Twine(rightVal) + ")") + .str(); +} + +// Less-than relationship of two atoms must be transitive, which is, if a < b +// and b < c, a < c must be true. 
This function checks the transitivity by +// checking the sort results. +static void checkTransitivity(std::vector<LayoutPass::SortKey> &vec, + LayoutPass::SortOverride customSorter) { + for (auto i = vec.begin(), e = vec.end(); (i + 1) != e; ++i) { + for (auto j = i + 1; j != e; ++j) { + assert(compareAtoms(*i, *j, customSorter)); + assert(!compareAtoms(*j, *i, customSorter)); + } + } +} + +// Helper functions to check follow-on graph. +typedef llvm::DenseMap<const DefinedAtom *, const DefinedAtom *> AtomToAtomT; + +static std::string atomToDebugString(const Atom *atom) { + const DefinedAtom *definedAtom = dyn_cast<DefinedAtom>(atom); + std::string str; + llvm::raw_string_ostream s(str); + if (definedAtom->name().empty()) + s << "<anonymous " << definedAtom << ">"; + else + s << definedAtom->name(); + s << " in "; + if (definedAtom->customSectionName().empty()) + s << "<anonymous>"; + else + s << definedAtom->customSectionName(); + s.flush(); + return str; +} + +static void showCycleDetectedError(const Registry ®istry, + AtomToAtomT &followOnNexts, + const DefinedAtom *atom) { + const DefinedAtom *start = atom; + llvm::dbgs() << "There's a cycle in a follow-on chain!\n"; + do { + llvm::dbgs() << " " << atomToDebugString(atom) << "\n"; + for (const Reference *ref : *atom) { + StringRef kindValStr; + if (!registry.referenceKindToString(ref->kindNamespace(), ref->kindArch(), + ref->kindValue(), kindValStr)) { + kindValStr = "<unknown>"; + } + llvm::dbgs() << " " << kindValStr + << ": " << atomToDebugString(ref->target()) << "\n"; + } + atom = followOnNexts[atom]; + } while (atom != start); + llvm::report_fatal_error("Cycle detected"); +} + +/// Exit if there's a cycle in a followon chain reachable from the +/// given root atom. Uses the tortoise and hare algorithm to detect a +/// cycle. 
+static void checkNoCycleInFollowonChain(const Registry ®istry, + AtomToAtomT &followOnNexts, + const DefinedAtom *root) { + const DefinedAtom *tortoise = root; + const DefinedAtom *hare = followOnNexts[root]; + while (true) { + if (!tortoise || !hare) + return; + if (tortoise == hare) + showCycleDetectedError(registry, followOnNexts, tortoise); + tortoise = followOnNexts[tortoise]; + hare = followOnNexts[followOnNexts[hare]]; + } +} + +static void checkReachabilityFromRoot(AtomToAtomT &followOnRoots, + const DefinedAtom *atom) { + if (!atom) return; + auto i = followOnRoots.find(atom); + if (i == followOnRoots.end()) { + llvm_unreachable(((Twine("Atom <") + atomToDebugString(atom) + + "> has no follow-on root!")) + .str() + .c_str()); + } + const DefinedAtom *ap = i->second; + while (true) { + const DefinedAtom *next = followOnRoots[ap]; + if (!next) { + llvm_unreachable((Twine("Atom <" + atomToDebugString(atom) + + "> is not reachable from its root!")) + .str() + .c_str()); + } + if (next == ap) + return; + ap = next; + } +} + +static void printDefinedAtoms(const MutableFile::DefinedAtomRange &atomRange) { + for (const DefinedAtom *atom : atomRange) { + llvm::dbgs() << " file=" << atom->file().path() + << ", name=" << atom->name() + << ", size=" << atom->size() + << ", type=" << atom->contentType() + << ", ordinal=" << atom->ordinal() + << "\n"; + } +} + +/// Verify that the followon chain is sane. Should not be called in +/// release binary. +void LayoutPass::checkFollowonChain(MutableFile::DefinedAtomRange &range) { + ScopedTask task(getDefaultDomain(), "LayoutPass::checkFollowonChain"); + + // Verify that there's no cycle in follow-on chain. + std::set<const DefinedAtom *> roots; + for (const auto &ai : _followOnRoots) + roots.insert(ai.second); + for (const DefinedAtom *root : roots) + checkNoCycleInFollowonChain(_registry, _followOnNexts, root); + + // Verify that all the atoms in followOnNexts have references to + // their roots. 
+ for (const auto &ai : _followOnNexts) { + checkReachabilityFromRoot(_followOnRoots, ai.first); + checkReachabilityFromRoot(_followOnRoots, ai.second); + } +} +#endif // #ifndef NDEBUG + +/// The function compares atoms by sorting atoms in the following order +/// a) Sorts atoms by their ordinal overrides (layout-after/ingroup) +/// b) Sorts atoms by their permissions +/// c) Sorts atoms by their content +/// d) Sorts atoms by custom sorter +/// e) Sorts atoms on how they appear using File Ordinality +/// f) Sorts atoms on how they appear within the File +static bool compareAtomsSub(const LayoutPass::SortKey &lc, + const LayoutPass::SortKey &rc, + LayoutPass::SortOverride customSorter, + std::string &reason) { + const DefinedAtom *left = lc._atom; + const DefinedAtom *right = rc._atom; + if (left == right) { + reason = "same"; + return false; + } + + // Find the root of the chain if it is a part of a follow-on chain. + const DefinedAtom *leftRoot = lc._root; + const DefinedAtom *rightRoot = rc._root; + + // Sort atoms by their ordinal overrides only if they fall in the same + // chain. + if (leftRoot == rightRoot) { + DEBUG(reason = formatReason("override", lc._override, rc._override)); + return lc._override < rc._override; + } + + // Sort same permissions together. + DefinedAtom::ContentPermissions leftPerms = leftRoot->permissions(); + DefinedAtom::ContentPermissions rightPerms = rightRoot->permissions(); + + if (leftPerms != rightPerms) { + DEBUG(reason = + formatReason("contentPerms", (int)leftPerms, (int)rightPerms)); + return leftPerms < rightPerms; + } + + // Sort same content types together. + DefinedAtom::ContentType leftType = leftRoot->contentType(); + DefinedAtom::ContentType rightType = rightRoot->contentType(); + + if (leftType != rightType) { + DEBUG(reason = formatReason("contentType", (int)leftType, (int)rightType)); + return leftType < rightType; + } + + // Use custom sorter if supplied. 
+ if (customSorter) { + bool leftBeforeRight; + if (customSorter(leftRoot, rightRoot, leftBeforeRight)) + return leftBeforeRight; + } + + // Sort by .o order. + const File *leftFile = &leftRoot->file(); + const File *rightFile = &rightRoot->file(); + + if (leftFile != rightFile) { + DEBUG(reason = formatReason(".o order", (int)leftFile->ordinal(), + (int)rightFile->ordinal())); + return leftFile->ordinal() < rightFile->ordinal(); + } + + // Sort by atom order with .o file. + uint64_t leftOrdinal = leftRoot->ordinal(); + uint64_t rightOrdinal = rightRoot->ordinal(); + + if (leftOrdinal != rightOrdinal) { + DEBUG(reason = formatReason("ordinal", (int)leftRoot->ordinal(), + (int)rightRoot->ordinal())); + return leftOrdinal < rightOrdinal; + } + + llvm::errs() << "Unordered: <" << left->name() << "> <" + << right->name() << ">\n"; + llvm_unreachable("Atoms with Same Ordinal!"); +} + +static bool compareAtoms(const LayoutPass::SortKey &lc, + const LayoutPass::SortKey &rc, + LayoutPass::SortOverride customSorter) { + std::string reason; + bool result = compareAtomsSub(lc, rc, customSorter, reason); + DEBUG({ + StringRef comp = result ? "<" : ">="; + llvm::dbgs() << "Layout: '" << lc._atom->name() << "' " << comp << " '" + << rc._atom->name() << "' (" << reason << ")\n"; + }); + return result; +} + +LayoutPass::LayoutPass(const Registry ®istry, SortOverride sorter) + : _registry(registry), _customSorter(sorter) {} + +// Returns the atom immediately followed by the given atom in the followon +// chain. +const DefinedAtom *LayoutPass::findAtomFollowedBy( + const DefinedAtom *targetAtom) { + // Start from the beginning of the chain and follow the chain until + // we find the targetChain. + const DefinedAtom *atom = _followOnRoots[targetAtom]; + while (true) { + const DefinedAtom *prevAtom = atom; + AtomToAtomT::iterator targetFollowOnAtomsIter = _followOnNexts.find(atom); + // The target atom must be in the chain of its root. 
+ assert(targetFollowOnAtomsIter != _followOnNexts.end()); + atom = targetFollowOnAtomsIter->second; + if (atom == targetAtom) + return prevAtom; + } +} + +// Check if all the atoms followed by the given target atom are of size zero. +// When this method is called, an atom being added is not of size zero and +// will be added to the head of the followon chain. All the atoms between the +// atom and the targetAtom (specified by layout-after) need to be of size zero +// in this case. Otherwise the desired layout is impossible. +bool LayoutPass::checkAllPrevAtomsZeroSize(const DefinedAtom *targetAtom) { + const DefinedAtom *atom = _followOnRoots[targetAtom]; + while (true) { + if (atom == targetAtom) + return true; + if (atom->size() != 0) + // TODO: print warning that an impossible layout is being desired by the + // user. + return false; + AtomToAtomT::iterator targetFollowOnAtomsIter = _followOnNexts.find(atom); + // The target atom must be in the chain of its root. + assert(targetFollowOnAtomsIter != _followOnNexts.end()); + atom = targetFollowOnAtomsIter->second; + } +} + +// Set the root of all atoms in targetAtom's chain to the given root. +void LayoutPass::setChainRoot(const DefinedAtom *targetAtom, + const DefinedAtom *root) { + // Walk through the followon chain and override each node's root. + while (true) { + _followOnRoots[targetAtom] = root; + AtomToAtomT::iterator targetFollowOnAtomsIter = + _followOnNexts.find(targetAtom); + if (targetFollowOnAtomsIter == _followOnNexts.end()) + return; + targetAtom = targetFollowOnAtomsIter->second; + } +} + +/// This pass builds the followon tables described by two DenseMaps +/// followOnRoots and followonNexts. 
+/// The followOnRoots map contains a mapping of a DefinedAtom to its root +/// The followOnNexts map contains a mapping of what DefinedAtom follows the +/// current Atom +/// The algorithm follows a very simple approach +/// a) If the atom is first seen, then make that as the root atom +/// b) The targetAtom which this Atom contains, has the root thats set to the +/// root of the current atom +/// c) If the targetAtom is part of a different tree and the root of the +/// targetAtom is itself, Chain all the atoms that are contained in the tree +/// to the current Tree +/// d) If the targetAtom is part of a different chain and the root of the +/// targetAtom until the targetAtom has all atoms of size 0, then chain the +/// targetAtoms and its tree to the current chain +void LayoutPass::buildFollowOnTable(MutableFile::DefinedAtomRange &range) { + ScopedTask task(getDefaultDomain(), "LayoutPass::buildFollowOnTable"); + // Set the initial size of the followon and the followonNext hash to the + // number of atoms that we have. + _followOnRoots.resize(range.size()); + _followOnNexts.resize(range.size()); + for (const DefinedAtom *ai : range) { + for (const Reference *r : *ai) { + if (r->kindNamespace() != lld::Reference::KindNamespace::all || + r->kindValue() != lld::Reference::kindLayoutAfter) + continue; + const DefinedAtom *targetAtom = dyn_cast<DefinedAtom>(r->target()); + _followOnNexts[ai] = targetAtom; + + // If we find a followon for the first time, let's make that atom as the + // root atom. + if (_followOnRoots.count(ai) == 0) + _followOnRoots[ai] = ai; + + auto iter = _followOnRoots.find(targetAtom); + if (iter == _followOnRoots.end()) { + // If the targetAtom is not a root of any chain, let's make the root of + // the targetAtom to the root of the current chain. + + // The expression m[i] = m[j] where m is a DenseMap and i != j is not + // safe. m[j] returns a reference, which would be invalidated when a + // rehashing occurs. 
If rehashing occurs to make room for m[i], m[j] + // becomes invalid, and that invalid reference would be used as the RHS + // value of the expression. + // Copy the value to workaround. + const DefinedAtom *tmp = _followOnRoots[ai]; + _followOnRoots[targetAtom] = tmp; + continue; + } + if (iter->second == targetAtom) { + // If the targetAtom is the root of a chain, the chain becomes part of + // the current chain. Rewrite the subchain's root to the current + // chain's root. + setChainRoot(targetAtom, _followOnRoots[ai]); + continue; + } + // The targetAtom is already a part of a chain. If the current atom is + // of size zero, we can insert it in the middle of the chain just + // before the target atom, while not breaking other atom's followon + // relationships. If it's not, we can only insert the current atom at + // the beginning of the chain. All the atoms followed by the target + // atom must be of size zero in that case to satisfy the followon + // relationships. + size_t currentAtomSize = ai->size(); + if (currentAtomSize == 0) { + const DefinedAtom *targetPrevAtom = findAtomFollowedBy(targetAtom); + _followOnNexts[targetPrevAtom] = ai; + const DefinedAtom *tmp = _followOnRoots[targetPrevAtom]; + _followOnRoots[ai] = tmp; + continue; + } + if (!checkAllPrevAtomsZeroSize(targetAtom)) + break; + _followOnNexts[ai] = _followOnRoots[targetAtom]; + setChainRoot(_followOnRoots[targetAtom], _followOnRoots[ai]); + } + } +} + +/// Build an ordinal override map by traversing the followon chain, and +/// assigning ordinals to each atom, if the atoms have their ordinals +/// already assigned skip the atom and move to the next. 
This is the +/// main map thats used to sort the atoms while comparing two atoms together +void LayoutPass::buildOrdinalOverrideMap(MutableFile::DefinedAtomRange &range) { + ScopedTask task(getDefaultDomain(), "LayoutPass::buildOrdinalOverrideMap"); + uint64_t index = 0; + for (const DefinedAtom *ai : range) { + const DefinedAtom *atom = ai; + if (_ordinalOverrideMap.find(atom) != _ordinalOverrideMap.end()) + continue; + AtomToAtomT::iterator start = _followOnRoots.find(atom); + if (start == _followOnRoots.end()) + continue; + for (const DefinedAtom *nextAtom = start->second; nextAtom != NULL; + nextAtom = _followOnNexts[nextAtom]) { + AtomToOrdinalT::iterator pos = _ordinalOverrideMap.find(nextAtom); + if (pos == _ordinalOverrideMap.end()) + _ordinalOverrideMap[nextAtom] = index++; + } + } +} + +std::vector<LayoutPass::SortKey> +LayoutPass::decorate(MutableFile::DefinedAtomRange &atomRange) const { + std::vector<SortKey> ret; + for (const DefinedAtom *atom : atomRange) { + auto ri = _followOnRoots.find(atom); + auto oi = _ordinalOverrideMap.find(atom); + const DefinedAtom *root = (ri == _followOnRoots.end()) ? atom : ri->second; + uint64_t override = (oi == _ordinalOverrideMap.end()) ? 0 : oi->second; + ret.push_back(SortKey(atom, root, override)); + } + return ret; +} + +void LayoutPass::undecorate(MutableFile::DefinedAtomRange &atomRange, + std::vector<SortKey> &keys) const { + size_t i = 0; + for (SortKey &k : keys) + atomRange[i++] = k._atom; +} + +/// Perform the actual pass +void LayoutPass::perform(std::unique_ptr<MutableFile> &mergedFile) { + // sort the atoms + ScopedTask task(getDefaultDomain(), "LayoutPass"); + MutableFile::DefinedAtomRange atomRange = mergedFile->definedAtoms(); + + // Build follow on tables + buildFollowOnTable(atomRange); + + // Check the structure of followon graph if running in debug mode. 
+ DEBUG(checkFollowonChain(atomRange)); + + // Build override maps + buildOrdinalOverrideMap(atomRange); + + DEBUG({ + llvm::dbgs() << "unsorted atoms:\n"; + printDefinedAtoms(atomRange); + }); + + std::vector<LayoutPass::SortKey> vec = decorate(atomRange); + parallel_sort(vec.begin(), vec.end(), + [&](const LayoutPass::SortKey &l, const LayoutPass::SortKey &r) -> bool { + return compareAtoms(l, r, _customSorter); + }); + DEBUG(checkTransitivity(vec, _customSorter)); + undecorate(atomRange, vec); + + DEBUG({ + llvm::dbgs() << "sorted atoms:\n"; + printDefinedAtoms(atomRange); + }); +} + +void addLayoutPass(PassManager &pm, const MachOLinkingContext &ctx) { + pm.add(llvm::make_unique<LayoutPass>( + ctx.registry(), [&](const DefinedAtom * left, const DefinedAtom * right, + bool & leftBeforeRight) ->bool { + return ctx.customAtomOrderer(left, right, leftBeforeRight); + })); +} + +} // namespace mach_o +} // namespace lld diff --git a/lib/ReaderWriter/MachO/LayoutPass.h b/lib/ReaderWriter/MachO/LayoutPass.h new file mode 100644 index 000000000000..186f29be0719 --- /dev/null +++ b/lib/ReaderWriter/MachO/LayoutPass.h @@ -0,0 +1,97 @@ +//===------ lib/ReaderWriter/MachO/LayoutPass.h - Handles Layout of atoms -===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_MACHO_LAYOUT_PASS_H +#define LLD_READER_WRITER_MACHO_LAYOUT_PASS_H + +#include "lld/Core/File.h" +#include "lld/Core/Pass.h" +#include "lld/Core/Reader.h" +#include "llvm/ADT/DenseMap.h" +#include <map> +#include <string> +#include <vector> + +namespace lld { +class DefinedAtom; +class MutableFile; + +namespace mach_o { + +/// This linker pass does the layout of the atoms. 
By default, atoms are laid out in the +/// order their .o files were found on the command line, then by order of the +/// atoms (address) in the .o file. But some atoms have a preferred location +/// in their section (such as pinned to the start or end of the section), so +/// the sort must take that into account too. +class LayoutPass : public Pass { +public: + struct SortKey { + SortKey(const DefinedAtom *atom, const DefinedAtom *root, uint64_t override) + : _atom(atom), _root(root), _override(override) {} + const DefinedAtom *_atom; + const DefinedAtom *_root; + uint64_t _override; + }; + + typedef std::function<bool (const DefinedAtom *left, const DefinedAtom *right, + bool &leftBeforeRight)> SortOverride; + + LayoutPass(const Registry &registry, SortOverride sorter); + + /// Sorts atoms in mergedFile by content type then by command line order. + void perform(std::unique_ptr<MutableFile> &mergedFile) override; + + virtual ~LayoutPass() {} + +private: + // Build the followOn atoms chain as specified by the kindLayoutAfter + // reference type + void buildFollowOnTable(MutableFile::DefinedAtomRange &range); + + // Build a map of Atoms to ordinals for sorting the atoms + void buildOrdinalOverrideMap(MutableFile::DefinedAtomRange &range); + + const Registry &_registry; + SortOverride _customSorter; + + typedef llvm::DenseMap<const DefinedAtom *, const DefinedAtom *> AtomToAtomT; + typedef llvm::DenseMap<const DefinedAtom *, uint64_t> AtomToOrdinalT; + + // A map to be used to sort atoms. It represents the order of atoms in the + // result; if Atom X is mapped to atom Y in this map, X will be located + // immediately before Y in the output file. Y might be mapped to another + // atom, constructing a follow-on chain. An atom cannot be mapped to more + // than one atom unless all but one atom are of size zero. + AtomToAtomT _followOnNexts; + + // A map to be used to sort atoms. It's a map from an atom to its root of + // follow-on chain. A root atom is mapped to itself. 
If an atom is not in + // _followOnNexts, the atom is not in this map, and vice versa. + AtomToAtomT _followOnRoots; + + AtomToOrdinalT _ordinalOverrideMap; + + // Helper methods for buildFollowOnTable(). + const DefinedAtom *findAtomFollowedBy(const DefinedAtom *targetAtom); + bool checkAllPrevAtomsZeroSize(const DefinedAtom *targetAtom); + + void setChainRoot(const DefinedAtom *targetAtom, const DefinedAtom *root); + + std::vector<SortKey> decorate(MutableFile::DefinedAtomRange &atomRange) const; + void undecorate(MutableFile::DefinedAtomRange &atomRange, + std::vector<SortKey> &keys) const; + + // Check if the follow-on graph is a correct structure. For debugging only. + void checkFollowonChain(MutableFile::DefinedAtomRange &range); +}; + +} // namespace mach_o +} // namespace lld + +#endif // LLD_READER_WRITER_MACHO_LAYOUT_PASS_H diff --git a/lib/ReaderWriter/MachO/MachOLinkingContext.cpp b/lib/ReaderWriter/MachO/MachOLinkingContext.cpp new file mode 100644 index 000000000000..92385cf3e820 --- /dev/null +++ b/lib/ReaderWriter/MachO/MachOLinkingContext.cpp @@ -0,0 +1,969 @@ +//===- lib/ReaderWriter/MachO/MachOLinkingContext.cpp ---------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "lld/ReaderWriter/MachOLinkingContext.h" +#include "ArchHandler.h" +#include "File.h" +#include "MachONormalizedFile.h" +#include "MachOPasses.h" +#include "lld/Core/ArchiveLibraryFile.h" +#include "lld/Core/PassManager.h" +#include "lld/Core/Reader.h" +#include "lld/Core/Writer.h" +#include "lld/Driver/Driver.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Config/config.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/Host.h" +#include "llvm/Support/MachO.h" +#include "llvm/Support/Path.h" +#include <algorithm> + +#if defined(HAVE_CXXABI_H) +#include <cxxabi.h> +#endif + +using lld::mach_o::ArchHandler; +using lld::mach_o::MachODylibFile; +using namespace llvm::MachO; + +namespace lld { + +bool MachOLinkingContext::parsePackedVersion(StringRef str, uint32_t &result) { + result = 0; + + if (str.empty()) + return false; + + SmallVector<StringRef, 3> parts; + llvm::SplitString(str, parts, "."); + + unsigned long long num; + if (llvm::getAsUnsignedInteger(parts[0], 10, num)) + return true; + if (num > 65535) + return true; + result = num << 16; + + if (parts.size() > 1) { + if (llvm::getAsUnsignedInteger(parts[1], 10, num)) + return true; + if (num > 255) + return true; + result |= (num << 8); + } + + if (parts.size() > 2) { + if (llvm::getAsUnsignedInteger(parts[2], 10, num)) + return true; + if (num > 255) + return true; + result |= num; + } + + return false; +} + + +MachOLinkingContext::ArchInfo MachOLinkingContext::_s_archInfos[] = { + { "x86_64", arch_x86_64, true, CPU_TYPE_X86_64, CPU_SUBTYPE_X86_64_ALL }, + { "i386", arch_x86, true, CPU_TYPE_I386, CPU_SUBTYPE_X86_ALL }, + { "ppc", arch_ppc, false, CPU_TYPE_POWERPC, CPU_SUBTYPE_POWERPC_ALL }, + { "armv6", arch_armv6, true, CPU_TYPE_ARM, CPU_SUBTYPE_ARM_V6 }, + { "armv7", arch_armv7, true, CPU_TYPE_ARM, 
CPU_SUBTYPE_ARM_V7 }, + { "armv7s", arch_armv7s, true, CPU_TYPE_ARM, CPU_SUBTYPE_ARM_V7S }, + { "arm64", arch_arm64, true, CPU_TYPE_ARM64, CPU_SUBTYPE_ARM64_ALL }, + { "", arch_unknown,false, 0, 0 } +}; + +MachOLinkingContext::Arch +MachOLinkingContext::archFromCpuType(uint32_t cputype, uint32_t cpusubtype) { + for (ArchInfo *info = _s_archInfos; !info->archName.empty(); ++info) { + if ((info->cputype == cputype) && (info->cpusubtype == cpusubtype)) + return info->arch; + } + return arch_unknown; +} + +MachOLinkingContext::Arch +MachOLinkingContext::archFromName(StringRef archName) { + for (ArchInfo *info = _s_archInfos; !info->archName.empty(); ++info) { + if (info->archName.equals(archName)) + return info->arch; + } + return arch_unknown; +} + +StringRef MachOLinkingContext::nameFromArch(Arch arch) { + for (ArchInfo *info = _s_archInfos; !info->archName.empty(); ++info) { + if (info->arch == arch) + return info->archName; + } + return "<unknown>"; +} + +uint32_t MachOLinkingContext::cpuTypeFromArch(Arch arch) { + assert(arch != arch_unknown); + for (ArchInfo *info = _s_archInfos; !info->archName.empty(); ++info) { + if (info->arch == arch) + return info->cputype; + } + llvm_unreachable("Unknown arch type"); +} + +uint32_t MachOLinkingContext::cpuSubtypeFromArch(Arch arch) { + assert(arch != arch_unknown); + for (ArchInfo *info = _s_archInfos; !info->archName.empty(); ++info) { + if (info->arch == arch) + return info->cpusubtype; + } + llvm_unreachable("Unknown arch type"); +} + +bool MachOLinkingContext::isThinObjectFile(StringRef path, Arch &arch) { + return mach_o::normalized::isThinObjectFile(path, arch); +} + +bool MachOLinkingContext::sliceFromFatFile(const MemoryBuffer &mb, + uint32_t &offset, + uint32_t &size) { + return mach_o::normalized::sliceFromFatFile(mb, _arch, offset, size); +} + +MachOLinkingContext::MachOLinkingContext() + : _outputMachOType(MH_EXECUTE), _outputMachOTypeStatic(false), + _doNothing(false), _pie(false), _arch(arch_unknown), 
_os(OS::macOSX), + _osMinVersion(0), _pageZeroSize(0), _pageSize(4096), _baseAddress(0), + _compatibilityVersion(0), _currentVersion(0), _deadStrippableDylib(false), + _printAtoms(false), _testingFileUsage(false), _keepPrivateExterns(false), + _demangle(false), _archHandler(nullptr), + _exportMode(ExportMode::globals), + _debugInfoMode(DebugInfoMode::addDebugMap), _orderFileEntries(0) {} + +MachOLinkingContext::~MachOLinkingContext() {} + +void MachOLinkingContext::configure(HeaderFileType type, Arch arch, OS os, + uint32_t minOSVersion) { + _outputMachOType = type; + _arch = arch; + _os = os; + _osMinVersion = minOSVersion; + + // If min OS not specified on command line, use reasonable defaults. + if (minOSVersion == 0) { + switch (_arch) { + case arch_x86_64: + case arch_x86: + parsePackedVersion("10.8", _osMinVersion); + _os = MachOLinkingContext::OS::macOSX; + break; + case arch_armv6: + case arch_armv7: + case arch_armv7s: + case arch_arm64: + parsePackedVersion("7.0", _osMinVersion); + _os = MachOLinkingContext::OS::iOS; + break; + default: + break; + } + } + + switch (_outputMachOType) { + case llvm::MachO::MH_EXECUTE: + // If targeting newer OS, use _main + if (minOS("10.8", "6.0")) { + _entrySymbolName = "_main"; + } else { + // If targeting older OS, use start (in crt1.o) + _entrySymbolName = "start"; + } + + // __PAGEZERO defaults to 4GB on 64-bit (except for PPC64 which lld does not + // support) and 4KB on 32-bit. + if (is64Bit(_arch)) { + _pageZeroSize = 0x100000000; + } else { + _pageZeroSize = 0x1000; + } + + // Make PIE by default when targeting newer OSs. 
+ switch (os) { + case OS::macOSX: + if (minOSVersion >= 0x000A0700) // MacOSX 10.7 + _pie = true; + break; + case OS::iOS: + if (minOSVersion >= 0x00040300) // iOS 4.3 + _pie = true; + break; + case OS::iOS_simulator: + _pie = true; + break; + case OS::unknown: + break; + } + break; + case llvm::MachO::MH_DYLIB: + setGlobalsAreDeadStripRoots(true); + break; + case llvm::MachO::MH_BUNDLE: + break; + case llvm::MachO::MH_OBJECT: + _printRemainingUndefines = false; + _allowRemainingUndefines = true; + default: + break; + } + + // Set default segment page sizes based on arch. + if (arch == arch_arm64) + _pageSize = 4*4096; +} + +uint32_t MachOLinkingContext::getCPUType() const { + return cpuTypeFromArch(_arch); +} + +uint32_t MachOLinkingContext::getCPUSubType() const { + return cpuSubtypeFromArch(_arch); +} + +bool MachOLinkingContext::is64Bit(Arch arch) { + for (ArchInfo *info = _s_archInfos; !info->archName.empty(); ++info) { + if (info->arch == arch) { + return (info->cputype & CPU_ARCH_ABI64); + } + } + // unknown archs are not 64-bit. + return false; +} + +bool MachOLinkingContext::isHostEndian(Arch arch) { + assert(arch != arch_unknown); + for (ArchInfo *info = _s_archInfos; !info->archName.empty(); ++info) { + if (info->arch == arch) { + return (info->littleEndian == llvm::sys::IsLittleEndianHost); + } + } + llvm_unreachable("Unknown arch type"); +} + +bool MachOLinkingContext::isBigEndian(Arch arch) { + assert(arch != arch_unknown); + for (ArchInfo *info = _s_archInfos; !info->archName.empty(); ++info) { + if (info->arch == arch) { + return ! 
info->littleEndian; + } + } + llvm_unreachable("Unknown arch type"); +} + + + +bool MachOLinkingContext::is64Bit() const { + return is64Bit(_arch); +} + +bool MachOLinkingContext::outputTypeHasEntry() const { + switch (_outputMachOType) { + case MH_EXECUTE: + case MH_DYLINKER: + case MH_PRELOAD: + return true; + default: + return false; + } +} + +bool MachOLinkingContext::needsStubsPass() const { + switch (_outputMachOType) { + case MH_EXECUTE: + return !_outputMachOTypeStatic; + case MH_DYLIB: + case MH_BUNDLE: + return true; + default: + return false; + } +} + +bool MachOLinkingContext::needsGOTPass() const { + // GOT pass not used in -r mode. + if (_outputMachOType == MH_OBJECT) + return false; + // Only some arches use GOT pass. + switch (_arch) { + case arch_x86_64: + case arch_arm64: + return true; + default: + return false; + } +} + +bool MachOLinkingContext::needsCompactUnwindPass() const { + switch (_outputMachOType) { + case MH_EXECUTE: + case MH_DYLIB: + case MH_BUNDLE: + return archHandler().needsCompactUnwind(); + default: + return false; + } +} + +bool MachOLinkingContext::needsShimPass() const { + // Shim pass only used in final executables. + if (_outputMachOType == MH_OBJECT) + return false; + // Only 32-bit arm arches use Shim pass. 
+ switch (_arch) { + case arch_armv6: + case arch_armv7: + case arch_armv7s: + return true; + default: + return false; + } +} + +StringRef MachOLinkingContext::binderSymbolName() const { + return archHandler().stubInfo().binderSymbolName; +} + + + + +bool MachOLinkingContext::minOS(StringRef mac, StringRef iOS) const { + uint32_t parsedVersion; + switch (_os) { + case OS::macOSX: + if (parsePackedVersion(mac, parsedVersion)) + return false; + return _osMinVersion >= parsedVersion; + case OS::iOS: + case OS::iOS_simulator: + if (parsePackedVersion(iOS, parsedVersion)) + return false; + return _osMinVersion >= parsedVersion; + case OS::unknown: + break; + } + llvm_unreachable("target not configured for iOS or MacOSX"); +} + +bool MachOLinkingContext::addEntryPointLoadCommand() const { + if ((_outputMachOType == MH_EXECUTE) && !_outputMachOTypeStatic) { + return minOS("10.8", "6.0"); + } + return false; +} + +bool MachOLinkingContext::addUnixThreadLoadCommand() const { + switch (_outputMachOType) { + case MH_EXECUTE: + if (_outputMachOTypeStatic) + return true; + else + return !minOS("10.8", "6.0"); + break; + case MH_DYLINKER: + case MH_PRELOAD: + return true; + default: + return false; + } +} + +bool MachOLinkingContext::pathExists(StringRef path) const { + if (!_testingFileUsage) + return llvm::sys::fs::exists(path.str()); + + // Otherwise, we're in test mode: only files explicitly provided on the + // command-line exist. + std::string key = path.str(); + std::replace(key.begin(), key.end(), '\\', '/'); + return _existingPaths.find(key) != _existingPaths.end(); +} + +bool MachOLinkingContext::fileExists(StringRef path) const { + bool found = pathExists(path); + // Log search misses. + if (!found) + addInputFileNotFound(path); + + // When testing, file is never opened, so logging is done here. 
+ if (_testingFileUsage && found) + addInputFileDependency(path); + + return found; +} + +void MachOLinkingContext::setSysLibRoots(const StringRefVector &paths) { + _syslibRoots = paths; +} + +void MachOLinkingContext::addRpath(StringRef rpath) { + _rpaths.push_back(rpath); +} + +void MachOLinkingContext::addModifiedSearchDir(StringRef libPath, + bool isSystemPath) { + bool addedModifiedPath = false; + + // -syslibroot only applies to absolute paths. + if (libPath.startswith("/")) { + for (auto syslibRoot : _syslibRoots) { + SmallString<256> path(syslibRoot); + llvm::sys::path::append(path, libPath); + if (pathExists(path)) { + _searchDirs.push_back(path.str().copy(_allocator)); + addedModifiedPath = true; + } + } + } + + if (addedModifiedPath) + return; + + // Finally, if only one -syslibroot is given, system paths which aren't in it + // get suppressed. + if (_syslibRoots.size() != 1 || !isSystemPath) { + if (pathExists(libPath)) { + _searchDirs.push_back(libPath); + } + } +} + +void MachOLinkingContext::addFrameworkSearchDir(StringRef fwPath, + bool isSystemPath) { + bool pathAdded = false; + + // -syslibroot only applies to absolute framework search paths. + if (fwPath.startswith("/")) { + for (auto syslibRoot : _syslibRoots) { + SmallString<256> path(syslibRoot); + llvm::sys::path::append(path, fwPath); + if (pathExists(path)) { + _frameworkDirs.push_back(path.str().copy(_allocator)); + pathAdded = true; + } + } + } + // If fwPath found in any -syslibroot, then done. + if (pathAdded) + return; + + // If only one -syslibroot, system paths not in that SDK are suppressed. + if (isSystemPath && (_syslibRoots.size() == 1)) + return; + + // Only use raw fwPath if that directory exists. 
+ if (pathExists(fwPath)) + _frameworkDirs.push_back(fwPath); +} + + +ErrorOr<StringRef> +MachOLinkingContext::searchDirForLibrary(StringRef path, + StringRef libName) const { + SmallString<256> fullPath; + if (libName.endswith(".o")) { + // A request ending in .o is special: just search for the file directly. + fullPath.assign(path); + llvm::sys::path::append(fullPath, libName); + if (fileExists(fullPath)) + return fullPath.str().copy(_allocator); + return make_error_code(llvm::errc::no_such_file_or_directory); + } + + // Search for dynamic library + fullPath.assign(path); + llvm::sys::path::append(fullPath, Twine("lib") + libName + ".dylib"); + if (fileExists(fullPath)) + return fullPath.str().copy(_allocator); + + // If not, try for a static library + fullPath.assign(path); + llvm::sys::path::append(fullPath, Twine("lib") + libName + ".a"); + if (fileExists(fullPath)) + return fullPath.str().copy(_allocator); + + return make_error_code(llvm::errc::no_such_file_or_directory); +} + + + +ErrorOr<StringRef> MachOLinkingContext::searchLibrary(StringRef libName) const { + SmallString<256> path; + for (StringRef dir : searchDirs()) { + ErrorOr<StringRef> ec = searchDirForLibrary(dir, libName); + if (ec) + return ec; + } + + return make_error_code(llvm::errc::no_such_file_or_directory); +} + + +ErrorOr<StringRef> MachOLinkingContext::findPathForFramework(StringRef fwName) const{ + SmallString<256> fullPath; + for (StringRef dir : frameworkDirs()) { + fullPath.assign(dir); + llvm::sys::path::append(fullPath, Twine(fwName) + ".framework", fwName); + if (fileExists(fullPath)) + return fullPath.str().copy(_allocator); + } + + return make_error_code(llvm::errc::no_such_file_or_directory); +} + +bool MachOLinkingContext::validateImpl(raw_ostream &diagnostics) { + // TODO: if -arch not specified, look at arch of first .o file. 
+ + if (_currentVersion && _outputMachOType != MH_DYLIB) { + diagnostics << "error: -current_version can only be used with dylibs\n"; + return false; + } + + if (_compatibilityVersion && _outputMachOType != MH_DYLIB) { + diagnostics + << "error: -compatibility_version can only be used with dylibs\n"; + return false; + } + + if (_deadStrippableDylib && _outputMachOType != MH_DYLIB) { + diagnostics + << "error: -mark_dead_strippable_dylib can only be used with dylibs.\n"; + return false; + } + + if (!_bundleLoader.empty() && outputMachOType() != MH_BUNDLE) { + diagnostics + << "error: -bundle_loader can only be used with Mach-O bundles\n"; + return false; + } + + // If -exported_symbols_list used, all exported symbols must be defined. + if (_exportMode == ExportMode::whiteList) { + for (const auto &symbol : _exportedSymbols) + addInitialUndefinedSymbol(symbol.getKey()); + } + + // If -dead_strip, set up initial live symbols. + if (deadStrip()) { + // Entry point is live. + if (outputTypeHasEntry()) + addDeadStripRoot(entrySymbolName()); + // Lazy binding helper is live. + if (needsStubsPass()) + addDeadStripRoot(binderSymbolName()); + // If using -exported_symbols_list, make all exported symbols live. + if (_exportMode == ExportMode::whiteList) { + setGlobalsAreDeadStripRoots(false); + for (const auto &symbol : _exportedSymbols) + addDeadStripRoot(symbol.getKey()); + } + } + + addOutputFileDependency(outputPath()); + + return true; +} + +void MachOLinkingContext::addPasses(PassManager &pm) { + mach_o::addLayoutPass(pm, *this); + if (needsStubsPass()) + mach_o::addStubsPass(pm, *this); + if (needsCompactUnwindPass()) + mach_o::addCompactUnwindPass(pm, *this); + if (needsGOTPass()) + mach_o::addGOTPass(pm, *this); + if (needsShimPass()) + mach_o::addShimPass(pm, *this); // Shim pass must run after stubs pass. 
+} + +Writer &MachOLinkingContext::writer() const { + if (!_writer) + _writer = createWriterMachO(*this); + return *_writer; +} + +ErrorOr<std::unique_ptr<MemoryBuffer>> +MachOLinkingContext::getMemoryBuffer(StringRef path) { + addInputFileDependency(path); + + ErrorOr<std::unique_ptr<MemoryBuffer>> mbOrErr = + MemoryBuffer::getFileOrSTDIN(path); + if (std::error_code ec = mbOrErr.getError()) + return ec; + std::unique_ptr<MemoryBuffer> mb = std::move(mbOrErr.get()); + + // If buffer contains a fat file, find required arch in fat buffer + // and switch buffer to point to just that required slice. + uint32_t offset; + uint32_t size; + if (sliceFromFatFile(*mb, offset, size)) + return MemoryBuffer::getFileSlice(path, size, offset); + return std::move(mb); +} + +MachODylibFile* MachOLinkingContext::loadIndirectDylib(StringRef path) { + ErrorOr<std::unique_ptr<MemoryBuffer>> mbOrErr = getMemoryBuffer(path); + if (mbOrErr.getError()) + return nullptr; + + std::vector<std::unique_ptr<File>> files; + if (registry().loadFile(std::move(mbOrErr.get()), files)) + return nullptr; + assert(files.size() == 1 && "expected one file in dylib"); + files[0]->parse(); + MachODylibFile* result = reinterpret_cast<MachODylibFile*>(files[0].get()); + // Node object now owned by _indirectDylibs vector. + _indirectDylibs.push_back(std::move(files[0])); + return result; +} + + +MachODylibFile* MachOLinkingContext::findIndirectDylib(StringRef path) { + // See if already loaded. 
+ auto pos = _pathToDylibMap.find(path); + if (pos != _pathToDylibMap.end()) + return pos->second; + + // Search -L paths if of the form "libXXX.dylib" + std::pair<StringRef, StringRef> split = path.rsplit('/'); + StringRef leafName = split.second; + if (leafName.startswith("lib") && leafName.endswith(".dylib")) { + // FIXME: Need to enhance searchLibrary() to only look for .dylib + auto libPath = searchLibrary(leafName); + if (!libPath.getError()) { + return loadIndirectDylib(libPath.get()); + } + } + + // Try full path with sysroot. + for (StringRef sysPath : _syslibRoots) { + SmallString<256> fullPath; + fullPath.assign(sysPath); + llvm::sys::path::append(fullPath, path); + if (pathExists(fullPath)) + return loadIndirectDylib(fullPath); + } + + // Try full path. + if (pathExists(path)) { + return loadIndirectDylib(path); + } + + return nullptr; +} + +uint32_t MachOLinkingContext::dylibCurrentVersion(StringRef installName) const { + auto pos = _pathToDylibMap.find(installName); + if (pos != _pathToDylibMap.end()) + return pos->second->currentVersion(); + else + return 0x1000; // 1.0 +} + +uint32_t MachOLinkingContext::dylibCompatVersion(StringRef installName) const { + auto pos = _pathToDylibMap.find(installName); + if (pos != _pathToDylibMap.end()) + return pos->second->compatVersion(); + else + return 0x1000; // 1.0 +} + +bool MachOLinkingContext::createImplicitFiles( + std::vector<std::unique_ptr<File> > &result) { + // Add indirect dylibs by asking each linked dylib to add its indirects. + // Iterate until no more dylibs get loaded. + size_t dylibCount = 0; + while (dylibCount != _allDylibs.size()) { + dylibCount = _allDylibs.size(); + for (MachODylibFile *dylib : _allDylibs) { + dylib->loadReExportedDylibs([this] (StringRef path) -> MachODylibFile* { + return findIndirectDylib(path); }); + } + } + + // Let writer add output type specific extras. 
+ return writer().createImplicitFiles(result); +} + + +void MachOLinkingContext::registerDylib(MachODylibFile *dylib, + bool upward) const { + _allDylibs.insert(dylib); + _pathToDylibMap[dylib->installName()] = dylib; + // If path is different than install name, register path too. + if (!dylib->path().equals(dylib->installName())) + _pathToDylibMap[dylib->path()] = dylib; + if (upward) + _upwardDylibs.insert(dylib); +} + + +bool MachOLinkingContext::isUpwardDylib(StringRef installName) const { + for (MachODylibFile *dylib : _upwardDylibs) { + if (dylib->installName().equals(installName)) + return true; + } + return false; +} + +ArchHandler &MachOLinkingContext::archHandler() const { + if (!_archHandler) + _archHandler = ArchHandler::create(_arch); + return *_archHandler; +} + + +void MachOLinkingContext::addSectionAlignment(StringRef seg, StringRef sect, + uint8_t align2) { + SectionAlign entry; + entry.segmentName = seg; + entry.sectionName = sect; + entry.align2 = align2; + _sectAligns.push_back(entry); +} + +bool MachOLinkingContext::sectionAligned(StringRef seg, StringRef sect, + uint8_t &align2) const { + for (const SectionAlign &entry : _sectAligns) { + if (seg.equals(entry.segmentName) && sect.equals(entry.sectionName)) { + align2 = entry.align2; + return true; + } + } + return false; +} + + +void MachOLinkingContext::addExportSymbol(StringRef sym) { + // Support old crufty export lists with bogus entries. + if (sym.endswith(".eh") || sym.startswith(".objc_category_name_")) { + llvm::errs() << "warning: ignoring " << sym << " in export list\n"; + return; + } + // Only i386 MacOSX uses old ABI, so don't change those. + if ((_os != OS::macOSX) || (_arch != arch_x86)) { + // ObjC has two different ABIs. Be nice and allow one export list to work for + // both ABIs by renaming symbols. 
+ if (sym.startswith(".objc_class_name_")) { + std::string abi2className("_OBJC_CLASS_$_"); + abi2className += sym.substr(17); + _exportedSymbols.insert(copy(abi2className)); + std::string abi2metaclassName("_OBJC_METACLASS_$_"); + abi2metaclassName += sym.substr(17); + _exportedSymbols.insert(copy(abi2metaclassName)); + return; + } + } + + // FIXME: Support wildcards. + _exportedSymbols.insert(sym); +} + +bool MachOLinkingContext::exportSymbolNamed(StringRef sym) const { + switch (_exportMode) { + case ExportMode::globals: + llvm_unreachable("exportSymbolNamed() should not be called in this mode"); + break; + case ExportMode::whiteList: + return _exportedSymbols.count(sym); + case ExportMode::blackList: + return !_exportedSymbols.count(sym); + } + llvm_unreachable("_exportMode unknown enum value"); +} + +std::string MachOLinkingContext::demangle(StringRef symbolName) const { + // Only try to demangle symbols if -demangle on command line + if (!demangleSymbols()) + return symbolName; + + // Only try to demangle symbols that look like C++ symbols + if (!symbolName.startswith("__Z")) + return symbolName; + +#if defined(HAVE_CXXABI_H) + SmallString<256> symBuff; + StringRef nullTermSym = Twine(symbolName).toNullTerminatedStringRef(symBuff); + // Mach-O has extra leading underscore that needs to be removed. + const char *cstr = nullTermSym.data() + 1; + int status; + char *demangled = abi::__cxa_demangle(cstr, nullptr, nullptr, &status); + if (demangled != NULL) { + std::string result(demangled); + // __cxa_demangle() always uses a malloc'ed buffer to return the result. 
+ free(demangled); + return result; + } +#endif + + return symbolName; +} + +std::error_code MachOLinkingContext::createDependencyFile(StringRef path) { + std::error_code ec; + _dependencyInfo = std::unique_ptr<llvm::raw_fd_ostream>(new + llvm::raw_fd_ostream(path, ec, llvm::sys::fs::F_None)); + if (ec) { + _dependencyInfo.reset(); + return ec; + } + + char linkerVersionOpcode = 0x00; + *_dependencyInfo << linkerVersionOpcode; + *_dependencyInfo << "lld"; // FIXME + *_dependencyInfo << '\0'; + + return std::error_code(); +} + +void MachOLinkingContext::addInputFileDependency(StringRef path) const { + if (!_dependencyInfo) + return; + + char inputFileOpcode = 0x10; + *_dependencyInfo << inputFileOpcode; + *_dependencyInfo << path; + *_dependencyInfo << '\0'; +} + +void MachOLinkingContext::addInputFileNotFound(StringRef path) const { + if (!_dependencyInfo) + return; + + char inputFileOpcode = 0x11; + *_dependencyInfo << inputFileOpcode; + *_dependencyInfo << path; + *_dependencyInfo << '\0'; +} + +void MachOLinkingContext::addOutputFileDependency(StringRef path) const { + if (!_dependencyInfo) + return; + + char outputFileOpcode = 0x40; + *_dependencyInfo << outputFileOpcode; + *_dependencyInfo << path; + *_dependencyInfo << '\0'; +} + +void MachOLinkingContext::appendOrderedSymbol(StringRef symbol, + StringRef filename) { + // To support sorting static functions which may have the same name in + // multiple .o files, _orderFiles maps the symbol name to a vector + // of OrderFileNode each of which can specify a file prefix. 
+ OrderFileNode info; + if (!filename.empty()) + info.fileFilter = copy(filename); + info.order = _orderFileEntries++; + _orderFiles[symbol].push_back(info); +} + +bool +MachOLinkingContext::findOrderOrdinal(const std::vector<OrderFileNode> &nodes, + const DefinedAtom *atom, + unsigned &ordinal) { + const File *objFile = &atom->file(); + assert(objFile); + StringRef objName = objFile->path(); + std::pair<StringRef, StringRef> dirAndLeaf = objName.rsplit('/'); + if (!dirAndLeaf.second.empty()) + objName = dirAndLeaf.second; + for (const OrderFileNode &info : nodes) { + if (info.fileFilter.empty()) { + // Have unprefixed symbol name in order file that matches this atom. + ordinal = info.order; + return true; + } + if (info.fileFilter.equals(objName)) { + // Have prefixed symbol name in order file that matches atom's path. + ordinal = info.order; + return true; + } + } + return false; +} + +bool MachOLinkingContext::customAtomOrderer(const DefinedAtom *left, + const DefinedAtom *right, + bool &leftBeforeRight) const { + // No custom sorting if no order file entries. + if (!_orderFileEntries) + return false; + + // Order files can only order named atoms. + StringRef leftName = left->name(); + StringRef rightName = right->name(); + if (leftName.empty() || rightName.empty()) + return false; + + // If neither is in order file list, no custom sorter. + auto leftPos = _orderFiles.find(leftName); + auto rightPos = _orderFiles.find(rightName); + bool leftIsOrdered = (leftPos != _orderFiles.end()); + bool rightIsOrdered = (rightPos != _orderFiles.end()); + if (!leftIsOrdered && !rightIsOrdered) + return false; + + // There could be multiple symbols with same name but different file prefixes. 
+ unsigned leftOrder; + unsigned rightOrder; + bool foundLeft = + leftIsOrdered && findOrderOrdinal(leftPos->getValue(), left, leftOrder); + bool foundRight = rightIsOrdered && + findOrderOrdinal(rightPos->getValue(), right, rightOrder); + if (!foundLeft && !foundRight) + return false; + + // If only one is in order file list, ordered one goes first. + if (foundLeft != foundRight) + leftBeforeRight = foundLeft; + else + leftBeforeRight = (leftOrder < rightOrder); + + return true; +} + +static bool isLibrary(const std::unique_ptr<Node> &elem) { + if (FileNode *node = dyn_cast<FileNode>(const_cast<Node *>(elem.get()))) { + File *file = node->getFile(); + return isa<SharedLibraryFile>(file) || isa<ArchiveLibraryFile>(file); + } + return false; +} + +// The darwin linker processes input files in two phases. The first phase +// links in all object (.o) files in command line order. The second phase +// links in libraries in command line order. +// In this function we reorder the input files so that all the object files +// comes before any library file. We also make a group for the library files +// so that the Resolver will reiterate over the libraries as long as we find +// new undefines from libraries. 
+void MachOLinkingContext::finalizeInputFiles() { + std::vector<std::unique_ptr<Node>> &elements = getNodes(); + std::stable_sort(elements.begin(), elements.end(), + [](const std::unique_ptr<Node> &a, + const std::unique_ptr<Node> &b) { + return !isLibrary(a) && isLibrary(b); + }); + size_t numLibs = std::count_if(elements.begin(), elements.end(), isLibrary); + elements.push_back(llvm::make_unique<GroupEnd>(numLibs)); +} + +} // end namespace lld diff --git a/lib/ReaderWriter/MachO/MachONormalizedFile.h b/lib/ReaderWriter/MachO/MachONormalizedFile.h new file mode 100644 index 000000000000..70bcde2dea22 --- /dev/null +++ b/lib/ReaderWriter/MachO/MachONormalizedFile.h @@ -0,0 +1,323 @@ +//===- lib/ReaderWriter/MachO/MachONormalizedFile.h -----------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +/// +/// \file These data structures comprise the "normalized" view of +/// mach-o object files. The normalized view is an in-memory only data structure +/// which is always in native endianness and pointer size. +/// +/// The normalized view easily converts to and from YAML using YAML I/O. +/// +/// The normalized view converts to and from binary mach-o object files using +/// the writeBinary() and readBinary() functions. +/// +/// The normalized view converts to and from lld::Atoms using the +/// normalizedToAtoms() and normalizedFromAtoms(). 
///
/// Overall, the conversion paths available look like:
///
///                 +---------------+
///                 | binary mach-o |
///                 +---------------+
///                        ^
///                        |
///                        v
///                  +------------+         +------+
///                  | normalized |   <->   | yaml |
///                  +------------+         +------+
///                        ^
///                        |
///                        v
///                    +-------+
///                    | Atoms |
///                    +-------+
///

#include "lld/Core/Error.h"
#include "lld/Core/LLVM.h"
#include "lld/ReaderWriter/MachOLinkingContext.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/MachO.h"
#include "llvm/Support/YAMLTraits.h"

#ifndef LLD_READER_WRITER_MACHO_NORMALIZE_FILE_H
#define LLD_READER_WRITER_MACHO_NORMALIZE_FILE_H

using llvm::BumpPtrAllocator;
using llvm::yaml::Hex64;
using llvm::yaml::Hex32;
using llvm::yaml::Hex16;
using llvm::yaml::Hex8;
using llvm::yaml::SequenceTraits;
using llvm::MachO::HeaderFileType;
using llvm::MachO::BindType;
using llvm::MachO::RebaseType;
using llvm::MachO::NListType;
using llvm::MachO::RelocationInfoType;
using llvm::MachO::SectionType;
using llvm::MachO::LoadCommandType;
using llvm::MachO::ExportSymbolKind;
using llvm::MachO::DataRegionType;

namespace lld {
namespace mach_o {
namespace normalized {


/// The real mach-o relocation record is 8-bytes on disk and is
/// encoded in one of two different bit-field patterns.  This
/// normalized form has the union of all possible fields.
///
/// Which fields carry data depends on \c scattered: the binary reader fills
/// \c value only for scattered records and \c isExtern / \c symbol only for
/// non-scattered ones, zeroing the others.
struct Relocation {
  /// Starts as a non-scattered GENERIC_RELOC_VANILLA record with every other
  /// field zero/false.
  Relocation() : offset(0), scattered(false),
                 type(llvm::MachO::GENERIC_RELOC_VANILLA),
                 length(0), pcRel(false), isExtern(false), value(0),
                 symbol(0) { }

  Hex32               offset;     // First word; low 24 bits when scattered.
  bool                scattered;  // True for the scattered bit-field layout.
  RelocationInfoType  type;
  uint8_t             length;     // Raw 2-bit length field.
  bool                pcRel;
  bool                isExtern;   // Non-scattered records only.
  Hex32               value;      // Second word of a scattered record.
  uint32_t            symbol;     // 24-bit field of a non-scattered record.
};

/// A typedef so that YAML I/O can treat this vector as a sequence.
typedef std::vector<Relocation> Relocations;

/// A typedef so that YAML I/O can process the raw bytes in a section.
typedef std::vector<Hex8> ContentBytes;

/// A typedef so that YAML I/O can treat indirect symbols as a flow sequence.
typedef std::vector<uint32_t> IndirectSymbols;

/// A typedef so that YAML I/O can encode/decode section attributes.
LLVM_YAML_STRONG_TYPEDEF(uint32_t, SectionAttr)

/// Mach-O has a 32-bit and 64-bit section record.  This normalized form
/// can support either kind.
struct Section {
  /// Starts as an S_REGULAR section with no attributes, alignment 0 and
  /// address 0; names, content and the two vectors start empty.
  Section() : type(llvm::MachO::S_REGULAR),
              attributes(0), alignment(0), address(0) { }

  StringRef       segmentName;
  StringRef       sectionName;
  SectionType     type;             // Section flags masked with SECTION_TYPE.
  SectionAttr     attributes;       // Flags masked with SECTION_ATTRIBUTES.
  uint32_t        alignment;
  Hex64           address;
  ArrayRef<uint8_t> content;        // Non-owning view of the section bytes.
  Relocations     relocations;
  IndirectSymbols indirectSymbols;
};


/// A typedef so that YAML I/O can encode/decode the scope bits of an nlist.
LLVM_YAML_STRONG_TYPEDEF(uint8_t, SymbolScope)

/// A typedef so that YAML I/O can encode/decode the desc bits of an nlist.
LLVM_YAML_STRONG_TYPEDEF(uint16_t, SymbolDesc)

/// Mach-O has a 32-bit and 64-bit symbol table entry (nlist), and the symbol
/// type and scope are mixed in the same n_type field.  This normalized form
/// works for any pointer size and separates out the type and scope.
struct Symbol {
  /// Starts as an N_UNDF symbol with every numeric field zero.
  Symbol() : type(llvm::MachO::N_UNDF), scope(0), sect(0), desc(0), value(0) { }

  StringRef     name;
  NListType     type;     // n_type masked with N_TYPE.
  SymbolScope   scope;    // n_type masked with (N_PEXT | N_EXT).
  uint8_t       sect;
  SymbolDesc    desc;
  Hex64         value;
};

/// A typedef so that YAML I/O can (de/en)code the protection bits of a segment.
LLVM_YAML_STRONG_TYPEDEF(uint32_t, VMProtect)

/// A typedef to hold versions X.Y.Z packed into 32-bit xxxx.yy.zz
LLVM_YAML_STRONG_TYPEDEF(uint32_t, PackedVersion)

/// Segments are only used in normalized final linked images (not in relocatable
/// object files). They specify how a range of the file is loaded.
struct Segment {
  StringRef     name;
  Hex64         address;
  Hex64         size;
  VMProtect     access;
};

/// Only used in normalized final linked images to specify on which dylibs
/// it depends.
struct DependentDylib {
  StringRef       path;
  LoadCommandType kind;             // Which dylib load command named it.
  PackedVersion   compatVersion;
  PackedVersion   currentVersion;
};

/// A normalized rebasing entry. Only used in normalized final linked images.
struct RebaseLocation {
  Hex32         segOffset;
  uint8_t       segIndex;
  RebaseType    kind;
};

/// A normalized binding entry. Only used in normalized final linked images.
struct BindLocation {
  Hex32           segOffset;
  uint8_t         segIndex;
  BindType        kind;
  bool            canBeNull;
  int             ordinal;
  StringRef       symbolName;
  Hex64           addend;
};

/// A typedef so that YAML I/O can encode/decode export flags.
LLVM_YAML_STRONG_TYPEDEF(uint32_t, ExportFlags)

/// A normalized export entry. Only used in normalized final linked images.
struct Export {
  StringRef         name;
  Hex64             offset;
  ExportSymbolKind  kind;
  ExportFlags       flags;
  Hex32             otherOffset;
  StringRef         otherName;
};

/// A normalized data-in-code entry.
struct DataInCode {
  Hex32           offset;
  Hex16           length;
  DataRegionType  kind;
};


/// A typedef so that YAML I/O can encode/decode mach_header.flags.
LLVM_YAML_STRONG_TYPEDEF(uint32_t, FileFlags)

/// The normalized, in-memory view of one mach-o file: header fields, sections,
/// symbols split by kind, and the load-command data that only final linked
/// images carry.
struct NormalizedFile {
  /// Starts as an MH_OBJECT file of unknown architecture and OS with no
  /// flags and no UUID.
  // NOTE(review): the remaining scalar members (versions, entryAddress,
  // pageSize, ...) are left to their own default constructors; confirm those
  // zero-initialize before relying on their values in a fresh object.
  NormalizedFile() : arch(MachOLinkingContext::arch_unknown),
                     fileType(llvm::MachO::MH_OBJECT),
                     flags(0),
                     hasUUID(false),
                     os(MachOLinkingContext::OS::unknown) { }

  MachOLinkingContext::Arch   arch;
  HeaderFileType              fileType;
  FileFlags                   flags;
  std::vector<Segment>        segments; // Not used in object files.
  std::vector<Section>        sections;

  // Symbols sorted by kind.
  std::vector<Symbol>         localSymbols;
  std::vector<Symbol>         globalSymbols;
  std::vector<Symbol>         undefinedSymbols;

  // Maps to load commands with no LINKEDIT content (final linked images only).
  std::vector<DependentDylib> dependentDylibs;
  StringRef                   installName;      // dylibs only
  PackedVersion               compatVersion;    // dylibs only
  PackedVersion               currentVersion;   // dylibs only
  bool                        hasUUID;
  std::vector<StringRef>      rpaths;
  Hex64                       entryAddress;
  MachOLinkingContext::OS     os;
  Hex64                       sourceVersion;
  PackedVersion               minOSverson;
  PackedVersion               sdkVersion;

  // Maps to load commands with LINKEDIT content (final linked images only).
  Hex32                       pageSize;
  std::vector<RebaseLocation> rebasingInfo;
  std::vector<BindLocation>   bindingInfo;
  std::vector<BindLocation>   weakBindingInfo;
  std::vector<BindLocation>   lazyBindingInfo;
  std::vector<Export>         exportInfo;
  std::vector<DataInCode>     dataInCode;

  // TODO:
  // code-signature
  // split-seg-info
  // function-starts

  // For any allocations in this struct which need to be owned by this struct.
  BumpPtrAllocator            ownedAllocations;
};

/// Tests if a file is a non-fat mach-o object file.
/// On success \p arch is set from the cpu type/subtype in the file's header.
bool isThinObjectFile(StringRef path, MachOLinkingContext::Arch &arch);

/// If the buffer is a fat file with the requested arch, then this function
/// returns true with 'offset' and 'size' set to location of the arch slice
/// within the buffer.  Otherwise returns false.
bool sliceFromFatFile(const MemoryBuffer &mb, MachOLinkingContext::Arch arch,
                       uint32_t &offset, uint32_t &size);

/// Reads a mach-o file and produces an in-memory normalized view.
/// Fails if the file's architecture is not \p arch.
ErrorOr<std::unique_ptr<NormalizedFile>>
readBinary(std::unique_ptr<MemoryBuffer> &mb,
           const MachOLinkingContext::Arch arch);

/// Takes in-memory normalized view and writes a mach-o object file.
std::error_code writeBinary(const NormalizedFile &file, StringRef path);

size_t headerAndLoadCommandsSize(const NormalizedFile &file);


/// Parses a yaml encoded mach-o file to produce an in-memory normalized view.
ErrorOr<std::unique_ptr<NormalizedFile>>
readYaml(std::unique_ptr<MemoryBuffer> &mb);

/// Writes a yaml encoded mach-o file given an in-memory normalized view.
std::error_code writeYaml(const NormalizedFile &file, raw_ostream &out);

/// Converts a normalized object file into atoms added to \p file.
std::error_code
normalizedObjectToAtoms(MachOFile *file,
                        const NormalizedFile &normalizedFile,
                        bool copyRefs);

/// Converts a normalized dylib into atoms added to \p file.
std::error_code
normalizedDylibToAtoms(MachODylibFile *file,
                       const NormalizedFile &normalizedFile,
                       bool copyRefs);

/// Takes in-memory normalized dylib or object and parses it into lld::File
ErrorOr<std::unique_ptr<lld::File>>
normalizedToAtoms(const NormalizedFile &normalizedFile, StringRef path,
                  bool copyRefs);

/// Takes atoms and generates a normalized mach-o view.
ErrorOr<std::unique_ptr<NormalizedFile>>
normalizedFromAtoms(const lld::File &atomFile, const MachOLinkingContext &ctxt);


} // namespace normalized

/// Class for interfacing mach-o yaml files into generic yaml parsing
class MachOYamlIOTaggedDocumentHandler : public YamlIOTaggedDocumentHandler {
public:
  /// Remembers the architecture this handler was created for.
  MachOYamlIOTaggedDocumentHandler(MachOLinkingContext::Arch arch)
    : _arch(arch) { }
  bool handledDocTag(llvm::yaml::IO &io, const lld::File *&file) const override;
private:
  const MachOLinkingContext::Arch _arch;
};

} // namespace mach_o
} // namespace lld

#endif // LLD_READER_WRITER_MACHO_NORMALIZE_FILE_H
diff --git a/lib/ReaderWriter/MachO/MachONormalizedFileBinaryReader.cpp b/lib/ReaderWriter/MachO/MachONormalizedFileBinaryReader.cpp
new file mode 100644
index 000000000000..07a6dbfe569b
--- /dev/null
+++ b/lib/ReaderWriter/MachO/MachONormalizedFileBinaryReader.cpp
@@ -0,0 +1,582 @@
//===- lib/ReaderWriter/MachO/MachONormalizedFileBinaryReader.cpp ---------===//
//
//                             The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// + +/// +/// \file For mach-o object files, this implementation converts from +/// mach-o on-disk binary format to in-memory normalized mach-o. +/// +/// +---------------+ +/// | binary mach-o | +/// +---------------+ +/// | +/// | +/// v +/// +------------+ +/// | normalized | +/// +------------+ + +#include "MachONormalizedFile.h" +#include "ArchHandler.h" +#include "MachONormalizedFileBinaryUtils.h" +#include "lld/Core/Error.h" +#include "lld/Core/LLVM.h" +#include "lld/Core/SharedLibraryFile.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Object/MachO.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FileOutputBuffer.h" +#include "llvm/Support/Host.h" +#include "llvm/Support/MachO.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/raw_ostream.h" +#include <functional> +#include <system_error> + +using namespace llvm::MachO; +using llvm::object::ExportEntry; +using llvm::object::MachOObjectFile; + +namespace lld { +namespace mach_o { +namespace normalized { + +// Utility to call a lambda expression on each load command. 
+static std::error_code forEachLoadCommand( + StringRef lcRange, unsigned lcCount, bool isBig, bool is64, + std::function<bool(uint32_t cmd, uint32_t size, const char *lc)> func) { + const char* p = lcRange.begin(); + for (unsigned i=0; i < lcCount; ++i) { + const load_command *lc = reinterpret_cast<const load_command*>(p); + load_command lcCopy; + const load_command *slc = lc; + if (isBig != llvm::sys::IsBigEndianHost) { + memcpy(&lcCopy, lc, sizeof(load_command)); + swapStruct(lcCopy); + slc = &lcCopy; + } + if ( (p + slc->cmdsize) > lcRange.end() ) + return make_error_code(llvm::errc::executable_format_error); + + if (func(slc->cmd, slc->cmdsize, p)) + return std::error_code(); + + p += slc->cmdsize; + } + + return std::error_code(); +} + +static std::error_code appendRelocations(Relocations &relocs, StringRef buffer, + bool bigEndian, + uint32_t reloff, uint32_t nreloc) { + if ((reloff + nreloc*8) > buffer.size()) + return make_error_code(llvm::errc::executable_format_error); + const any_relocation_info* relocsArray = + reinterpret_cast<const any_relocation_info*>(buffer.begin()+reloff); + + for(uint32_t i=0; i < nreloc; ++i) { + relocs.push_back(unpackRelocation(relocsArray[i], bigEndian)); + } + return std::error_code(); +} + +static std::error_code +appendIndirectSymbols(IndirectSymbols &isyms, StringRef buffer, bool isBig, + uint32_t istOffset, uint32_t istCount, + uint32_t startIndex, uint32_t count) { + if ((istOffset + istCount*4) > buffer.size()) + return make_error_code(llvm::errc::executable_format_error); + if (startIndex+count > istCount) + return make_error_code(llvm::errc::executable_format_error); + const uint8_t *indirectSymbolArray = (const uint8_t *)buffer.data(); + + for(uint32_t i=0; i < count; ++i) { + isyms.push_back(read32( + indirectSymbolArray + (startIndex + i) * sizeof(uint32_t), isBig)); + } + return std::error_code(); +} + + +template <typename T> static T readBigEndian(T t) { + if (llvm::sys::IsLittleEndianHost) + 
    llvm::sys::swapByteOrder(t);
  return t;
}


/// Checks whether \p mh starts with one of the four thin mach-o magic values.
///
/// The magic is read as a little-endian word, so the MH_MAGIC values report a
/// little-endian file and the byte-reversed MH_CIGAM values a big-endian one.
/// On a match \p is64 and \p isBig are set and true is returned; otherwise
/// both are left untouched and false is returned.
static bool isMachOHeader(const mach_header *mh, bool &is64, bool &isBig) {
  switch (read32(&mh->magic, false)) {
  case llvm::MachO::MH_MAGIC:
    is64 = false;
    isBig = false;
    return true;
  case llvm::MachO::MH_MAGIC_64:
    is64 = true;
    isBig = false;
    return true;
  case llvm::MachO::MH_CIGAM:
    is64 = false;
    isBig = true;
    return true;
  case llvm::MachO::MH_CIGAM_64:
    is64 = true;
    isBig = true;
    return true;
  default:
    return false;
  }
}


/// Returns true if the file at \p path is a thin (non-fat) mach-o file whose
/// header file type is MH_OBJECT; any failure to open or recognize the file
/// yields false.
bool isThinObjectFile(StringRef path, MachOLinkingContext::Arch &arch) {
  // Try opening and mapping file at path.
  ErrorOr<std::unique_ptr<MemoryBuffer>> b = MemoryBuffer::getFileOrSTDIN(path);
  if (b.getError())
    return false;

  // If file length < 32 it is too small to be mach-o object file.
  StringRef fileBuffer = b->get()->getBuffer();
  if (fileBuffer.size() < 32)
    return false;

  // If file buffer does not start with MH_MAGIC (and variants), not obj file.
  const mach_header *mh = reinterpret_cast<const mach_header *>(
                                                            fileBuffer.begin());
  bool is64, isBig;
  if (!isMachOHeader(mh, is64, isBig))
    return false;

  // If not MH_OBJECT, not object file.
  if (read32(&mh->filetype, isBig) != MH_OBJECT)
    return false;

  // Look up arch from cpu/subtype pair.
+ arch = MachOLinkingContext::archFromCpuType( + read32(&mh->cputype, isBig), + read32(&mh->cpusubtype, isBig)); + return true; +} + + +bool sliceFromFatFile(const MemoryBuffer &mb, MachOLinkingContext::Arch arch, + uint32_t &offset, uint32_t &size) { + const char *start = mb.getBufferStart(); + const llvm::MachO::fat_header *fh = + reinterpret_cast<const llvm::MachO::fat_header *>(start); + if (readBigEndian(fh->magic) != llvm::MachO::FAT_MAGIC) + return false; + uint32_t nfat_arch = readBigEndian(fh->nfat_arch); + const fat_arch *fstart = + reinterpret_cast<const fat_arch *>(start + sizeof(fat_header)); + const fat_arch *fend = + reinterpret_cast<const fat_arch *>(start + sizeof(fat_header) + + sizeof(fat_arch) * nfat_arch); + const uint32_t reqCpuType = MachOLinkingContext::cpuTypeFromArch(arch); + const uint32_t reqCpuSubtype = MachOLinkingContext::cpuSubtypeFromArch(arch); + for (const fat_arch *fa = fstart; fa < fend; ++fa) { + if ((readBigEndian(fa->cputype) == reqCpuType) && + (readBigEndian(fa->cpusubtype) == reqCpuSubtype)) { + offset = readBigEndian(fa->offset); + size = readBigEndian(fa->size); + if ((offset + size) > mb.getBufferSize()) + return false; + return true; + } + } + return false; +} + +/// Reads a mach-o file and produces an in-memory normalized view. +ErrorOr<std::unique_ptr<NormalizedFile>> +readBinary(std::unique_ptr<MemoryBuffer> &mb, + const MachOLinkingContext::Arch arch) { + // Make empty NormalizedFile. + std::unique_ptr<NormalizedFile> f(new NormalizedFile()); + + const char *start = mb->getBufferStart(); + size_t objSize = mb->getBufferSize(); + const mach_header *mh = reinterpret_cast<const mach_header *>(start); + + uint32_t sliceOffset; + uint32_t sliceSize; + if (sliceFromFatFile(*mb, arch, sliceOffset, sliceSize)) { + start = &start[sliceOffset]; + objSize = sliceSize; + mh = reinterpret_cast<const mach_header *>(start); + } + + // Determine endianness and pointer size for mach-o file. 
+ bool is64, isBig; + if (!isMachOHeader(mh, is64, isBig)) + return make_error_code(llvm::errc::executable_format_error); + + // Endian swap header, if needed. + mach_header headerCopy; + const mach_header *smh = mh; + if (isBig != llvm::sys::IsBigEndianHost) { + memcpy(&headerCopy, mh, sizeof(mach_header)); + swapStruct(headerCopy); + smh = &headerCopy; + } + + // Validate head and load commands fit in buffer. + const uint32_t lcCount = smh->ncmds; + const char *lcStart = + start + (is64 ? sizeof(mach_header_64) : sizeof(mach_header)); + StringRef lcRange(lcStart, smh->sizeofcmds); + if (lcRange.end() > (start + objSize)) + return make_error_code(llvm::errc::executable_format_error); + + // Get architecture from mach_header. + f->arch = MachOLinkingContext::archFromCpuType(smh->cputype, smh->cpusubtype); + if (f->arch != arch) { + return make_dynamic_error_code(Twine("file is wrong architecture. Expected " + "(" + MachOLinkingContext::nameFromArch(arch) + + ") found (" + + MachOLinkingContext::nameFromArch(f->arch) + + ")" )); + } + // Copy file type and flags + f->fileType = HeaderFileType(smh->filetype); + f->flags = smh->flags; + + + // Pre-scan load commands looking for indirect symbol table. + uint32_t indirectSymbolTableOffset = 0; + uint32_t indirectSymbolTableCount = 0; + std::error_code ec = forEachLoadCommand(lcRange, lcCount, isBig, is64, + [&](uint32_t cmd, uint32_t size, + const char *lc) -> bool { + if (cmd == LC_DYSYMTAB) { + const dysymtab_command *d = reinterpret_cast<const dysymtab_command*>(lc); + indirectSymbolTableOffset = read32(&d->indirectsymoff, isBig); + indirectSymbolTableCount = read32(&d->nindirectsyms, isBig); + return true; + } + return false; + }); + if (ec) + return ec; + + // Walk load commands looking for segments/sections and the symbol table. 
+ const data_in_code_entry *dataInCode = nullptr; + const dyld_info_command *dyldInfo = nullptr; + uint32_t dataInCodeSize = 0; + ec = forEachLoadCommand(lcRange, lcCount, isBig, is64, + [&] (uint32_t cmd, uint32_t size, const char* lc) -> bool { + switch(cmd) { + case LC_SEGMENT_64: + if (is64) { + const segment_command_64 *seg = + reinterpret_cast<const segment_command_64*>(lc); + const unsigned sectionCount = read32(&seg->nsects, isBig); + const section_64 *sects = reinterpret_cast<const section_64*> + (lc + sizeof(segment_command_64)); + const unsigned lcSize = sizeof(segment_command_64) + + sectionCount*sizeof(section_64); + // Verify sections don't extend beyond end of segment load command. + if (lcSize > size) + return true; + for (unsigned i=0; i < sectionCount; ++i) { + const section_64 *sect = §s[i]; + Section section; + section.segmentName = getString16(sect->segname); + section.sectionName = getString16(sect->sectname); + section.type = (SectionType)(read32(§->flags, isBig) & + SECTION_TYPE); + section.attributes = read32(§->flags, isBig) & SECTION_ATTRIBUTES; + section.alignment = read32(§->align, isBig); + section.address = read64(§->addr, isBig); + const uint8_t *content = + (const uint8_t *)start + read32(§->offset, isBig); + size_t contentSize = read64(§->size, isBig); + // Note: this assign() is copying the content bytes. Ideally, + // we can use a custom allocator for vector to avoid the copy. 
+ section.content = llvm::makeArrayRef(content, contentSize); + appendRelocations(section.relocations, mb->getBuffer(), isBig, + read32(§->reloff, isBig), + read32(§->nreloc, isBig)); + if (section.type == S_NON_LAZY_SYMBOL_POINTERS) { + appendIndirectSymbols(section.indirectSymbols, mb->getBuffer(), + isBig, + indirectSymbolTableOffset, + indirectSymbolTableCount, + read32(§->reserved1, isBig), + contentSize/4); + } + f->sections.push_back(section); + } + } + break; + case LC_SEGMENT: + if (!is64) { + const segment_command *seg = + reinterpret_cast<const segment_command*>(lc); + const unsigned sectionCount = read32(&seg->nsects, isBig); + const section *sects = reinterpret_cast<const section*> + (lc + sizeof(segment_command)); + const unsigned lcSize = sizeof(segment_command) + + sectionCount*sizeof(section); + // Verify sections don't extend beyond end of segment load command. + if (lcSize > size) + return true; + for (unsigned i=0; i < sectionCount; ++i) { + const section *sect = §s[i]; + Section section; + section.segmentName = getString16(sect->segname); + section.sectionName = getString16(sect->sectname); + section.type = (SectionType)(read32(§->flags, isBig) & + SECTION_TYPE); + section.attributes = + read32((const uint8_t *)§->flags, isBig) & SECTION_ATTRIBUTES; + section.alignment = read32(§->align, isBig); + section.address = read32(§->addr, isBig); + const uint8_t *content = + (const uint8_t *)start + read32(§->offset, isBig); + size_t contentSize = read32(§->size, isBig); + // Note: this assign() is copying the content bytes. Ideally, + // we can use a custom allocator for vector to avoid the copy. 
+ section.content = llvm::makeArrayRef(content, contentSize); + appendRelocations(section.relocations, mb->getBuffer(), isBig, + read32(§->reloff, isBig), + read32(§->nreloc, isBig)); + if (section.type == S_NON_LAZY_SYMBOL_POINTERS) { + appendIndirectSymbols( + section.indirectSymbols, mb->getBuffer(), isBig, + indirectSymbolTableOffset, indirectSymbolTableCount, + read32(§->reserved1, isBig), contentSize / 4); + } + f->sections.push_back(section); + } + } + break; + case LC_SYMTAB: { + const symtab_command *st = reinterpret_cast<const symtab_command*>(lc); + const char *strings = start + read32(&st->stroff, isBig); + const uint32_t strSize = read32(&st->strsize, isBig); + // Validate string pool and symbol table all in buffer. + if (read32((const uint8_t *)&st->stroff, isBig) + + read32((const uint8_t *)&st->strsize, isBig) > + objSize) + return true; + if (is64) { + const uint32_t symOffset = read32(&st->symoff, isBig); + const uint32_t symCount = read32(&st->nsyms, isBig); + if ( symOffset+(symCount*sizeof(nlist_64)) > objSize) + return true; + const nlist_64 *symbols = + reinterpret_cast<const nlist_64 *>(start + symOffset); + // Convert each nlist_64 to a lld::mach_o::normalized::Symbol. 
+ for(uint32_t i=0; i < symCount; ++i) { + const nlist_64 *sin = &symbols[i]; + nlist_64 tempSym; + if (isBig != llvm::sys::IsBigEndianHost) { + tempSym = *sin; swapStruct(tempSym); sin = &tempSym; + } + Symbol sout; + if (sin->n_strx > strSize) + return true; + sout.name = &strings[sin->n_strx]; + sout.type = (NListType)(sin->n_type & N_TYPE); + sout.scope = (sin->n_type & (N_PEXT|N_EXT)); + sout.sect = sin->n_sect; + sout.desc = sin->n_desc; + sout.value = sin->n_value; + if (sout.type == N_UNDF) + f->undefinedSymbols.push_back(sout); + else if (sin->n_type & N_EXT) + f->globalSymbols.push_back(sout); + else + f->localSymbols.push_back(sout); + } + } else { + const uint32_t symOffset = read32(&st->symoff, isBig); + const uint32_t symCount = read32(&st->nsyms, isBig); + if ( symOffset+(symCount*sizeof(nlist)) > objSize) + return true; + const nlist *symbols = + reinterpret_cast<const nlist *>(start + symOffset); + // Convert each nlist to a lld::mach_o::normalized::Symbol. + for(uint32_t i=0; i < symCount; ++i) { + const nlist *sin = &symbols[i]; + nlist tempSym; + if (isBig != llvm::sys::IsBigEndianHost) { + tempSym = *sin; swapStruct(tempSym); sin = &tempSym; + } + Symbol sout; + if (sin->n_strx > strSize) + return true; + sout.name = &strings[sin->n_strx]; + sout.type = (NListType)(sin->n_type & N_TYPE); + sout.scope = (sin->n_type & (N_PEXT|N_EXT)); + sout.sect = sin->n_sect; + sout.desc = sin->n_desc; + sout.value = sin->n_value; + if (sout.type == N_UNDF) + f->undefinedSymbols.push_back(sout); + else if (sout.scope == (SymbolScope)N_EXT) + f->globalSymbols.push_back(sout); + else + f->localSymbols.push_back(sout); + } + } + } + break; + case LC_ID_DYLIB: { + const dylib_command *dl = reinterpret_cast<const dylib_command*>(lc); + f->installName = lc + read32(&dl->dylib.name, isBig); + f->currentVersion = read32(&dl->dylib.current_version, isBig); + f->compatVersion = read32(&dl->dylib.compatibility_version, isBig); + } + break; + case LC_DATA_IN_CODE: { + 
const linkedit_data_command *ldc = + reinterpret_cast<const linkedit_data_command*>(lc); + dataInCode = reinterpret_cast<const data_in_code_entry *>( + start + read32(&ldc->dataoff, isBig)); + dataInCodeSize = read32(&ldc->datasize, isBig); + } + break; + case LC_LOAD_DYLIB: + case LC_LOAD_WEAK_DYLIB: + case LC_REEXPORT_DYLIB: + case LC_LOAD_UPWARD_DYLIB: { + const dylib_command *dl = reinterpret_cast<const dylib_command*>(lc); + DependentDylib entry; + entry.path = lc + read32(&dl->dylib.name, isBig); + entry.kind = LoadCommandType(cmd); + entry.compatVersion = read32(&dl->dylib.compatibility_version, isBig); + entry.currentVersion = read32(&dl->dylib.current_version, isBig); + f->dependentDylibs.push_back(entry); + } + break; + case LC_RPATH: { + const rpath_command *rpc = reinterpret_cast<const rpath_command *>(lc); + f->rpaths.push_back(lc + read32(&rpc->path, isBig)); + } + break; + case LC_DYLD_INFO: + case LC_DYLD_INFO_ONLY: + dyldInfo = reinterpret_cast<const dyld_info_command*>(lc); + break; + } + return false; + }); + if (ec) + return ec; + + if (dataInCode) { + // Convert on-disk data_in_code_entry array to DataInCode vector. + for (unsigned i=0; i < dataInCodeSize/sizeof(data_in_code_entry); ++i) { + DataInCode entry; + entry.offset = read32(&dataInCode[i].offset, isBig); + entry.length = read16(&dataInCode[i].length, isBig); + entry.kind = + (DataRegionType)read16((const uint8_t *)&dataInCode[i].kind, isBig); + f->dataInCode.push_back(entry); + } + } + + if (dyldInfo) { + // If any exports, extract and add to normalized exportInfo vector. 
+ if (dyldInfo->export_size) { + const uint8_t *trieStart = reinterpret_cast<const uint8_t*>(start + + dyldInfo->export_off); + ArrayRef<uint8_t> trie(trieStart, dyldInfo->export_size); + for (const ExportEntry &trieExport : MachOObjectFile::exports(trie)) { + Export normExport; + normExport.name = trieExport.name().copy(f->ownedAllocations); + normExport.offset = trieExport.address(); + normExport.kind = ExportSymbolKind(trieExport.flags() & EXPORT_SYMBOL_FLAGS_KIND_MASK); + normExport.flags = trieExport.flags() & ~EXPORT_SYMBOL_FLAGS_KIND_MASK; + normExport.otherOffset = trieExport.other(); + if (!trieExport.otherName().empty()) + normExport.otherName = trieExport.otherName().copy(f->ownedAllocations); + f->exportInfo.push_back(normExport); + } + } + } + + return std::move(f); +} + +class MachOObjectReader : public Reader { +public: + MachOObjectReader(MachOLinkingContext &ctx) : _ctx(ctx) {} + + bool canParse(file_magic magic, StringRef ext, + const MemoryBuffer &mb) const override { + switch (magic) { + case llvm::sys::fs::file_magic::macho_object: + return (mb.getBufferSize() > 32); + default: + return false; + } + } + + std::error_code + loadFile(std::unique_ptr<MemoryBuffer> mb, const Registry ®istry, + std::vector<std::unique_ptr<File>> &result) const override { + auto *file = new MachOFile(std::move(mb), &_ctx); + result.push_back(std::unique_ptr<MachOFile>(file)); + return std::error_code(); + } + +private: + MachOLinkingContext &_ctx; +}; + +class MachODylibReader : public Reader { +public: + MachODylibReader(MachOLinkingContext &ctx) : _ctx(ctx) {} + + bool canParse(file_magic magic, StringRef ext, + const MemoryBuffer &mb) const override { + switch (magic) { + case llvm::sys::fs::file_magic::macho_dynamically_linked_shared_lib: + case llvm::sys::fs::file_magic::macho_dynamically_linked_shared_lib_stub: + return (mb.getBufferSize() > 32); + default: + return false; + } + } + + std::error_code + loadFile(std::unique_ptr<MemoryBuffer> mb, const Registry 
®istry, + std::vector<std::unique_ptr<File>> &result) const override { + auto *file = new MachODylibFile(std::move(mb), &_ctx); + result.push_back(std::unique_ptr<MachODylibFile>(file)); + return std::error_code(); + } + +private: + MachOLinkingContext &_ctx; +}; + +} // namespace normalized +} // namespace mach_o + +void Registry::addSupportMachOObjects(MachOLinkingContext &ctx) { + MachOLinkingContext::Arch arch = ctx.arch(); + add(std::unique_ptr<Reader>(new mach_o::normalized::MachOObjectReader(ctx))); + add(std::unique_ptr<Reader>(new mach_o::normalized::MachODylibReader(ctx))); + addKindTable(Reference::KindNamespace::mach_o, ctx.archHandler().kindArch(), + ctx.archHandler().kindStrings()); + add(std::unique_ptr<YamlIOTaggedDocumentHandler>( + new mach_o::MachOYamlIOTaggedDocumentHandler(arch))); +} + + +} // namespace lld diff --git a/lib/ReaderWriter/MachO/MachONormalizedFileBinaryUtils.h b/lib/ReaderWriter/MachO/MachONormalizedFileBinaryUtils.h new file mode 100644 index 000000000000..613c1b2f251a --- /dev/null +++ b/lib/ReaderWriter/MachO/MachONormalizedFileBinaryUtils.h @@ -0,0 +1,177 @@ +//===- lib/ReaderWriter/MachO/MachONormalizedFileBinaryUtils.h ------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
/// Returns a mask with the low \p bitCount bits set.
///
/// The computation is done on an unsigned value and a field that is 32 bits
/// wide (or wider) is special-cased, because shifting a 32-bit value by 32
/// or more is undefined behavior.
static inline uint32_t bitFieldMask(uint8_t bitCount) {
  if (bitCount >= 32)
    return UINT32_MAX;
  return (static_cast<uint32_t>(1) << bitCount) - 1;
}

/// Extracts a \p bitCount wide field from \p value.
///
/// \param value               the 32-bit word holding the field.
/// \param isBigEndianBigField when true, \p firstBit is counted from the most
///                            significant bit (big-endian bit-field layout);
///                            when false it is counted from the least
///                            significant bit.
/// \param firstBit            position of the field's first bit.
/// \param bitCount            width of the field in bits.
/// \returns the field, right-justified.
inline uint32_t
bitFieldExtract(uint32_t value, bool isBigEndianBigField, uint8_t firstBit,
                uint8_t bitCount) {
  const uint32_t mask = bitFieldMask(bitCount);
  const uint8_t shift =
      isBigEndianBigField ? (32 - firstBit - bitCount) : firstBit;
  return (value >> shift) & mask;
}

/// Stores \p newBits into a \p bitCount wide field of \p bits, leaving all
/// other bits untouched.
///
/// Position parameters have the same meaning as in bitFieldExtract().
/// \p newBits must fit in \p bitCount bits (checked by assert).
inline void
bitFieldSet(uint32_t &bits, bool isBigEndianBigField, uint32_t newBits,
            uint8_t firstBit, uint8_t bitCount) {
  const uint32_t mask = bitFieldMask(bitCount);
  assert((newBits & mask) == newBits);
  const uint8_t shift =
      isBigEndianBigField ? (32 - firstBit - bitCount) : firstBit;
  bits &= ~(mask << shift);   // clear the old field contents
  bits |= (newBits << shift); // then insert the new ones
}
bitFieldSet(r1, isBigEndian, r.isExtern, 27, 1); + bitFieldSet(r1, isBigEndian, r.length, 25, 2); + bitFieldSet(r1, isBigEndian, r.pcRel, 24, 1); + bitFieldSet(r1, isBigEndian, r.symbol, 0, 24); + } + + llvm::MachO::any_relocation_info result; + result.r_word0 = swap ? getSwappedBytes(r0) : r0; + result.r_word1 = swap ? getSwappedBytes(r1) : r1; + return result; +} + +inline StringRef getString16(const char s[16]) { + StringRef x = s; + if ( x.size() > 16 ) + return x.substr(0, 16); + else + return x; +} + +inline void setString16(StringRef str, char s[16]) { + memset(s, 0, 16); + memcpy(s, str.begin(), (str.size() > 16) ? 16: str.size()); +} + +// Implemented in normalizedToAtoms() and used by normalizedFromAtoms() so +// that the same table can be used to map mach-o sections to and from +// DefinedAtom::ContentType. +void relocatableSectionInfoForContentType(DefinedAtom::ContentType atomType, + StringRef &segmentName, + StringRef §ionName, + SectionType §ionType, + SectionAttr §ionAttrs); + +} // namespace normalized +} // namespace mach_o +} // namespace lld + +#endif // LLD_READER_WRITER_MACHO_NORMALIZED_FILE_BINARY_UTILS_H diff --git a/lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp b/lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp new file mode 100644 index 000000000000..be7acf9d4d60 --- /dev/null +++ b/lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp @@ -0,0 +1,1346 @@ +//===- lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp ---------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +/// +/// \file For mach-o object files, this implementation converts normalized +/// mach-o in memory to mach-o binary on disk. 
+/// +/// +---------------+ +/// | binary mach-o | +/// +---------------+ +/// ^ +/// | +/// | +/// +------------+ +/// | normalized | +/// +------------+ + +#include "MachONormalizedFile.h" +#include "MachONormalizedFileBinaryUtils.h" +#include "lld/Core/Error.h" +#include "lld/Core/LLVM.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FileOutputBuffer.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/Host.h" +#include "llvm/Support/LEB128.h" +#include "llvm/Support/MachO.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/raw_ostream.h" +#include <functional> +#include <list> +#include <map> +#include <system_error> + +using namespace llvm::MachO; + +namespace lld { +namespace mach_o { +namespace normalized { + +/// Utility class for writing a mach-o binary file given an in-memory +/// normalized file. +class MachOFileLayout { +public: + /// All layout computation is done in the constructor. + MachOFileLayout(const NormalizedFile &file); + + /// Returns the final file size as computed in the constructor. + size_t size() const; + + // Returns size of the mach_header and load commands. + size_t headerAndLoadCommandsSize() const; + + /// Writes the normalized file as a binary mach-o file to the specified + /// path. This does not have a stream interface because the generated + /// file may need the 'x' bit set. 
+ std::error_code writeBinary(StringRef path); + +private: + uint32_t loadCommandsSize(uint32_t &count); + void buildFileOffsets(); + void writeMachHeader(); + std::error_code writeLoadCommands(); + void writeSectionContent(); + void writeRelocations(); + void writeSymbolTable(); + void writeRebaseInfo(); + void writeBindingInfo(); + void writeLazyBindingInfo(); + void writeExportInfo(); + void writeDataInCodeInfo(); + void writeLinkEditContent(); + void buildLinkEditInfo(); + void buildRebaseInfo(); + void buildBindInfo(); + void buildLazyBindInfo(); + void buildExportTrie(); + void computeDataInCodeSize(); + void computeSymbolTableSizes(); + void buildSectionRelocations(); + void appendSymbols(const std::vector<Symbol> &symbols, + uint32_t &symOffset, uint32_t &strOffset); + uint32_t indirectSymbolIndex(const Section §, uint32_t &index); + uint32_t indirectSymbolElementSize(const Section §); + + // For use as template parameter to load command methods. + struct MachO64Trait { + typedef llvm::MachO::segment_command_64 command; + typedef llvm::MachO::section_64 section; + enum { LC = llvm::MachO::LC_SEGMENT_64 }; + }; + + // For use as template parameter to load command methods. 
+ struct MachO32Trait { + typedef llvm::MachO::segment_command command; + typedef llvm::MachO::section section; + enum { LC = llvm::MachO::LC_SEGMENT }; + }; + + template <typename T> + std::error_code writeSingleSegmentLoadCommand(uint8_t *&lc); + template <typename T> std::error_code writeSegmentLoadCommands(uint8_t *&lc); + + uint32_t pointerAlign(uint32_t value); + static StringRef dyldPath(); + + class ByteBuffer { + public: + ByteBuffer() : _ostream(_bytes) { } + + void append_byte(uint8_t b) { + _ostream << b; + } + void append_uleb128(uint64_t value) { + llvm::encodeULEB128(value, _ostream); + } + void append_uleb128Fixed(uint64_t value, unsigned byteCount) { + unsigned min = llvm::getULEB128Size(value); + assert(min <= byteCount); + unsigned pad = byteCount - min; + llvm::encodeULEB128(value, _ostream, pad); + } + void append_sleb128(int64_t value) { + llvm::encodeSLEB128(value, _ostream); + } + void append_string(StringRef str) { + _ostream << str; + append_byte(0); + } + void align(unsigned alignment) { + while ( (_ostream.tell() % alignment) != 0 ) + append_byte(0); + } + size_t size() { + return _ostream.tell(); + } + const uint8_t *bytes() { + return reinterpret_cast<const uint8_t*>(_ostream.str().data()); + } + private: + SmallVector<char, 128> _bytes; + // Stream ivar must be after SmallVector ivar to construct properly. + llvm::raw_svector_ostream _ostream; + }; + + struct TrieNode; // Forward declaration. 
+ + struct TrieEdge { + TrieEdge(StringRef s, TrieNode *node) : _subString(s), _child(node) {} + ~TrieEdge() {} + + StringRef _subString; + struct TrieNode *_child; + }; + + struct TrieNode { + TrieNode(StringRef s) + : _cummulativeString(s), _address(0), _flags(0), _other(0), + _trieOffset(0), _hasExportInfo(false) {} + ~TrieNode() {} + + void addSymbol(const Export &entry, BumpPtrAllocator &allocator, + std::vector<TrieNode *> &allNodes); + bool updateOffset(uint32_t &offset); + void appendToByteBuffer(ByteBuffer &out); + +private: + StringRef _cummulativeString; + std::list<TrieEdge> _children; + uint64_t _address; + uint64_t _flags; + uint64_t _other; + StringRef _importedName; + uint32_t _trieOffset; + bool _hasExportInfo; + }; + + struct SegExtraInfo { + uint32_t fileOffset; + uint32_t fileSize; + std::vector<const Section*> sections; + }; + typedef std::map<const Segment*, SegExtraInfo> SegMap; + struct SectionExtraInfo { + uint32_t fileOffset; + }; + typedef std::map<const Section*, SectionExtraInfo> SectionMap; + + const NormalizedFile &_file; + std::error_code _ec; + uint8_t *_buffer; + const bool _is64; + const bool _swap; + const bool _bigEndianArch; + uint64_t _seg1addr; + uint32_t _startOfLoadCommands; + uint32_t _countOfLoadCommands; + uint32_t _endOfLoadCommands; + uint32_t _startOfRelocations; + uint32_t _startOfDataInCode; + uint32_t _startOfSymbols; + uint32_t _startOfIndirectSymbols; + uint32_t _startOfSymbolStrings; + uint32_t _endOfSymbolStrings; + uint32_t _symbolTableLocalsStartIndex; + uint32_t _symbolTableGlobalsStartIndex; + uint32_t _symbolTableUndefinesStartIndex; + uint32_t _symbolStringPoolSize; + uint32_t _symbolTableSize; + uint32_t _dataInCodeSize; + uint32_t _indirectSymbolTableCount; + // Used in object file creation only + uint32_t _startOfSectionsContent; + uint32_t _endOfSectionsContent; + // Used in final linked image only + uint32_t _startOfLinkEdit; + uint32_t _startOfRebaseInfo; + uint32_t _endOfRebaseInfo; + uint32_t 
_startOfBindingInfo; + uint32_t _endOfBindingInfo; + uint32_t _startOfLazyBindingInfo; + uint32_t _endOfLazyBindingInfo; + uint32_t _startOfExportTrie; + uint32_t _endOfExportTrie; + uint32_t _endOfLinkEdit; + uint64_t _addressOfLinkEdit; + SegMap _segInfo; + SectionMap _sectInfo; + ByteBuffer _rebaseInfo; + ByteBuffer _bindingInfo; + ByteBuffer _lazyBindingInfo; + ByteBuffer _weakBindingInfo; + ByteBuffer _exportTrie; +}; + +size_t headerAndLoadCommandsSize(const NormalizedFile &file) { + MachOFileLayout layout(file); + return layout.headerAndLoadCommandsSize(); +} + +StringRef MachOFileLayout::dyldPath() { + return "/usr/lib/dyld"; +} + +uint32_t MachOFileLayout::pointerAlign(uint32_t value) { + return llvm::RoundUpToAlignment(value, _is64 ? 8 : 4); +} + + +size_t MachOFileLayout::headerAndLoadCommandsSize() const { + return _endOfLoadCommands; +} + + +MachOFileLayout::MachOFileLayout(const NormalizedFile &file) + : _file(file), + _is64(MachOLinkingContext::is64Bit(file.arch)), + _swap(!MachOLinkingContext::isHostEndian(file.arch)), + _bigEndianArch(MachOLinkingContext::isBigEndian(file.arch)), + _seg1addr(INT64_MAX) { + _startOfLoadCommands = _is64 ? sizeof(mach_header_64) : sizeof(mach_header); + const size_t segCommandBaseSize = + (_is64 ? sizeof(segment_command_64) : sizeof(segment_command)); + const size_t sectsSize = (_is64 ? sizeof(section_64) : sizeof(section)); + if (file.fileType == llvm::MachO::MH_OBJECT) { + // object files have just one segment load command containing all sections + _endOfLoadCommands = _startOfLoadCommands + + segCommandBaseSize + + file.sections.size() * sectsSize + + sizeof(symtab_command); + _countOfLoadCommands = 2; + if (!_file.dataInCode.empty()) { + _endOfLoadCommands += sizeof(linkedit_data_command); + _countOfLoadCommands++; + } + // Assign file offsets to each section. 
+ _startOfSectionsContent = _endOfLoadCommands; + unsigned relocCount = 0; + uint64_t offset = _startOfSectionsContent; + for (const Section § : file.sections) { + if (sect.type != llvm::MachO::S_ZEROFILL) { + offset = llvm::RoundUpToAlignment(offset, 1 << sect.alignment); + _sectInfo[§].fileOffset = offset; + offset += sect.content.size(); + } else { + _sectInfo[§].fileOffset = 0; + } + relocCount += sect.relocations.size(); + } + _endOfSectionsContent = offset; + + computeSymbolTableSizes(); + computeDataInCodeSize(); + + // Align start of relocations. + _startOfRelocations = pointerAlign(_endOfSectionsContent); + _startOfDataInCode = _startOfRelocations + relocCount * 8; + _startOfSymbols = _startOfDataInCode + _dataInCodeSize; + // Add Indirect symbol table. + _startOfIndirectSymbols = _startOfSymbols + _symbolTableSize; + // Align start of symbol table and symbol strings. + _startOfSymbolStrings = _startOfIndirectSymbols + + pointerAlign(_indirectSymbolTableCount * sizeof(uint32_t)); + _endOfSymbolStrings = _startOfSymbolStrings + + pointerAlign(_symbolStringPoolSize); + _endOfLinkEdit = _endOfSymbolStrings; + DEBUG_WITH_TYPE("MachOFileLayout", + llvm::dbgs() << "MachOFileLayout()\n" + << " startOfLoadCommands=" << _startOfLoadCommands << "\n" + << " countOfLoadCommands=" << _countOfLoadCommands << "\n" + << " endOfLoadCommands=" << _endOfLoadCommands << "\n" + << " startOfRelocations=" << _startOfRelocations << "\n" + << " startOfSymbols=" << _startOfSymbols << "\n" + << " startOfSymbolStrings=" << _startOfSymbolStrings << "\n" + << " endOfSymbolStrings=" << _endOfSymbolStrings << "\n" + << " startOfSectionsContent=" << _startOfSectionsContent << "\n" + << " endOfSectionsContent=" << _endOfSectionsContent << "\n"); + } else { + // Final linked images have one load command per segment. + _endOfLoadCommands = _startOfLoadCommands + + loadCommandsSize(_countOfLoadCommands); + + // Assign section file offsets. 
+ buildFileOffsets(); + buildLinkEditInfo(); + + // LINKEDIT of final linked images has in order: + // rebase info, binding info, lazy binding info, weak binding info, + // data-in-code, symbol table, indirect symbol table, symbol table strings. + _startOfRebaseInfo = _startOfLinkEdit; + _endOfRebaseInfo = _startOfRebaseInfo + _rebaseInfo.size(); + _startOfBindingInfo = _endOfRebaseInfo; + _endOfBindingInfo = _startOfBindingInfo + _bindingInfo.size(); + _startOfLazyBindingInfo = _endOfBindingInfo; + _endOfLazyBindingInfo = _startOfLazyBindingInfo + _lazyBindingInfo.size(); + _startOfExportTrie = _endOfLazyBindingInfo; + _endOfExportTrie = _startOfExportTrie + _exportTrie.size(); + _startOfDataInCode = _endOfExportTrie; + _startOfSymbols = _startOfDataInCode + _dataInCodeSize; + _startOfIndirectSymbols = _startOfSymbols + _symbolTableSize; + _startOfSymbolStrings = _startOfIndirectSymbols + + pointerAlign(_indirectSymbolTableCount * sizeof(uint32_t)); + _endOfSymbolStrings = _startOfSymbolStrings + + pointerAlign(_symbolStringPoolSize); + _endOfLinkEdit = _endOfSymbolStrings; + DEBUG_WITH_TYPE("MachOFileLayout", + llvm::dbgs() << "MachOFileLayout()\n" + << " startOfLoadCommands=" << _startOfLoadCommands << "\n" + << " countOfLoadCommands=" << _countOfLoadCommands << "\n" + << " endOfLoadCommands=" << _endOfLoadCommands << "\n" + << " startOfLinkEdit=" << _startOfLinkEdit << "\n" + << " startOfRebaseInfo=" << _startOfRebaseInfo << "\n" + << " endOfRebaseInfo=" << _endOfRebaseInfo << "\n" + << " startOfBindingInfo=" << _startOfBindingInfo << "\n" + << " endOfBindingInfo=" << _endOfBindingInfo << "\n" + << " startOfLazyBindingInfo=" << _startOfLazyBindingInfo << "\n" + << " endOfLazyBindingInfo=" << _endOfLazyBindingInfo << "\n" + << " startOfExportTrie=" << _startOfExportTrie << "\n" + << " endOfExportTrie=" << _endOfExportTrie << "\n" + << " startOfDataInCode=" << _startOfDataInCode << "\n" + << " startOfSymbols=" << _startOfSymbols << "\n" + << " 
startOfSymbolStrings=" << _startOfSymbolStrings << "\n" + << " endOfSymbolStrings=" << _endOfSymbolStrings << "\n" + << " addressOfLinkEdit=" << _addressOfLinkEdit << "\n"); + } +} + +uint32_t MachOFileLayout::loadCommandsSize(uint32_t &count) { + uint32_t size = 0; + count = 0; + + const size_t segCommandSize = + (_is64 ? sizeof(segment_command_64) : sizeof(segment_command)); + const size_t sectionSize = (_is64 ? sizeof(section_64) : sizeof(section)); + + // Add LC_SEGMENT for each segment. + size += _file.segments.size() * segCommandSize; + count += _file.segments.size(); + // Add section record for each section. + size += _file.sections.size() * sectionSize; + // Add one LC_SEGMENT for implicit __LINKEDIT segment + size += segCommandSize; + ++count; + + // If creating a dylib, add LC_ID_DYLIB. + if (_file.fileType == llvm::MachO::MH_DYLIB) { + size += sizeof(dylib_command) + pointerAlign(_file.installName.size() + 1); + ++count; + } + + // Add LC_DYLD_INFO + size += sizeof(dyld_info_command); + ++count; + + // Add LC_SYMTAB + size += sizeof(symtab_command); + ++count; + + // Add LC_DYSYMTAB + if (_file.fileType != llvm::MachO::MH_PRELOAD) { + size += sizeof(dysymtab_command); + ++count; + } + + // If main executable add LC_LOAD_DYLINKER and LC_MAIN + if (_file.fileType == llvm::MachO::MH_EXECUTE) { + size += pointerAlign(sizeof(dylinker_command) + dyldPath().size()+1); + ++count; + size += sizeof(entry_point_command); + ++count; + } + + // Add LC_LOAD_DYLIB for each dependent dylib. 
+ for (const DependentDylib &dep : _file.dependentDylibs) { + size += sizeof(dylib_command) + pointerAlign(dep.path.size()+1); + ++count; + } + + // Add LC_RPATH + for (const StringRef &path : _file.rpaths) { + size += sizeof(rpath_command) + pointerAlign(path.size()+1); + ++count; + } + + // Add LC_DATA_IN_CODE if needed + if (!_file.dataInCode.empty()) { + size += sizeof(linkedit_data_command); + ++count; + } + + return size; +} + +static bool overlaps(const Segment &s1, const Segment &s2) { + if (s2.address >= s1.address+s1.size) + return false; + if (s1.address >= s2.address+s2.size) + return false; + return true; +} + +static bool overlaps(const Section &s1, const Section &s2) { + if (s2.address >= s1.address+s1.content.size()) + return false; + if (s1.address >= s2.address+s2.content.size()) + return false; + return true; +} + +void MachOFileLayout::buildFileOffsets() { + // Verify no segments overlap + for (const Segment &sg1 : _file.segments) { + for (const Segment &sg2 : _file.segments) { + if (&sg1 == &sg2) + continue; + if (overlaps(sg1,sg2)) { + _ec = make_error_code(llvm::errc::executable_format_error); + return; + } + } + } + + // Verify no sections overlap + for (const Section &s1 : _file.sections) { + for (const Section &s2 : _file.sections) { + if (&s1 == &s2) + continue; + if (overlaps(s1,s2)) { + _ec = make_error_code(llvm::errc::executable_format_error); + return; + } + } + } + + // Build side table of extra info about segments and sections. + SegExtraInfo t; + t.fileOffset = 0; + for (const Segment &sg : _file.segments) { + _segInfo[&sg] = t; + } + SectionExtraInfo t2; + t2.fileOffset = 0; + // Assign sections to segments. 
+ for (const Section &s : _file.sections) { + _sectInfo[&s] = t2; + bool foundSegment = false; + for (const Segment &sg : _file.segments) { + if (sg.name.equals(s.segmentName)) { + if ((s.address >= sg.address) + && (s.address+s.content.size() <= sg.address+sg.size)) { + _segInfo[&sg].sections.push_back(&s); + foundSegment = true; + break; + } + } + } + if (!foundSegment) { + _ec = make_error_code(llvm::errc::executable_format_error); + return; + } + } + + // Assign file offsets. + uint32_t fileOffset = 0; + DEBUG_WITH_TYPE("MachOFileLayout", + llvm::dbgs() << "buildFileOffsets()\n"); + for (const Segment &sg : _file.segments) { + _segInfo[&sg].fileOffset = fileOffset; + if ((_seg1addr == INT64_MAX) && sg.access) + _seg1addr = sg.address; + DEBUG_WITH_TYPE("MachOFileLayout", + llvm::dbgs() << " segment=" << sg.name + << ", fileOffset=" << _segInfo[&sg].fileOffset << "\n"); + + uint32_t segFileSize = 0; + // A segment that is not zero-fill must use a least one page of disk space. + if (sg.access) + segFileSize = _file.pageSize; + for (const Section *s : _segInfo[&sg].sections) { + uint32_t sectOffset = s->address - sg.address; + uint32_t sectFileSize = + s->type == llvm::MachO::S_ZEROFILL ? 0 : s->content.size(); + segFileSize = std::max(segFileSize, sectOffset + sectFileSize); + + _sectInfo[s].fileOffset = _segInfo[&sg].fileOffset + sectOffset; + DEBUG_WITH_TYPE("MachOFileLayout", + llvm::dbgs() << " section=" << s->sectionName + << ", fileOffset=" << fileOffset << "\n"); + } + + _segInfo[&sg].fileSize = llvm::RoundUpToAlignment(segFileSize, + _file.pageSize); + fileOffset = llvm::RoundUpToAlignment(fileOffset + segFileSize, + _file.pageSize); + _addressOfLinkEdit = sg.address + sg.size; + } + _startOfLinkEdit = fileOffset; +} + + +size_t MachOFileLayout::size() const { + return _endOfSymbolStrings; +} + +void MachOFileLayout::writeMachHeader() { + mach_header *mh = reinterpret_cast<mach_header*>(_buffer); + mh->magic = _is64 ? 
llvm::MachO::MH_MAGIC_64 : llvm::MachO::MH_MAGIC; + mh->cputype = MachOLinkingContext::cpuTypeFromArch(_file.arch); + mh->cpusubtype = MachOLinkingContext::cpuSubtypeFromArch(_file.arch); + mh->filetype = _file.fileType; + mh->ncmds = _countOfLoadCommands; + mh->sizeofcmds = _endOfLoadCommands - _startOfLoadCommands; + mh->flags = _file.flags; + if (_swap) + swapStruct(*mh); +} + +uint32_t MachOFileLayout::indirectSymbolIndex(const Section §, + uint32_t &index) { + if (sect.indirectSymbols.empty()) + return 0; + uint32_t result = index; + index += sect.indirectSymbols.size(); + return result; +} + +uint32_t MachOFileLayout::indirectSymbolElementSize(const Section §) { + if (sect.indirectSymbols.empty()) + return 0; + if (sect.type != S_SYMBOL_STUBS) + return 0; + return sect.content.size() / sect.indirectSymbols.size(); +} + +template <typename T> +std::error_code MachOFileLayout::writeSingleSegmentLoadCommand(uint8_t *&lc) { + typename T::command* seg = reinterpret_cast<typename T::command*>(lc); + seg->cmd = T::LC; + seg->cmdsize = sizeof(typename T::command) + + _file.sections.size() * sizeof(typename T::section); + uint8_t *next = lc + seg->cmdsize; + memset(seg->segname, 0, 16); + seg->vmaddr = 0; + seg->vmsize = _file.sections.back().address + + _file.sections.back().content.size(); + seg->fileoff = _endOfLoadCommands; + seg->filesize = seg->vmsize; + seg->maxprot = VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE; + seg->initprot = VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE; + seg->nsects = _file.sections.size(); + seg->flags = 0; + if (_swap) + swapStruct(*seg); + typename T::section *sout = reinterpret_cast<typename T::section*> + (lc+sizeof(typename T::command)); + uint32_t relOffset = _startOfRelocations; + uint32_t indirectSymRunningIndex = 0; + for (const Section &sin : _file.sections) { + setString16(sin.sectionName, sout->sectname); + setString16(sin.segmentName, sout->segname); + sout->addr = sin.address; + sout->size = sin.content.size(); + sout->offset 
/// Writes the segment load commands of a final linked image at \p lc: one
/// command (followed by its section records) per segment in _file.segments,
/// then one section-less command for the implicit __LINKEDIT segment.
///
/// \tparam T MachO64Trait or MachO32Trait; selects the command/section
///           struct types and the load command value.
/// \param lc in/out cursor into the output buffer; on return it points just
///           past the last command written.
/// \returns always a default-constructed (success) error code.
template <typename T>
std::error_code MachOFileLayout::writeSegmentLoadCommands(uint8_t *&lc) {
  // Running index into the indirect symbol table, shared across all
  // segments.
  uint32_t indirectSymRunningIndex = 0;
  for (const Segment &seg : _file.segments) {
    // Write segment command with trailing sections.
    SegExtraInfo &segInfo = _segInfo[&seg];
    typename T::command* cmd = reinterpret_cast<typename T::command*>(lc);
    cmd->cmd = T::LC;
    cmd->cmdsize = sizeof(typename T::command)
                        + segInfo.sections.size() * sizeof(typename T::section);
    // Compute the next command's position now, while cmdsize is still in
    // host byte order (the struct may be swapped below).
    uint8_t *next = lc + cmd->cmdsize;
    setString16(seg.name, cmd->segname);
    cmd->vmaddr = seg.address;
    cmd->vmsize = seg.size;
    cmd->fileoff = segInfo.fileOffset;
    cmd->filesize = segInfo.fileSize;
    cmd->maxprot = seg.access;
    cmd->initprot = seg.access;
    cmd->nsects = segInfo.sections.size();
    cmd->flags = 0;
    if (_swap)
      swapStruct(*cmd);
    // Section records immediately follow the segment command.
    typename T::section *sect = reinterpret_cast<typename T::section*>
                                               (lc+sizeof(typename T::command));
    for (const Section *section : segInfo.sections) {
      setString16(section->sectionName, sect->sectname);
      setString16(section->segmentName, sect->segname);
      sect->addr = section->address;
      sect->size = section->content.size();
      // Zero-fill sections are given file offset 0; all others sit at the
      // same distance from the segment's file offset as from its address.
      if (section->type == llvm::MachO::S_ZEROFILL)
        sect->offset = 0;
      else
        sect->offset = section->address - seg.address + segInfo.fileOffset;
      sect->align = section->alignment;
      // No relocation info is recorded for sections of a final linked image.
      sect->reloff = 0;
      sect->nreloc = 0;
      sect->flags = section->type | section->attributes;
      sect->reserved1 = indirectSymbolIndex(*section, indirectSymRunningIndex);
      sect->reserved2 = indirectSymbolElementSize(*section);
      if (_swap)
        swapStruct(*sect);
      ++sect;
    }
    lc = reinterpret_cast<uint8_t*>(next);
  }
  // Add implicit __LINKEDIT segment
  size_t linkeditSize = _endOfLinkEdit - _startOfLinkEdit;
  typename T::command* cmd = reinterpret_cast<typename T::command*>(lc);
  cmd->cmd = T::LC;
  cmd->cmdsize = sizeof(typename T::command);
  uint8_t *next = lc + cmd->cmdsize;
  setString16("__LINKEDIT", cmd->segname);
  cmd->vmaddr = _addressOfLinkEdit;
  // The in-memory size is rounded up to whole pages; the file size is not.
  cmd->vmsize = llvm::RoundUpToAlignment(linkeditSize, _file.pageSize);
  cmd->fileoff = _startOfLinkEdit;
  cmd->filesize = linkeditSize;
  cmd->maxprot = VM_PROT_READ;
  cmd->initprot = VM_PROT_READ;
  cmd->nsects = 0;
  cmd->flags = 0;
  if (_swap)
    swapStruct(*cmd);
  lc = next;
  return std::error_code();
}
+ if (_dataInCodeSize != 0) { + linkedit_data_command* dl = reinterpret_cast<linkedit_data_command*>(lc); + dl->cmd = LC_DATA_IN_CODE; + dl->cmdsize = sizeof(linkedit_data_command); + dl->dataoff = _startOfDataInCode; + dl->datasize = _dataInCodeSize; + if (_swap) + swapStruct(*dl); + lc += sizeof(linkedit_data_command); + } + } else { + // Final linked images have sections under segments. + if (_is64) + ec = writeSegmentLoadCommands<MachO64Trait>(lc); + else + ec = writeSegmentLoadCommands<MachO32Trait>(lc); + + // Add LC_ID_DYLIB command for dynamic libraries. + if (_file.fileType == llvm::MachO::MH_DYLIB) { + dylib_command *dc = reinterpret_cast<dylib_command*>(lc); + StringRef path = _file.installName; + uint32_t size = sizeof(dylib_command) + pointerAlign(path.size() + 1); + dc->cmd = LC_ID_DYLIB; + dc->cmdsize = size; + dc->dylib.name = sizeof(dylib_command); // offset + // needs to be some constant value different than the one in LC_LOAD_DYLIB + dc->dylib.timestamp = 1; + dc->dylib.current_version = _file.currentVersion; + dc->dylib.compatibility_version = _file.compatVersion; + if (_swap) + swapStruct(*dc); + memcpy(lc + sizeof(dylib_command), path.begin(), path.size()); + lc[sizeof(dylib_command) + path.size()] = '\0'; + lc += size; + } + + // Add LC_DYLD_INFO_ONLY. + dyld_info_command* di = reinterpret_cast<dyld_info_command*>(lc); + di->cmd = LC_DYLD_INFO_ONLY; + di->cmdsize = sizeof(dyld_info_command); + di->rebase_off = _rebaseInfo.size() ? _startOfRebaseInfo : 0; + di->rebase_size = _rebaseInfo.size(); + di->bind_off = _bindingInfo.size() ? _startOfBindingInfo : 0; + di->bind_size = _bindingInfo.size(); + di->weak_bind_off = 0; + di->weak_bind_size = 0; + di->lazy_bind_off = _lazyBindingInfo.size() ? _startOfLazyBindingInfo : 0; + di->lazy_bind_size = _lazyBindingInfo.size(); + di->export_off = _exportTrie.size() ? 
_startOfExportTrie : 0; + di->export_size = _exportTrie.size(); + if (_swap) + swapStruct(*di); + lc += sizeof(dyld_info_command); + + // Add LC_SYMTAB with symbol table info. + symtab_command* st = reinterpret_cast<symtab_command*>(lc); + st->cmd = LC_SYMTAB; + st->cmdsize = sizeof(symtab_command); + st->symoff = _startOfSymbols; + st->nsyms = _file.localSymbols.size() + _file.globalSymbols.size() + + _file.undefinedSymbols.size(); + st->stroff = _startOfSymbolStrings; + st->strsize = _endOfSymbolStrings - _startOfSymbolStrings; + if (_swap) + swapStruct(*st); + lc += sizeof(symtab_command); + + // Add LC_DYSYMTAB + if (_file.fileType != llvm::MachO::MH_PRELOAD) { + dysymtab_command* dst = reinterpret_cast<dysymtab_command*>(lc); + dst->cmd = LC_DYSYMTAB; + dst->cmdsize = sizeof(dysymtab_command); + dst->ilocalsym = _symbolTableLocalsStartIndex; + dst->nlocalsym = _file.localSymbols.size(); + dst->iextdefsym = _symbolTableGlobalsStartIndex; + dst->nextdefsym = _file.globalSymbols.size(); + dst->iundefsym = _symbolTableUndefinesStartIndex; + dst->nundefsym = _file.undefinedSymbols.size(); + dst->tocoff = 0; + dst->ntoc = 0; + dst->modtaboff = 0; + dst->nmodtab = 0; + dst->extrefsymoff = 0; + dst->nextrefsyms = 0; + dst->indirectsymoff = _startOfIndirectSymbols; + dst->nindirectsyms = _indirectSymbolTableCount; + dst->extreloff = 0; + dst->nextrel = 0; + dst->locreloff = 0; + dst->nlocrel = 0; + if (_swap) + swapStruct(*dst); + lc += sizeof(dysymtab_command); + } + + // If main executable, add LC_LOAD_DYLINKER and LC_MAIN. + if (_file.fileType == llvm::MachO::MH_EXECUTE) { + // Build LC_LOAD_DYLINKER load command. 
+ uint32_t size=pointerAlign(sizeof(dylinker_command)+dyldPath().size()+1); + dylinker_command* dl = reinterpret_cast<dylinker_command*>(lc); + dl->cmd = LC_LOAD_DYLINKER; + dl->cmdsize = size; + dl->name = sizeof(dylinker_command); // offset + if (_swap) + swapStruct(*dl); + memcpy(lc+sizeof(dylinker_command), dyldPath().data(), dyldPath().size()); + lc[sizeof(dylinker_command)+dyldPath().size()] = '\0'; + lc += size; + // Build LC_MAIN load command. + entry_point_command* ep = reinterpret_cast<entry_point_command*>(lc); + ep->cmd = LC_MAIN; + ep->cmdsize = sizeof(entry_point_command); + ep->entryoff = _file.entryAddress - _seg1addr; + ep->stacksize = 0; + if (_swap) + swapStruct(*ep); + lc += sizeof(entry_point_command); + } + + // Add LC_LOAD_DYLIB commands + for (const DependentDylib &dep : _file.dependentDylibs) { + dylib_command* dc = reinterpret_cast<dylib_command*>(lc); + uint32_t size = sizeof(dylib_command) + pointerAlign(dep.path.size()+1); + dc->cmd = dep.kind; + dc->cmdsize = size; + dc->dylib.name = sizeof(dylib_command); // offset + // needs to be some constant value different than the one in LC_ID_DYLIB + dc->dylib.timestamp = 2; + dc->dylib.current_version = dep.currentVersion; + dc->dylib.compatibility_version = dep.compatVersion; + if (_swap) + swapStruct(*dc); + memcpy(lc+sizeof(dylib_command), dep.path.begin(), dep.path.size()); + lc[sizeof(dylib_command)+dep.path.size()] = '\0'; + lc += size; + } + + // Add LC_RPATH + for (const StringRef &path : _file.rpaths) { + rpath_command *rpc = reinterpret_cast<rpath_command *>(lc); + uint32_t size = sizeof(rpath_command) + pointerAlign(path.size()+1); + rpc->cmd = LC_RPATH; + rpc->cmdsize = size; + rpc->path = sizeof(rpath_command); // offset + if (_swap) + swapStruct(*rpc); + memcpy(lc+sizeof(rpath_command), path.begin(), path.size()); + lc[sizeof(rpath_command)+path.size()] = '\0'; + lc += size; + } + + // Add LC_DATA_IN_CODE if needed. 
+ if (_dataInCodeSize != 0) { + linkedit_data_command* dl = reinterpret_cast<linkedit_data_command*>(lc); + dl->cmd = LC_DATA_IN_CODE; + dl->cmdsize = sizeof(linkedit_data_command); + dl->dataoff = _startOfDataInCode; + dl->datasize = _dataInCodeSize; + if (_swap) + swapStruct(*dl); + lc += sizeof(linkedit_data_command); + } + } + return ec; +} + + +void MachOFileLayout::writeSectionContent() { + for (const Section &s : _file.sections) { + // Copy all section content to output buffer. + if (s.type == llvm::MachO::S_ZEROFILL) + continue; + if (s.content.empty()) + continue; + uint32_t offset = _sectInfo[&s].fileOffset; + uint8_t *p = &_buffer[offset]; + memcpy(p, &s.content[0], s.content.size()); + p += s.content.size(); + } +} + +void MachOFileLayout::writeRelocations() { + uint32_t relOffset = _startOfRelocations; + for (Section sect : _file.sections) { + for (Relocation r : sect.relocations) { + any_relocation_info* rb = reinterpret_cast<any_relocation_info*>( + &_buffer[relOffset]); + *rb = packRelocation(r, _swap, _bigEndianArch); + relOffset += sizeof(any_relocation_info); + } + } +} + + +void MachOFileLayout::appendSymbols(const std::vector<Symbol> &symbols, + uint32_t &symOffset, uint32_t &strOffset) { + for (const Symbol &sym : symbols) { + if (_is64) { + nlist_64* nb = reinterpret_cast<nlist_64*>(&_buffer[symOffset]); + nb->n_strx = strOffset - _startOfSymbolStrings; + nb->n_type = sym.type | sym.scope; + nb->n_sect = sym.sect; + nb->n_desc = sym.desc; + nb->n_value = sym.value; + if (_swap) + swapStruct(*nb); + symOffset += sizeof(nlist_64); + } else { + nlist* nb = reinterpret_cast<nlist*>(&_buffer[symOffset]); + nb->n_strx = strOffset - _startOfSymbolStrings; + nb->n_type = sym.type | sym.scope; + nb->n_sect = sym.sect; + nb->n_desc = sym.desc; + nb->n_value = sym.value; + if (_swap) + swapStruct(*nb); + symOffset += sizeof(nlist); + } + memcpy(&_buffer[strOffset], sym.name.begin(), sym.name.size()); + strOffset += sym.name.size(); + 
_buffer[strOffset++] ='\0'; // Strings in table have nul terminator. + } +} + +void MachOFileLayout::writeDataInCodeInfo() { + uint32_t offset = _startOfDataInCode; + for (const DataInCode &entry : _file.dataInCode) { + data_in_code_entry *dst = reinterpret_cast<data_in_code_entry*>( + &_buffer[offset]); + dst->offset = entry.offset; + dst->length = entry.length; + dst->kind = entry.kind; + if (_swap) + swapStruct(*dst); + offset += sizeof(data_in_code_entry); + } +} + +void MachOFileLayout::writeSymbolTable() { + // Write symbol table and symbol strings in parallel. + uint32_t symOffset = _startOfSymbols; + uint32_t strOffset = _startOfSymbolStrings; + _buffer[strOffset++] = '\0'; // Reserve n_strx offset of zero to mean no name. + appendSymbols(_file.localSymbols, symOffset, strOffset); + appendSymbols(_file.globalSymbols, symOffset, strOffset); + appendSymbols(_file.undefinedSymbols, symOffset, strOffset); + // Write indirect symbol table array. + uint32_t *indirects = reinterpret_cast<uint32_t*> + (&_buffer[_startOfIndirectSymbols]); + if (_file.fileType == llvm::MachO::MH_OBJECT) { + // Object files have sections in same order as input normalized file. + for (const Section §ion : _file.sections) { + for (uint32_t index : section.indirectSymbols) { + if (_swap) + *indirects++ = llvm::sys::getSwappedBytes(index); + else + *indirects++ = index; + } + } + } else { + // Final linked images must sort sections from normalized file. 
+ for (const Segment &seg : _file.segments) { + SegExtraInfo &segInfo = _segInfo[&seg]; + for (const Section *section : segInfo.sections) { + for (uint32_t index : section->indirectSymbols) { + if (_swap) + *indirects++ = llvm::sys::getSwappedBytes(index); + else + *indirects++ = index; + } + } + } + } +} + +void MachOFileLayout::writeRebaseInfo() { + memcpy(&_buffer[_startOfRebaseInfo], _rebaseInfo.bytes(), _rebaseInfo.size()); +} + +void MachOFileLayout::writeBindingInfo() { + memcpy(&_buffer[_startOfBindingInfo], + _bindingInfo.bytes(), _bindingInfo.size()); +} + +void MachOFileLayout::writeLazyBindingInfo() { + memcpy(&_buffer[_startOfLazyBindingInfo], + _lazyBindingInfo.bytes(), _lazyBindingInfo.size()); +} + +void MachOFileLayout::writeExportInfo() { + memcpy(&_buffer[_startOfExportTrie], _exportTrie.bytes(), _exportTrie.size()); +} + +void MachOFileLayout::buildLinkEditInfo() { + buildRebaseInfo(); + buildBindInfo(); + buildLazyBindInfo(); + buildExportTrie(); + computeSymbolTableSizes(); + computeDataInCodeSize(); +} + +void MachOFileLayout::buildSectionRelocations() { + +} + +void MachOFileLayout::buildRebaseInfo() { + // TODO: compress rebasing info. + for (const RebaseLocation& entry : _file.rebasingInfo) { + _rebaseInfo.append_byte(REBASE_OPCODE_SET_TYPE_IMM | entry.kind); + _rebaseInfo.append_byte(REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB + | entry.segIndex); + _rebaseInfo.append_uleb128(entry.segOffset); + _rebaseInfo.append_uleb128(REBASE_OPCODE_DO_REBASE_IMM_TIMES | 1); + } + _rebaseInfo.append_byte(REBASE_OPCODE_DONE); + _rebaseInfo.align(_is64 ? 8 : 4); +} + +void MachOFileLayout::buildBindInfo() { + // TODO: compress bind info. 
+ uint64_t lastAddend = 0; + for (const BindLocation& entry : _file.bindingInfo) { + _bindingInfo.append_byte(BIND_OPCODE_SET_TYPE_IMM | entry.kind); + _bindingInfo.append_byte(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB + | entry.segIndex); + _bindingInfo.append_uleb128(entry.segOffset); + _bindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | entry.ordinal); + _bindingInfo.append_byte(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM); + _bindingInfo.append_string(entry.symbolName); + if (entry.addend != lastAddend) { + _bindingInfo.append_byte(BIND_OPCODE_SET_ADDEND_SLEB); + _bindingInfo.append_sleb128(entry.addend); + lastAddend = entry.addend; + } + _bindingInfo.append_byte(BIND_OPCODE_DO_BIND); + } + _bindingInfo.append_byte(BIND_OPCODE_DONE); + _bindingInfo.align(_is64 ? 8 : 4); +} + +void MachOFileLayout::buildLazyBindInfo() { + for (const BindLocation& entry : _file.lazyBindingInfo) { + _lazyBindingInfo.append_byte(BIND_OPCODE_SET_TYPE_IMM | entry.kind); + _lazyBindingInfo.append_byte(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB + | entry.segIndex); + _lazyBindingInfo.append_uleb128Fixed(entry.segOffset, 5); + _lazyBindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | entry.ordinal); + _lazyBindingInfo.append_byte(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM); + _lazyBindingInfo.append_string(entry.symbolName); + _lazyBindingInfo.append_byte(BIND_OPCODE_DO_BIND); + _lazyBindingInfo.append_byte(BIND_OPCODE_DONE); + } + _lazyBindingInfo.append_byte(BIND_OPCODE_DONE); + _lazyBindingInfo.align(_is64 ? 8 : 4); +} + +void MachOFileLayout::TrieNode::addSymbol(const Export& entry, + BumpPtrAllocator &allocator, + std::vector<TrieNode*> &allNodes) { + StringRef partialStr = entry.name.drop_front(_cummulativeString.size()); + for (TrieEdge &edge : _children) { + StringRef edgeStr = edge._subString; + if (partialStr.startswith(edgeStr)) { + // Already have matching edge, go down that path. 
+ edge._child->addSymbol(entry, allocator, allNodes); + return; + } + // See if string has commmon prefix with existing edge. + for (int n=edgeStr.size()-1; n > 0; --n) { + if (partialStr.substr(0, n).equals(edgeStr.substr(0, n))) { + // Splice in new node: was A -> C, now A -> B -> C + StringRef bNodeStr = edge._child->_cummulativeString; + bNodeStr = bNodeStr.drop_back(edgeStr.size()-n).copy(allocator); + TrieNode* bNode = new (allocator) TrieNode(bNodeStr); + allNodes.push_back(bNode); + TrieNode* cNode = edge._child; + StringRef abEdgeStr = edgeStr.substr(0,n).copy(allocator); + StringRef bcEdgeStr = edgeStr.substr(n).copy(allocator); + DEBUG_WITH_TYPE("trie-builder", llvm::dbgs() + << "splice in TrieNode('" << bNodeStr + << "') between edge '" + << abEdgeStr << "' and edge='" + << bcEdgeStr<< "'\n"); + TrieEdge& abEdge = edge; + abEdge._subString = abEdgeStr; + abEdge._child = bNode; + TrieEdge *bcEdge = new (allocator) TrieEdge(bcEdgeStr, cNode); + bNode->_children.push_back(std::move(*bcEdge)); + bNode->addSymbol(entry, allocator, allNodes); + return; + } + } + } + if (entry.flags & EXPORT_SYMBOL_FLAGS_REEXPORT) { + assert(entry.otherOffset != 0); + } + if (entry.flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) { + assert(entry.otherOffset != 0); + } + // No commonality with any existing child, make a new edge. 
+ TrieNode* newNode = new (allocator) TrieNode(entry.name.copy(allocator)); + TrieEdge *newEdge = new (allocator) TrieEdge(partialStr, newNode); + _children.push_back(std::move(*newEdge)); + DEBUG_WITH_TYPE("trie-builder", llvm::dbgs() + << "new TrieNode('" << entry.name << "') with edge '" + << partialStr << "' from node='" + << _cummulativeString << "'\n"); + newNode->_address = entry.offset; + newNode->_flags = entry.flags | entry.kind; + newNode->_other = entry.otherOffset; + if ((entry.flags & EXPORT_SYMBOL_FLAGS_REEXPORT) && !entry.otherName.empty()) + newNode->_importedName = entry.otherName.copy(allocator); + newNode->_hasExportInfo = true; + allNodes.push_back(newNode); +} + +bool MachOFileLayout::TrieNode::updateOffset(uint32_t& offset) { + uint32_t nodeSize = 1; // Length when no export info + if (_hasExportInfo) { + if (_flags & EXPORT_SYMBOL_FLAGS_REEXPORT) { + nodeSize = llvm::getULEB128Size(_flags); + nodeSize += llvm::getULEB128Size(_other); // Other contains ordinal. + nodeSize += _importedName.size(); + ++nodeSize; // Trailing zero in imported name. + } else { + nodeSize = llvm::getULEB128Size(_flags) + llvm::getULEB128Size(_address); + if (_flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) + nodeSize += llvm::getULEB128Size(_other); + } + // Overall node size so far is uleb128 of export info + actual export info. + nodeSize += llvm::getULEB128Size(nodeSize); + } + // Compute size of all child edges. + ++nodeSize; // Byte for number of chidren. + for (TrieEdge &edge : _children) { + nodeSize += edge._subString.size() + 1 // String length. + + llvm::getULEB128Size(edge._child->_trieOffset); // Offset len. + } + // On input, 'offset' is new prefered location for this node. + bool result = (_trieOffset != offset); + // Store new location in node object for use by parents. + _trieOffset = offset; + // Update offset for next iteration. + offset += nodeSize; + // Return true if _trieOffset was changed. 
+ return result; +} + +void MachOFileLayout::TrieNode::appendToByteBuffer(ByteBuffer &out) { + if (_hasExportInfo) { + if (_flags & EXPORT_SYMBOL_FLAGS_REEXPORT) { + if (!_importedName.empty()) { + // nodes with re-export info: size, flags, ordinal, import-name + uint32_t nodeSize = llvm::getULEB128Size(_flags) + + llvm::getULEB128Size(_other) + + _importedName.size() + 1; + assert(nodeSize < 256); + out.append_byte(nodeSize); + out.append_uleb128(_flags); + out.append_uleb128(_other); + out.append_string(_importedName); + } else { + // nodes without re-export info: size, flags, ordinal, empty-string + uint32_t nodeSize = llvm::getULEB128Size(_flags) + + llvm::getULEB128Size(_other) + 1; + assert(nodeSize < 256); + out.append_byte(nodeSize); + out.append_uleb128(_flags); + out.append_uleb128(_other); + out.append_byte(0); + } + } else if ( _flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER ) { + // Nodes with export info: size, flags, address, other + uint32_t nodeSize = llvm::getULEB128Size(_flags) + + llvm::getULEB128Size(_address) + + llvm::getULEB128Size(_other); + assert(nodeSize < 256); + out.append_byte(nodeSize); + out.append_uleb128(_flags); + out.append_uleb128(_address); + out.append_uleb128(_other); + } else { + // Nodes with export info: size, flags, address + uint32_t nodeSize = llvm::getULEB128Size(_flags) + + llvm::getULEB128Size(_address); + assert(nodeSize < 256); + out.append_byte(nodeSize); + out.append_uleb128(_flags); + out.append_uleb128(_address); + } + } else { + // Node with no export info. + uint32_t nodeSize = 0; + out.append_byte(nodeSize); + } + // Add number of children. + assert(_children.size() < 256); + out.append_byte(_children.size()); + // Append each child edge substring and node offset. 
+ for (TrieEdge &edge : _children) { + out.append_string(edge._subString); + out.append_uleb128(edge._child->_trieOffset); + } +} + +void MachOFileLayout::buildExportTrie() { + if (_file.exportInfo.empty()) + return; + + // For all temporary strings and objects used building trie. + BumpPtrAllocator allocator; + + // Build trie of all exported symbols. + TrieNode* rootNode = new (allocator) TrieNode(StringRef()); + std::vector<TrieNode*> allNodes; + allNodes.reserve(_file.exportInfo.size()*2); + allNodes.push_back(rootNode); + for (const Export& entry : _file.exportInfo) { + rootNode->addSymbol(entry, allocator, allNodes); + } + + // Assign each node in the vector an offset in the trie stream, iterating + // until all uleb128 sizes have stabilized. + bool more; + do { + uint32_t offset = 0; + more = false; + for (TrieNode* node : allNodes) { + if (node->updateOffset(offset)) + more = true; + } + } while (more); + + // Serialize trie to ByteBuffer. + for (TrieNode* node : allNodes) { + node->appendToByteBuffer(_exportTrie); + } + _exportTrie.align(_is64 ? 8 : 4); +} + + +void MachOFileLayout::computeSymbolTableSizes() { + // MachO symbol tables have three ranges: locals, globals, and undefines + const size_t nlistSize = (_is64 ? 
sizeof(nlist_64) : sizeof(nlist)); + _symbolTableSize = nlistSize * (_file.localSymbols.size() + + _file.globalSymbols.size() + + _file.undefinedSymbols.size()); + _symbolStringPoolSize = 0; + for (const Symbol &sym : _file.localSymbols) { + _symbolStringPoolSize += (sym.name.size()+1); + } + for (const Symbol &sym : _file.globalSymbols) { + _symbolStringPoolSize += (sym.name.size()+1); + } + for (const Symbol &sym : _file.undefinedSymbols) { + _symbolStringPoolSize += (sym.name.size()+1); + } + _symbolTableLocalsStartIndex = 0; + _symbolTableGlobalsStartIndex = _file.localSymbols.size(); + _symbolTableUndefinesStartIndex = _symbolTableGlobalsStartIndex + + _file.globalSymbols.size(); + + _indirectSymbolTableCount = 0; + for (const Section § : _file.sections) { + _indirectSymbolTableCount += sect.indirectSymbols.size(); + } +} + +void MachOFileLayout::computeDataInCodeSize() { + _dataInCodeSize = _file.dataInCode.size() * sizeof(data_in_code_entry); +} + +void MachOFileLayout::writeLinkEditContent() { + if (_file.fileType == llvm::MachO::MH_OBJECT) { + writeRelocations(); + writeDataInCodeInfo(); + writeSymbolTable(); + } else { + writeRebaseInfo(); + writeBindingInfo(); + writeLazyBindingInfo(); + // TODO: add weak binding info + writeExportInfo(); + writeDataInCodeInfo(); + writeSymbolTable(); + } +} + +std::error_code MachOFileLayout::writeBinary(StringRef path) { + // Check for pending error from constructor. + if (_ec) + return _ec; + // Create FileOutputBuffer with calculated size. + std::unique_ptr<llvm::FileOutputBuffer> fob; + unsigned flags = 0; + if (_file.fileType != llvm::MachO::MH_OBJECT) + flags = llvm::FileOutputBuffer::F_executable; + std::error_code ec; + ec = llvm::FileOutputBuffer::create(path, size(), fob, flags); + if (ec) + return ec; + + // Write content. 
+ _buffer = fob->getBufferStart(); + writeMachHeader(); + ec = writeLoadCommands(); + if (ec) + return ec; + writeSectionContent(); + writeLinkEditContent(); + fob->commit(); + + return std::error_code(); +} + + +/// Takes in-memory normalized view and writes a mach-o object file. +std::error_code writeBinary(const NormalizedFile &file, StringRef path) { + MachOFileLayout layout(file); + return layout.writeBinary(path); +} + + +} // namespace normalized +} // namespace mach_o +} // namespace lld + diff --git a/lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp b/lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp new file mode 100644 index 000000000000..4d6183f71df7 --- /dev/null +++ b/lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp @@ -0,0 +1,1238 @@ +//===- lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp ------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +/// +/// \file Converts from in-memory Atoms to in-memory normalized mach-o. 
+/// +/// +------------+ +/// | normalized | +/// +------------+ +/// ^ +/// | +/// | +/// +-------+ +/// | Atoms | +/// +-------+ + +#include "MachONormalizedFile.h" +#include "ArchHandler.h" +#include "MachONormalizedFileBinaryUtils.h" +#include "lld/Core/Error.h" +#include "lld/Core/LLVM.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/MachO.h" +#include <map> +#include <system_error> + +using llvm::StringRef; +using llvm::isa; +using namespace llvm::MachO; +using namespace lld::mach_o::normalized; +using namespace lld; + +namespace { + +struct AtomInfo { + const DefinedAtom *atom; + uint64_t offsetInSection; +}; + +struct SectionInfo { + SectionInfo(StringRef seg, StringRef sect, SectionType type, + const MachOLinkingContext &ctxt, uint32_t attr=0); + + StringRef segmentName; + StringRef sectionName; + SectionType type; + uint32_t attributes; + uint64_t address; + uint64_t size; + uint32_t alignment; + std::vector<AtomInfo> atomsAndOffsets; + uint32_t normalizedSectionIndex; + uint32_t finalSectionIndex; +}; + +SectionInfo::SectionInfo(StringRef sg, StringRef sct, SectionType t, + const MachOLinkingContext &ctxt, uint32_t attrs) + : segmentName(sg), sectionName(sct), type(t), attributes(attrs), + address(0), size(0), alignment(0), + normalizedSectionIndex(0), finalSectionIndex(0) { + uint8_t align; + if (ctxt.sectionAligned(segmentName, sectionName, align)) { + alignment = align; + } +} + +struct SegmentInfo { + SegmentInfo(StringRef name); + + StringRef name; + uint64_t address; + uint64_t size; + uint32_t access; + std::vector<SectionInfo*> sections; + uint32_t normalizedSegmentIndex; +}; + +SegmentInfo::SegmentInfo(StringRef n) + : name(n), address(0), size(0), access(0), normalizedSegmentIndex(0) { +} + + +class Util { +public: + Util(const MachOLinkingContext &ctxt) + : 
_context(ctxt), _archHandler(ctxt.archHandler()), _entryAtom(nullptr) {} + ~Util(); + + void assignAtomsToSections(const lld::File &atomFile); + void organizeSections(); + void assignAddressesToSections(const NormalizedFile &file); + uint32_t fileFlags(); + void copySegmentInfo(NormalizedFile &file); + void copySectionInfo(NormalizedFile &file); + void updateSectionInfo(NormalizedFile &file); + void buildAtomToAddressMap(); + std::error_code addSymbols(const lld::File &atomFile, NormalizedFile &file); + void addIndirectSymbols(const lld::File &atomFile, NormalizedFile &file); + void addRebaseAndBindingInfo(const lld::File &, NormalizedFile &file); + void addExportInfo(const lld::File &, NormalizedFile &file); + void addSectionRelocs(const lld::File &, NormalizedFile &file); + void buildDataInCodeArray(const lld::File &, NormalizedFile &file); + void addDependentDylibs(const lld::File &, NormalizedFile &file); + void copyEntryPointAddress(NormalizedFile &file); + void copySectionContent(NormalizedFile &file); + +private: + typedef std::map<DefinedAtom::ContentType, SectionInfo*> TypeToSection; + typedef llvm::DenseMap<const Atom*, uint64_t> AtomToAddress; + + struct DylibInfo { int ordinal; bool hasWeak; bool hasNonWeak; }; + typedef llvm::StringMap<DylibInfo> DylibPathToInfo; + + SectionInfo *sectionForAtom(const DefinedAtom*); + SectionInfo *getRelocatableSection(DefinedAtom::ContentType type); + SectionInfo *getFinalSection(DefinedAtom::ContentType type); + void appendAtom(SectionInfo *sect, const DefinedAtom *atom); + SegmentInfo *segmentForName(StringRef segName); + void layoutSectionsInSegment(SegmentInfo *seg, uint64_t &addr); + void layoutSectionsInTextSegment(size_t, SegmentInfo *, uint64_t &); + void copySectionContent(SectionInfo *si, ContentBytes &content); + uint16_t descBits(const DefinedAtom* atom); + int dylibOrdinal(const SharedLibraryAtom *sa); + void segIndexForSection(const SectionInfo *sect, + uint8_t &segmentIndex, uint64_t &segmentStartAddr); 
+ const Atom *targetOfLazyPointer(const DefinedAtom *lpAtom); + const Atom *targetOfStub(const DefinedAtom *stubAtom); + std::error_code getSymbolTableRegion(const DefinedAtom* atom, + bool &inGlobalsRegion, + SymbolScope &symbolScope); + void appendSection(SectionInfo *si, NormalizedFile &file); + uint32_t sectionIndexForAtom(const Atom *atom); + + static uint64_t alignTo(uint64_t value, uint8_t align2); + typedef llvm::DenseMap<const Atom*, uint32_t> AtomToIndex; + struct AtomAndIndex { const Atom *atom; uint32_t index; SymbolScope scope; }; + struct AtomSorter { + bool operator()(const AtomAndIndex &left, const AtomAndIndex &right); + }; + struct SegmentSorter { + bool operator()(const SegmentInfo *left, const SegmentInfo *right); + static unsigned weight(const SegmentInfo *); + }; + struct TextSectionSorter { + bool operator()(const SectionInfo *left, const SectionInfo *right); + static unsigned weight(const SectionInfo *); + }; + + const MachOLinkingContext &_context; + mach_o::ArchHandler &_archHandler; + llvm::BumpPtrAllocator _allocator; + std::vector<SectionInfo*> _sectionInfos; + std::vector<SegmentInfo*> _segmentInfos; + TypeToSection _sectionMap; + std::vector<SectionInfo*> _customSections; + AtomToAddress _atomToAddress; + DylibPathToInfo _dylibInfo; + const DefinedAtom *_entryAtom; + AtomToIndex _atomToSymbolIndex; + std::vector<const Atom *> _machHeaderAliasAtoms; +}; + +Util::~Util() { + // The SectionInfo structs are BumpPtr allocated, but atomsAndOffsets needs + // to be deleted. + for (SectionInfo *si : _sectionInfos) { + // clear() destroys vector elements, but does not deallocate. + // Instead use swap() to deallocate vector buffer. + std::vector<AtomInfo> empty; + si->atomsAndOffsets.swap(empty); + } + // The SegmentInfo structs are BumpPtr allocated, but sections needs + // to be deleted. 
+ for (SegmentInfo *sgi : _segmentInfos) { + std::vector<SectionInfo*> empty2; + sgi->sections.swap(empty2); + } +} + +SectionInfo *Util::getRelocatableSection(DefinedAtom::ContentType type) { + StringRef segmentName; + StringRef sectionName; + SectionType sectionType; + SectionAttr sectionAttrs; + + // Use same table used by when parsing .o files. + relocatableSectionInfoForContentType(type, segmentName, sectionName, + sectionType, sectionAttrs); + // If we already have a SectionInfo with this name, re-use it. + // This can happen if two ContentType map to the same mach-o section. + for (auto sect : _sectionMap) { + if (sect.second->sectionName.equals(sectionName) && + sect.second->segmentName.equals(segmentName)) { + return sect.second; + } + } + // Otherwise allocate new SectionInfo object. + SectionInfo *sect = new (_allocator) SectionInfo(segmentName, sectionName, + sectionType, _context, + sectionAttrs); + _sectionInfos.push_back(sect); + _sectionMap[type] = sect; + return sect; +} + +#define ENTRY(seg, sect, type, atomType) \ + {seg, sect, type, DefinedAtom::atomType } + +struct MachOFinalSectionFromAtomType { + StringRef segmentName; + StringRef sectionName; + SectionType sectionType; + DefinedAtom::ContentType atomType; +}; + +const MachOFinalSectionFromAtomType sectsToAtomType[] = { + ENTRY("__TEXT", "__text", S_REGULAR, typeCode), + ENTRY("__TEXT", "__cstring", S_CSTRING_LITERALS, typeCString), + ENTRY("__TEXT", "__ustring", S_REGULAR, typeUTF16String), + ENTRY("__TEXT", "__const", S_REGULAR, typeConstant), + ENTRY("__TEXT", "__const", S_4BYTE_LITERALS, typeLiteral4), + ENTRY("__TEXT", "__const", S_8BYTE_LITERALS, typeLiteral8), + ENTRY("__TEXT", "__const", S_16BYTE_LITERALS, typeLiteral16), + ENTRY("__TEXT", "__stubs", S_SYMBOL_STUBS, typeStub), + ENTRY("__TEXT", "__stub_helper", S_REGULAR, typeStubHelper), + ENTRY("__TEXT", "__gcc_except_tab", S_REGULAR, typeLSDA), + ENTRY("__TEXT", "__eh_frame", S_COALESCED, typeCFI), + ENTRY("__TEXT", 
"__unwind_info", S_REGULAR, typeProcessedUnwindInfo), + ENTRY("__DATA", "__data", S_REGULAR, typeData), + ENTRY("__DATA", "__const", S_REGULAR, typeConstData), + ENTRY("__DATA", "__cfstring", S_REGULAR, typeCFString), + ENTRY("__DATA", "__la_symbol_ptr", S_LAZY_SYMBOL_POINTERS, + typeLazyPointer), + ENTRY("__DATA", "__mod_init_func", S_MOD_INIT_FUNC_POINTERS, + typeInitializerPtr), + ENTRY("__DATA", "__mod_term_func", S_MOD_TERM_FUNC_POINTERS, + typeTerminatorPtr), + ENTRY("__DATA", "__got", S_NON_LAZY_SYMBOL_POINTERS, + typeGOT), + ENTRY("__DATA", "__bss", S_ZEROFILL, typeZeroFill), + ENTRY("__DATA", "__interposing", S_INTERPOSING, typeInterposingTuples), +}; +#undef ENTRY + + +SectionInfo *Util::getFinalSection(DefinedAtom::ContentType atomType) { + for (auto &p : sectsToAtomType) { + if (p.atomType != atomType) + continue; + SectionAttr sectionAttrs = 0; + switch (atomType) { + case DefinedAtom::typeCode: + case DefinedAtom::typeStub: + case DefinedAtom::typeStubHelper: + sectionAttrs = S_ATTR_PURE_INSTRUCTIONS; + break; + default: + break; + } + // If we already have a SectionInfo with this name, re-use it. + // This can happen if two ContentType map to the same mach-o section. + for (auto sect : _sectionMap) { + if (sect.second->sectionName.equals(p.sectionName) && + sect.second->segmentName.equals(p.segmentName)) { + return sect.second; + } + } + // Otherwise allocate new SectionInfo object. + SectionInfo *sect = new (_allocator) SectionInfo(p.segmentName, + p.sectionName, + p.sectionType, + _context, + sectionAttrs); + _sectionInfos.push_back(sect); + _sectionMap[atomType] = sect; + return sect; + } + llvm_unreachable("content type not yet supported"); +} + + + +SectionInfo *Util::sectionForAtom(const DefinedAtom *atom) { + if (atom->sectionChoice() == DefinedAtom::sectionBasedOnContent) { + // Section for this atom is derived from content type. 
+ DefinedAtom::ContentType type = atom->contentType(); + auto pos = _sectionMap.find(type); + if ( pos != _sectionMap.end() ) + return pos->second; + bool rMode = (_context.outputMachOType() == llvm::MachO::MH_OBJECT); + return rMode ? getRelocatableSection(type) : getFinalSection(type); + } else { + // This atom needs to be in a custom section. + StringRef customName = atom->customSectionName(); + // Look to see if we have already allocated the needed custom section. + for(SectionInfo *sect : _customSections) { + const DefinedAtom *firstAtom = sect->atomsAndOffsets.front().atom; + if (firstAtom->customSectionName().equals(customName)) { + return sect; + } + } + // Not found, so need to create a new custom section. + size_t seperatorIndex = customName.find('/'); + assert(seperatorIndex != StringRef::npos); + StringRef segName = customName.slice(0, seperatorIndex); + StringRef sectName = customName.drop_front(seperatorIndex + 1); + SectionInfo *sect = new (_allocator) SectionInfo(segName, sectName, + S_REGULAR, _context); + _customSections.push_back(sect); + _sectionInfos.push_back(sect); + return sect; + } +} + + +void Util::appendAtom(SectionInfo *sect, const DefinedAtom *atom) { + // Figure out offset for atom in this section given alignment constraints. + uint64_t offset = sect->size; + DefinedAtom::Alignment atomAlign = atom->alignment(); + uint64_t align2 = 1 << atomAlign.powerOf2; + uint64_t requiredModulus = atomAlign.modulus; + uint64_t currentModulus = (offset % align2); + if ( currentModulus != requiredModulus ) { + if ( requiredModulus > currentModulus ) + offset += requiredModulus-currentModulus; + else + offset += align2+requiredModulus-currentModulus; + } + // Record max alignment of any atom in this section. + if ( atomAlign.powerOf2 > sect->alignment ) + sect->alignment = atomAlign.powerOf2; + // Assign atom to this section with this offset. 
+ AtomInfo ai = {atom, offset}; + sect->atomsAndOffsets.push_back(ai); + // Update section size to include this atom. + sect->size = offset + atom->size(); +} + +void Util::assignAtomsToSections(const lld::File &atomFile) { + for (const DefinedAtom *atom : atomFile.defined()) { + if (atom->contentType() == DefinedAtom::typeMachHeader) + _machHeaderAliasAtoms.push_back(atom); + else + appendAtom(sectionForAtom(atom), atom); + } +} + +SegmentInfo *Util::segmentForName(StringRef segName) { + for (SegmentInfo *si : _segmentInfos) { + if ( si->name.equals(segName) ) + return si; + } + SegmentInfo *info = new (_allocator) SegmentInfo(segName); + if (segName.equals("__TEXT")) + info->access = VM_PROT_READ | VM_PROT_EXECUTE; + else if (segName.equals("__DATA")) + info->access = VM_PROT_READ | VM_PROT_WRITE; + else if (segName.equals("__PAGEZERO")) + info->access = 0; + _segmentInfos.push_back(info); + return info; +} + +unsigned Util::SegmentSorter::weight(const SegmentInfo *seg) { + return llvm::StringSwitch<unsigned>(seg->name) + .Case("__PAGEZERO", 1) + .Case("__TEXT", 2) + .Case("__DATA", 3) + .Default(100); +} + +bool Util::SegmentSorter::operator()(const SegmentInfo *left, + const SegmentInfo *right) { + return (weight(left) < weight(right)); +} + +unsigned Util::TextSectionSorter::weight(const SectionInfo *sect) { + return llvm::StringSwitch<unsigned>(sect->sectionName) + .Case("__text", 1) + .Case("__stubs", 2) + .Case("__stub_helper", 3) + .Case("__const", 4) + .Case("__cstring", 5) + .Case("__unwind_info", 98) + .Case("__eh_frame", 99) + .Default(10); +} + +bool Util::TextSectionSorter::operator()(const SectionInfo *left, + const SectionInfo *right) { + return (weight(left) < weight(right)); +} + + +void Util::organizeSections() { + if (_context.outputMachOType() == llvm::MachO::MH_OBJECT) { + // Leave sections ordered as normalized file specified. 
+ uint32_t sectionIndex = 1; + for (SectionInfo *si : _sectionInfos) { + si->finalSectionIndex = sectionIndex++; + } + } else { + switch (_context.outputMachOType()){ + case llvm::MachO::MH_EXECUTE: + // Main executables, need a zero-page segment + segmentForName("__PAGEZERO"); + // Fall into next case. + case llvm::MachO::MH_DYLIB: + case llvm::MachO::MH_BUNDLE: + // All dynamic code needs TEXT segment to hold the load commands. + segmentForName("__TEXT"); + break; + default: + break; + } + // Group sections into segments. + for (SectionInfo *si : _sectionInfos) { + SegmentInfo *seg = segmentForName(si->segmentName); + seg->sections.push_back(si); + } + // Sort segments. + std::sort(_segmentInfos.begin(), _segmentInfos.end(), SegmentSorter()); + + // Sort sections within segments. + for (SegmentInfo *seg : _segmentInfos) { + if (seg->name.equals("__TEXT")) { + std::sort(seg->sections.begin(), seg->sections.end(), + TextSectionSorter()); + } + } + + // Record final section indexes. + uint32_t segmentIndex = 0; + uint32_t sectionIndex = 1; + for (SegmentInfo *seg : _segmentInfos) { + seg->normalizedSegmentIndex = segmentIndex++; + for (SectionInfo *sect : seg->sections) { + sect->finalSectionIndex = sectionIndex++; + } + } + } + +} + +uint64_t Util::alignTo(uint64_t value, uint8_t align2) { + return llvm::RoundUpToAlignment(value, 1 << align2); +} + + +void Util::layoutSectionsInSegment(SegmentInfo *seg, uint64_t &addr) { + seg->address = addr; + for (SectionInfo *sect : seg->sections) { + sect->address = alignTo(addr, sect->alignment); + addr = sect->address + sect->size; + } + seg->size = llvm::RoundUpToAlignment(addr - seg->address,_context.pageSize()); +} + + +// __TEXT segment lays out backwards so padding is at front after load commands. +void Util::layoutSectionsInTextSegment(size_t hlcSize, SegmentInfo *seg, + uint64_t &addr) { + seg->address = addr; + // Walks sections starting at end to calculate padding for start. 
+ int64_t taddr = 0; + for (auto it = seg->sections.rbegin(); it != seg->sections.rend(); ++it) { + SectionInfo *sect = *it; + taddr -= sect->size; + taddr = taddr & (0 - (1 << sect->alignment)); + } + int64_t padding = taddr - hlcSize; + while (padding < 0) + padding += _context.pageSize(); + // Start assigning section address starting at padded offset. + addr += (padding + hlcSize); + for (SectionInfo *sect : seg->sections) { + sect->address = alignTo(addr, sect->alignment); + addr = sect->address + sect->size; + } + seg->size = llvm::RoundUpToAlignment(addr - seg->address,_context.pageSize()); +} + + +void Util::assignAddressesToSections(const NormalizedFile &file) { + size_t hlcSize = headerAndLoadCommandsSize(file); + uint64_t address = 0; + if (_context.outputMachOType() != llvm::MachO::MH_OBJECT) { + for (SegmentInfo *seg : _segmentInfos) { + if (seg->name.equals("__PAGEZERO")) { + seg->size = _context.pageZeroSize(); + address += seg->size; + } + else if (seg->name.equals("__TEXT")) { + // _context.baseAddress() == 0 implies it was either unspecified or + // pageZeroSize is also 0. In either case resetting address is safe. + address = _context.baseAddress() ? 
_context.baseAddress() : address; + layoutSectionsInTextSegment(hlcSize, seg, address); + } else + layoutSectionsInSegment(seg, address); + + address = llvm::RoundUpToAlignment(address, _context.pageSize()); + } + DEBUG_WITH_TYPE("WriterMachO-norm", + llvm::dbgs() << "assignAddressesToSections()\n"; + for (SegmentInfo *sgi : _segmentInfos) { + llvm::dbgs() << " address=" << llvm::format("0x%08llX", sgi->address) + << ", size=" << llvm::format("0x%08llX", sgi->size) + << ", segment-name='" << sgi->name + << "'\n"; + for (SectionInfo *si : sgi->sections) { + llvm::dbgs()<< " addr=" << llvm::format("0x%08llX", si->address) + << ", size=" << llvm::format("0x%08llX", si->size) + << ", section-name='" << si->sectionName + << "'\n"; + } + } + ); + } else { + for (SectionInfo *sect : _sectionInfos) { + sect->address = alignTo(address, sect->alignment); + address = sect->address + sect->size; + } + DEBUG_WITH_TYPE("WriterMachO-norm", + llvm::dbgs() << "assignAddressesToSections()\n"; + for (SectionInfo *si : _sectionInfos) { + llvm::dbgs() << " section=" << si->sectionName + << " address= " << llvm::format("0x%08llX", si->address) + << " size= " << llvm::format("0x%08llX", si->size) + << "\n"; + } + ); + } +} + + +void Util::copySegmentInfo(NormalizedFile &file) { + for (SegmentInfo *sgi : _segmentInfos) { + Segment seg; + seg.name = sgi->name; + seg.address = sgi->address; + seg.size = sgi->size; + seg.access = sgi->access; + file.segments.push_back(seg); + } +} + +void Util::appendSection(SectionInfo *si, NormalizedFile &file) { + // Add new empty section to end of file.sections. + Section temp; + file.sections.push_back(std::move(temp)); + Section* normSect = &file.sections.back(); + // Copy fields to normalized section.
+ normSect->segmentName = si->segmentName; + normSect->sectionName = si->sectionName; + normSect->type = si->type; + normSect->attributes = si->attributes; + normSect->address = si->address; + normSect->alignment = si->alignment; + // Record where normalized section is. + si->normalizedSectionIndex = file.sections.size()-1; +} + +/// Fills in the content of each normalized section. Zero-fill sections get a +/// null-backed ArrayRef of the section size; every other section gets a buffer +/// from file.ownedAllocations that the ArchHandler writes atom by atom. +void Util::copySectionContent(NormalizedFile &file) { + const bool r = (_context.outputMachOType() == llvm::MachO::MH_OBJECT); + + // Utility function for ArchHandler to find address of atom in output file. + auto addrForAtom = [&] (const Atom &atom) -> uint64_t { + auto pos = _atomToAddress.find(&atom); + assert(pos != _atomToAddress.end()); + return pos->second; + }; + + // Utility function for ArchHandler to find the address of the section + // that an atom was assigned to. + auto sectionAddrForAtom = [&] (const Atom &atom) -> uint64_t { + for (const SectionInfo *sectInfo : _sectionInfos) + for (const AtomInfo &atomInfo : sectInfo->atomsAndOffsets) + if (atomInfo.atom == &atom) + return sectInfo->address; + llvm_unreachable("atom not assigned to section"); + }; + + for (SectionInfo *si : _sectionInfos) { + Section *normSect = &file.sections[si->normalizedSectionIndex]; + if (si->type == llvm::MachO::S_ZEROFILL) { + const uint8_t *empty = nullptr; + normSect->content = llvm::makeArrayRef(empty, si->size); + continue; + } + // Copy content from atoms to content buffer for section. + uint8_t *sectionContent = file.ownedAllocations.Allocate<uint8_t>(si->size); + normSect->content = llvm::makeArrayRef(sectionContent, si->size); + for (AtomInfo &ai : si->atomsAndOffsets) { + uint8_t *atomContent = reinterpret_cast<uint8_t*> + (&sectionContent[ai.offsetInSection]); + _archHandler.generateAtomContent(*ai.atom, r, addrForAtom, + sectionAddrForAtom, + _context.baseAddress(), atomContent); + } + } +} + + +void Util::copySectionInfo(NormalizedFile &file) { + file.sections.reserve(_sectionInfos.size()); + // For final linked images, write sections grouped by segment.
+ if (_context.outputMachOType() != llvm::MachO::MH_OBJECT) { + for (SegmentInfo *sgi : _segmentInfos) { + for (SectionInfo *si : sgi->sections) { + appendSection(si, file); + } + } + } else { + // Object files write sections in default order. + for (SectionInfo *si : _sectionInfos) { + appendSection(si, file); + } + } +} + +void Util::updateSectionInfo(NormalizedFile &file) { + file.sections.reserve(_sectionInfos.size()); + if (_context.outputMachOType() != llvm::MachO::MH_OBJECT) { + // For final linked images, sections grouped by segment. + for (SegmentInfo *sgi : _segmentInfos) { + Segment *normSeg = &file.segments[sgi->normalizedSegmentIndex]; + normSeg->address = sgi->address; + normSeg->size = sgi->size; + for (SectionInfo *si : sgi->sections) { + Section *normSect = &file.sections[si->normalizedSectionIndex]; + normSect->address = si->address; + } + } + } else { + // Object files write sections in default order. + for (SectionInfo *si : _sectionInfos) { + Section *normSect = &file.sections[si->normalizedSectionIndex]; + normSect->address = si->address; + } + } +} + +void Util::copyEntryPointAddress(NormalizedFile &nFile) { + if (_context.outputTypeHasEntry()) { + if (_archHandler.isThumbFunction(*_entryAtom)) + nFile.entryAddress = (_atomToAddress[_entryAtom] | 1); + else + nFile.entryAddress = _atomToAddress[_entryAtom]; + } +} + +void Util::buildAtomToAddressMap() { + DEBUG_WITH_TYPE("WriterMachO-address", llvm::dbgs() + << "assign atom addresses:\n"); + const bool lookForEntry = _context.outputTypeHasEntry(); + for (SectionInfo *sect : _sectionInfos) { + for (const AtomInfo &info : sect->atomsAndOffsets) { + _atomToAddress[info.atom] = sect->address + info.offsetInSection; + if (lookForEntry && (info.atom->contentType() == DefinedAtom::typeCode) && + (info.atom->size() != 0) && + info.atom->name() == _context.entrySymbolName()) { + _entryAtom = info.atom; + } + DEBUG_WITH_TYPE("WriterMachO-address", llvm::dbgs() + << " address=" + << 
llvm::format("0x%016llX", _atomToAddress[info.atom]) + << " atom=" << info.atom + << " name=" << info.atom->name() << "\n"); + } + } + for (const Atom *atom : _machHeaderAliasAtoms) { + _atomToAddress[atom] = _context.baseAddress(); + DEBUG_WITH_TYPE("WriterMachO-address", llvm::dbgs() + << " address=" + << llvm::format("0x%016llX", _atomToAddress[atom]) + << " atom=" << atom + << " name=" << atom->name() << "\n"); + } +} + +uint16_t Util::descBits(const DefinedAtom* atom) { + uint16_t desc = 0; + switch (atom->merge()) { + case lld::DefinedAtom::mergeNo: + case lld::DefinedAtom::mergeAsTentative: + break; + case lld::DefinedAtom::mergeAsWeak: + case lld::DefinedAtom::mergeAsWeakAndAddressUsed: + desc |= N_WEAK_DEF; + break; + case lld::DefinedAtom::mergeSameNameAndSize: + case lld::DefinedAtom::mergeByLargestSection: + case lld::DefinedAtom::mergeByContent: + llvm_unreachable("Unsupported DefinedAtom::merge()"); + break; + } + if (atom->contentType() == lld::DefinedAtom::typeResolver) + desc |= N_SYMBOL_RESOLVER; + if (_archHandler.isThumbFunction(*atom)) + desc |= N_ARM_THUMB_DEF; + if (atom->deadStrip() == DefinedAtom::deadStripNever) { + if ((atom->contentType() != DefinedAtom::typeInitializerPtr) + && (atom->contentType() != DefinedAtom::typeTerminatorPtr)) + desc |= N_NO_DEAD_STRIP; + } + return desc; +} + + +bool Util::AtomSorter::operator()(const AtomAndIndex &left, + const AtomAndIndex &right) { + return (left.atom->name().compare(right.atom->name()) < 0); +} + + +std::error_code Util::getSymbolTableRegion(const DefinedAtom* atom, + bool &inGlobalsRegion, + SymbolScope &scope) { + bool rMode = (_context.outputMachOType() == llvm::MachO::MH_OBJECT); + switch (atom->scope()) { + case Atom::scopeTranslationUnit: + scope = 0; + inGlobalsRegion = false; + return std::error_code(); + case Atom::scopeLinkageUnit: + if ((_context.exportMode() == MachOLinkingContext::ExportMode::whiteList) + && _context.exportSymbolNamed(atom->name())) { + return
make_dynamic_error_code(Twine("cannot export hidden symbol ") + + atom->name()); + } + if (rMode) { + if (_context.keepPrivateExterns()) { + // -keep_private_externs means keep in globals region as N_PEXT. + scope = N_PEXT | N_EXT; + inGlobalsRegion = true; + return std::error_code(); + } + } + // scopeLinkageUnit symbols are no longer global once linked. + scope = N_PEXT; + inGlobalsRegion = false; + return std::error_code(); + case Atom::scopeGlobal: + if (_context.exportRestrictMode()) { + if (_context.exportSymbolNamed(atom->name())) { + scope = N_EXT; + inGlobalsRegion = true; + return std::error_code(); + } else { + scope = N_PEXT; + inGlobalsRegion = false; + return std::error_code(); + } + } else { + scope = N_EXT; + inGlobalsRegion = true; + return std::error_code(); + } + break; + } + llvm_unreachable("atom->scope() unknown enum value"); +} + +std::error_code Util::addSymbols(const lld::File &atomFile, + NormalizedFile &file) { + bool rMode = (_context.outputMachOType() == llvm::MachO::MH_OBJECT); + // Mach-O symbol table has three regions: locals, globals, undefs. 
+ + // Add all local (non-global) symbols in address order + std::vector<AtomAndIndex> globals; + globals.reserve(512); + for (SectionInfo *sect : _sectionInfos) { + for (const AtomInfo &info : sect->atomsAndOffsets) { + const DefinedAtom *atom = info.atom; + if (!atom->name().empty()) { + SymbolScope symbolScope; + bool inGlobalsRegion; + if (auto ec = getSymbolTableRegion(atom, inGlobalsRegion, symbolScope)){ + return ec; + } + if (inGlobalsRegion) { + AtomAndIndex ai = { atom, sect->finalSectionIndex, symbolScope }; + globals.push_back(ai); + } else { + Symbol sym; + sym.name = atom->name(); + sym.type = N_SECT; + sym.scope = symbolScope; + sym.sect = sect->finalSectionIndex; + sym.desc = descBits(atom); + sym.value = _atomToAddress[atom]; + _atomToSymbolIndex[atom] = file.localSymbols.size(); + file.localSymbols.push_back(sym); + } + } else if (rMode && _archHandler.needsLocalSymbolInRelocatableFile(atom)){ + // Create 'Lxxx' labels for anonymous atoms if archHandler says so. + static unsigned tempNum = 1; + char tmpName[16]; + sprintf(tmpName, "L%04u", tempNum++); + StringRef tempRef(tmpName); + Symbol sym; + sym.name = tempRef.copy(file.ownedAllocations); + sym.type = N_SECT; + sym.scope = 0; + sym.sect = sect->finalSectionIndex; + sym.desc = 0; + sym.value = _atomToAddress[atom]; + _atomToSymbolIndex[atom] = file.localSymbols.size(); + file.localSymbols.push_back(sym); + } + } + } + + // Sort global symbol alphabetically, then add to symbol table. 
+ std::sort(globals.begin(), globals.end(), AtomSorter()); + const uint32_t globalStartIndex = file.localSymbols.size(); + for (AtomAndIndex &ai : globals) { + Symbol sym; + sym.name = ai.atom->name(); + sym.type = N_SECT; + sym.scope = ai.scope; + sym.sect = ai.index; + sym.desc = descBits(static_cast<const DefinedAtom*>(ai.atom)); + sym.value = _atomToAddress[ai.atom]; + _atomToSymbolIndex[ai.atom] = globalStartIndex + file.globalSymbols.size(); + file.globalSymbols.push_back(sym); + } + + + // Sort undefined symbol alphabetically, then add to symbol table. + std::vector<AtomAndIndex> undefs; + undefs.reserve(128); + for (const UndefinedAtom *atom : atomFile.undefined()) { + AtomAndIndex ai = { atom, 0, N_EXT }; + undefs.push_back(ai); + } + for (const SharedLibraryAtom *atom : atomFile.sharedLibrary()) { + AtomAndIndex ai = { atom, 0, N_EXT }; + undefs.push_back(ai); + } + std::sort(undefs.begin(), undefs.end(), AtomSorter()); + const uint32_t start = file.globalSymbols.size() + file.localSymbols.size(); + for (AtomAndIndex &ai : undefs) { + Symbol sym; + uint16_t desc = 0; + if (!rMode) { + uint8_t ordinal = dylibOrdinal(dyn_cast<SharedLibraryAtom>(ai.atom)); + llvm::MachO::SET_LIBRARY_ORDINAL(desc, ordinal); + } + sym.name = ai.atom->name(); + sym.type = N_UNDF; + sym.scope = ai.scope; + sym.sect = 0; + sym.desc = desc; + sym.value = 0; + _atomToSymbolIndex[ai.atom] = file.undefinedSymbols.size() + start; + file.undefinedSymbols.push_back(sym); + } + + return std::error_code(); +} + +const Atom *Util::targetOfLazyPointer(const DefinedAtom *lpAtom) { + for (const Reference *ref : *lpAtom) { + if (_archHandler.isLazyPointer(*ref)) { + return ref->target(); + } + } + return nullptr; +} + +const Atom *Util::targetOfStub(const DefinedAtom *stubAtom) { + for (const Reference *ref : *stubAtom) { + if (const Atom *ta = ref->target()) { + if (const DefinedAtom *lpAtom = dyn_cast<DefinedAtom>(ta)) { + const Atom *target = targetOfLazyPointer(lpAtom); + if (target) + 
return target; + } + } + } + return nullptr; +} + + +void Util::addIndirectSymbols(const lld::File &atomFile, NormalizedFile &file) { + for (SectionInfo *si : _sectionInfos) { + Section &normSect = file.sections[si->normalizedSectionIndex]; + switch (si->type) { + case llvm::MachO::S_NON_LAZY_SYMBOL_POINTERS: + for (const AtomInfo &info : si->atomsAndOffsets) { + bool foundTarget = false; + for (const Reference *ref : *info.atom) { + const Atom *target = ref->target(); + if (target) { + if (isa<const SharedLibraryAtom>(target)) { + uint32_t index = _atomToSymbolIndex[target]; + normSect.indirectSymbols.push_back(index); + foundTarget = true; + } else { + normSect.indirectSymbols.push_back( + llvm::MachO::INDIRECT_SYMBOL_LOCAL); + } + } + } + if (!foundTarget) { + normSect.indirectSymbols.push_back( + llvm::MachO::INDIRECT_SYMBOL_ABS); + } + } + break; + case llvm::MachO::S_LAZY_SYMBOL_POINTERS: + for (const AtomInfo &info : si->atomsAndOffsets) { + const Atom *target = targetOfLazyPointer(info.atom); + if (target) { + uint32_t index = _atomToSymbolIndex[target]; + normSect.indirectSymbols.push_back(index); + } + } + break; + case llvm::MachO::S_SYMBOL_STUBS: + for (const AtomInfo &info : si->atomsAndOffsets) { + const Atom *target = targetOfStub(info.atom); + if (target) { + uint32_t index = _atomToSymbolIndex[target]; + normSect.indirectSymbols.push_back(index); + } + } + break; + default: + break; + } + } + +} + +void Util::addDependentDylibs(const lld::File &atomFile,NormalizedFile &nFile) { + // Scan all imported symbols and build up list of dylibs they are from. 
+ int ordinal = 1; + for (const SharedLibraryAtom *slAtom : atomFile.sharedLibrary()) { + StringRef loadPath = slAtom->loadName(); + DylibPathToInfo::iterator pos = _dylibInfo.find(loadPath); + if (pos == _dylibInfo.end()) { + DylibInfo info; + info.ordinal = ordinal++; + info.hasWeak = slAtom->canBeNullAtRuntime(); + info.hasNonWeak = !info.hasWeak; + _dylibInfo[loadPath] = info; + DependentDylib depInfo; + depInfo.path = loadPath; + depInfo.kind = llvm::MachO::LC_LOAD_DYLIB; + depInfo.currentVersion = _context.dylibCurrentVersion(loadPath); + depInfo.compatVersion = _context.dylibCompatVersion(loadPath); + nFile.dependentDylibs.push_back(depInfo); + } else { + if ( slAtom->canBeNullAtRuntime() ) + pos->second.hasWeak = true; + else + pos->second.hasNonWeak = true; + } + } + // Automatically weak link dylib in which all symbols are weak (canBeNull). + for (DependentDylib &dep : nFile.dependentDylibs) { + DylibInfo &info = _dylibInfo[dep.path]; + if (info.hasWeak && !info.hasNonWeak) + dep.kind = llvm::MachO::LC_LOAD_WEAK_DYLIB; + else if (_context.isUpwardDylib(dep.path)) + dep.kind = llvm::MachO::LC_LOAD_UPWARD_DYLIB; + } +} + + +int Util::dylibOrdinal(const SharedLibraryAtom *sa) { + return _dylibInfo[sa->loadName()].ordinal; +} + +void Util::segIndexForSection(const SectionInfo *sect, uint8_t &segmentIndex, + uint64_t &segmentStartAddr) { + segmentIndex = 0; + for (const SegmentInfo *seg : _segmentInfos) { + if ((seg->address <= sect->address) + && (seg->address+seg->size >= sect->address+sect->size)) { + segmentStartAddr = seg->address; + return; + } + ++segmentIndex; + } + llvm_unreachable("section not in any segment"); +} + + +uint32_t Util::sectionIndexForAtom(const Atom *atom) { + uint64_t address = _atomToAddress[atom]; + uint32_t index = 1; + for (const SectionInfo *si : _sectionInfos) { + if ((si->address <= address) && (address < si->address+si->size)) + return index; + ++index; + } + llvm_unreachable("atom not in any section"); +} + +void 
Util::addSectionRelocs(const lld::File &, NormalizedFile &file) { + if (_context.outputMachOType() != llvm::MachO::MH_OBJECT) + return; + + + // Utility function for ArchHandler to find symbol index for an atom. + auto symIndexForAtom = [&] (const Atom &atom) -> uint32_t { + auto pos = _atomToSymbolIndex.find(&atom); + assert(pos != _atomToSymbolIndex.end()); + return pos->second; + }; + + // Utility function for ArchHandler to find section index for an atom. + auto sectIndexForAtom = [&] (const Atom &atom) -> uint32_t { + return sectionIndexForAtom(&atom); + }; + + // Utility function for ArchHandler to find address of atom in output file. + auto addressForAtom = [&] (const Atom &atom) -> uint64_t { + auto pos = _atomToAddress.find(&atom); + assert(pos != _atomToAddress.end()); + return pos->second; + }; + + for (SectionInfo *si : _sectionInfos) { + Section &normSect = file.sections[si->normalizedSectionIndex]; + for (const AtomInfo &info : si->atomsAndOffsets) { + const DefinedAtom *atom = info.atom; + for (const Reference *ref : *atom) { + _archHandler.appendSectionRelocations(*atom, info.offsetInSection, *ref, + symIndexForAtom, + sectIndexForAtom, + addressForAtom, + normSect.relocations); + } + } + } +} + +/// Converts the data-in-code transition references found on each atom into +/// DataInCode range entries appended to file.dataInCode. +void Util::buildDataInCodeArray(const lld::File &, NormalizedFile &file) { + for (SectionInfo *si : _sectionInfos) { + for (const AtomInfo &info : si->atomsAndOffsets) { + // Atoms that contain data-in-code have "transition" references + // which mark a point where the embedded data starts or ends. + // This needs to be converted to the mach-o format which is an array + // of data-in-code ranges.
+ uint32_t startOffset = 0; + DataRegionType mode = DataRegionType(0); + for (const Reference *ref : *info.atom) { + if (ref->kindNamespace() != Reference::KindNamespace::mach_o) + continue; + if (_archHandler.isDataInCodeTransition(ref->kindValue())) { + DataRegionType nextMode = (DataRegionType)ref->addend(); + if (mode != nextMode) { + if (mode != 0) { + // Found end data range, so make range entry. + DataInCode entry; + entry.offset = si->address + info.offsetInSection + startOffset; + entry.length = ref->offsetInAtom() - startOffset; + entry.kind = mode; + file.dataInCode.push_back(entry); + } + } + mode = nextMode; + startOffset = ref->offsetInAtom(); + } + } + if (mode != 0) { + // Function ends with data (no end transition). + DataInCode entry; + entry.offset = si->address + info.offsetInSection + startOffset; + entry.length = info.atom->size() - startOffset; + entry.kind = mode; + file.dataInCode.push_back(entry); + } + } + } +} + +void Util::addRebaseAndBindingInfo(const lld::File &atomFile, + NormalizedFile &nFile) { + if (_context.outputMachOType() == llvm::MachO::MH_OBJECT) + return; + + uint8_t segmentIndex; + uint64_t segmentStartAddr; + for (SectionInfo *sect : _sectionInfos) { + segIndexForSection(sect, segmentIndex, segmentStartAddr); + for (const AtomInfo &info : sect->atomsAndOffsets) { + const DefinedAtom *atom = info.atom; + for (const Reference *ref : *atom) { + uint64_t segmentOffset = _atomToAddress[atom] + ref->offsetInAtom() + - segmentStartAddr; + const Atom* targ = ref->target(); + if (_archHandler.isPointer(*ref)) { + // A pointer to a DefinedAtom requires rebasing. + if (isa<DefinedAtom>(targ)) { + RebaseLocation rebase; + rebase.segIndex = segmentIndex; + rebase.segOffset = segmentOffset; + rebase.kind = llvm::MachO::REBASE_TYPE_POINTER; + nFile.rebasingInfo.push_back(rebase); + } + // A pointer to an SharedLibraryAtom requires binding. 
+ if (const SharedLibraryAtom *sa = dyn_cast<SharedLibraryAtom>(targ)) { + BindLocation bind; + bind.segIndex = segmentIndex; + bind.segOffset = segmentOffset; + bind.kind = llvm::MachO::BIND_TYPE_POINTER; + bind.canBeNull = sa->canBeNullAtRuntime(); + bind.ordinal = dylibOrdinal(sa); + bind.symbolName = targ->name(); + bind.addend = ref->addend(); + nFile.bindingInfo.push_back(bind); + } + } + else if (_archHandler.isLazyPointer(*ref)) { + BindLocation bind; + if (const SharedLibraryAtom *sa = dyn_cast<SharedLibraryAtom>(targ)) { + bind.ordinal = dylibOrdinal(sa); + } else { + bind.ordinal = llvm::MachO::BIND_SPECIAL_DYLIB_SELF; + } + bind.segIndex = segmentIndex; + bind.segOffset = segmentOffset; + bind.kind = llvm::MachO::BIND_TYPE_POINTER; + bind.canBeNull = false; //sa->canBeNullAtRuntime(); + bind.symbolName = targ->name(); + bind.addend = ref->addend(); + nFile.lazyBindingInfo.push_back(bind); + } + } + } + } +} + +/// Appends one Export entry to nFile.exportInfo for every global-scope atom. +/// Does nothing for MH_OBJECT output; when the context restricts exports, +/// atoms whose names are not in the export list are skipped. +void Util::addExportInfo(const lld::File &atomFile, NormalizedFile &nFile) { + if (_context.outputMachOType() == llvm::MachO::MH_OBJECT) + return; + + for (SectionInfo *sect : _sectionInfos) { + for (const AtomInfo &info : sect->atomsAndOffsets) { + const DefinedAtom *atom = info.atom; + if (atom->scope() != Atom::scopeGlobal) + continue; + if (_context.exportRestrictMode()) { + if (!_context.exportSymbolNamed(atom->name())) + continue; + } + Export exprt; + exprt.name = atom->name(); + exprt.offset = _atomToAddress[atom]; // FIXME: subtract base address + exprt.kind = EXPORT_SYMBOL_FLAGS_KIND_REGULAR; + if (atom->merge() == DefinedAtom::mergeAsWeak) + exprt.flags = EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION; + else + exprt.flags = 0; + exprt.otherOffset = 0; + exprt.otherName = StringRef(); + nFile.exportInfo.push_back(exprt); + } + } +} + +uint32_t Util::fileFlags() { + // FIXME: these need to be determined at runtime.
+ if (_context.outputMachOType() == MH_OBJECT) { + return MH_SUBSECTIONS_VIA_SYMBOLS; + } else { + if ((_context.outputMachOType() == MH_EXECUTE) && _context.PIE()) + return MH_DYLDLINK | MH_NOUNDEFS | MH_TWOLEVEL | MH_PIE; + else + return MH_DYLDLINK | MH_NOUNDEFS | MH_TWOLEVEL; + } +} + +} // end anonymous namespace + + +namespace lld { +namespace mach_o { +namespace normalized { + +/// Convert a set of Atoms into a normalized mach-o file. +ErrorOr<std::unique_ptr<NormalizedFile>> +normalizedFromAtoms(const lld::File &atomFile, + const MachOLinkingContext &context) { + // The util object buffers info until the normalized file can be made. + Util util(context); + util.assignAtomsToSections(atomFile); + util.organizeSections(); + + std::unique_ptr<NormalizedFile> f(new NormalizedFile()); + NormalizedFile &normFile = *f.get(); + normFile.arch = context.arch(); + normFile.fileType = context.outputMachOType(); + normFile.flags = util.fileFlags(); + normFile.installName = context.installName(); + normFile.currentVersion = context.currentVersion(); + normFile.compatVersion = context.compatibilityVersion(); + normFile.pageSize = context.pageSize(); + normFile.rpaths = context.rpaths(); + util.addDependentDylibs(atomFile, normFile); + util.copySegmentInfo(normFile); + util.copySectionInfo(normFile); + util.assignAddressesToSections(normFile); + util.buildAtomToAddressMap(); + util.updateSectionInfo(normFile); + util.copySectionContent(normFile); + if (auto ec = util.addSymbols(atomFile, normFile)) { + return ec; + } + util.addIndirectSymbols(atomFile, normFile); + util.addRebaseAndBindingInfo(atomFile, normFile); + util.addExportInfo(atomFile, normFile); + util.addSectionRelocs(atomFile, normFile); + util.buildDataInCodeArray(atomFile, normFile); + util.copyEntryPointAddress(normFile); + + return std::move(f); +} + + +} // namespace normalized +} // namespace mach_o +} // namespace lld + diff --git a/lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp 
b/lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp new file mode 100644 index 000000000000..124e0eaffeeb --- /dev/null +++ b/lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp @@ -0,0 +1,911 @@ +//===- lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp --------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +/// +/// \file Converts from in-memory normalized mach-o to in-memory Atoms. +/// +/// +------------+ +/// | normalized | +/// +------------+ +/// | +/// | +/// v +/// +-------+ +/// | Atoms | +/// +-------+ + +#include "MachONormalizedFile.h" +#include "ArchHandler.h" +#include "Atoms.h" +#include "File.h" +#include "MachONormalizedFileBinaryUtils.h" +#include "lld/Core/Error.h" +#include "lld/Core/LLVM.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/MachO.h" + +using namespace llvm::MachO; +using namespace lld::mach_o::normalized; + +namespace lld { +namespace mach_o { + + +namespace { // anonymous + + +#define ENTRY(seg, sect, type, atomType) \ + {seg, sect, type, DefinedAtom::atomType } + +struct MachORelocatableSectionToAtomType { + StringRef segmentName; + StringRef sectionName; + SectionType sectionType; + DefinedAtom::ContentType atomType; +}; + +const MachORelocatableSectionToAtomType sectsToAtomType[] = { + ENTRY("__TEXT", "__text", S_REGULAR, typeCode), + ENTRY("__TEXT", "__text", S_REGULAR, typeResolver), + ENTRY("__TEXT", "__cstring", S_CSTRING_LITERALS, typeCString), + ENTRY("", "", S_CSTRING_LITERALS, typeCString), + ENTRY("__TEXT", "__ustring", S_REGULAR, typeUTF16String), + ENTRY("__TEXT", "__const", S_REGULAR, typeConstant), + ENTRY("__TEXT", "__const_coal", S_COALESCED, typeConstant), + ENTRY("__TEXT", "__eh_frame", S_COALESCED, typeCFI), + ENTRY("__TEXT", "__eh_frame", S_REGULAR, typeCFI), + ENTRY("__TEXT", "__literal4", 
S_4BYTE_LITERALS, typeLiteral4), + ENTRY("__TEXT", "__literal8", S_8BYTE_LITERALS, typeLiteral8), + ENTRY("__TEXT", "__literal16", S_16BYTE_LITERALS, typeLiteral16), + ENTRY("__TEXT", "__gcc_except_tab", S_REGULAR, typeLSDA), + ENTRY("__DATA", "__data", S_REGULAR, typeData), + ENTRY("__DATA", "__datacoal_nt", S_COALESCED, typeData), + ENTRY("__DATA", "__const", S_REGULAR, typeConstData), + ENTRY("__DATA", "__cfstring", S_REGULAR, typeCFString), + ENTRY("__DATA", "__mod_init_func", S_MOD_INIT_FUNC_POINTERS, + typeInitializerPtr), + ENTRY("__DATA", "__mod_term_func", S_MOD_TERM_FUNC_POINTERS, + typeTerminatorPtr), + ENTRY("__DATA", "__got", S_NON_LAZY_SYMBOL_POINTERS, + typeGOT), + ENTRY("__DATA", "__bss", S_ZEROFILL, typeZeroFill), + ENTRY("", "", S_NON_LAZY_SYMBOL_POINTERS, + typeGOT), + ENTRY("__DATA", "__interposing", S_INTERPOSING, typeInterposingTuples), + ENTRY("", "", S_INTERPOSING, typeInterposingTuples), + ENTRY("__LD", "__compact_unwind", S_REGULAR, + typeCompactUnwindInfo), + ENTRY("", "", S_REGULAR, typeUnknown) +}; +#undef ENTRY + + +/// Figures out ContentType of a mach-o section. +/// \p customSectionName is set to true when the match came from a table row +/// whose segment and section names are both empty (wildcards). +DefinedAtom::ContentType atomTypeFromSection(const Section &section, + bool &customSectionName) { + // First look for match of name and type. Empty names in table are wildcards.
+ customSectionName = false; + for (const MachORelocatableSectionToAtomType *p = sectsToAtomType ; + p->atomType != DefinedAtom::typeUnknown; ++p) { + if (p->sectionType != section.type) + continue; + if (!p->segmentName.equals(section.segmentName) && !p->segmentName.empty()) + continue; + if (!p->sectionName.equals(section.sectionName) && !p->sectionName.empty()) + continue; + customSectionName = p->segmentName.empty() && p->sectionName.empty(); + return p->atomType; + } + // Look for code denoted by section attributes + if (section.attributes & S_ATTR_PURE_INSTRUCTIONS) + return DefinedAtom::typeCode; + + return DefinedAtom::typeUnknown; +} + +enum AtomizeModel { + atomizeAtSymbols, + atomizeFixedSize, + atomizePointerSize, + atomizeUTF8, + atomizeUTF16, + atomizeCFI, + atomizeCU, + atomizeCFString +}; + +/// Returns info on how to atomize a section of the specified ContentType. +void sectionParseInfo(DefinedAtom::ContentType atomType, + unsigned int &sizeMultiple, + DefinedAtom::Scope &scope, + DefinedAtom::Merge &merge, + AtomizeModel &atomizeModel) { + struct ParseInfo { + DefinedAtom::ContentType atomType; + unsigned int sizeMultiple; + DefinedAtom::Scope scope; + DefinedAtom::Merge merge; + AtomizeModel atomizeModel; + }; + + #define ENTRY(type, size, scope, merge, model) \ + {DefinedAtom::type, size, DefinedAtom::scope, DefinedAtom::merge, model } + + static const ParseInfo parseInfo[] = { + ENTRY(typeCode, 1, scopeGlobal, mergeNo, + atomizeAtSymbols), + ENTRY(typeData, 1, scopeGlobal, mergeNo, + atomizeAtSymbols), + ENTRY(typeConstData, 1, scopeGlobal, mergeNo, + atomizeAtSymbols), + ENTRY(typeZeroFill, 1, scopeGlobal, mergeNo, + atomizeAtSymbols), + ENTRY(typeConstant, 1, scopeGlobal, mergeNo, + atomizeAtSymbols), + ENTRY(typeCString, 1, scopeLinkageUnit, mergeByContent, + atomizeUTF8), + ENTRY(typeUTF16String, 1, scopeLinkageUnit, mergeByContent, + atomizeUTF16), + ENTRY(typeCFI, 4, scopeTranslationUnit, mergeNo, + atomizeCFI), + ENTRY(typeLiteral4, 4, 
scopeLinkageUnit, mergeByContent, + atomizeFixedSize), + ENTRY(typeLiteral8, 8, scopeLinkageUnit, mergeByContent, + atomizeFixedSize), + ENTRY(typeLiteral16, 16, scopeLinkageUnit, mergeByContent, + atomizeFixedSize), + ENTRY(typeCFString, 4, scopeLinkageUnit, mergeByContent, + atomizeCFString), + ENTRY(typeInitializerPtr, 4, scopeTranslationUnit, mergeNo, + atomizePointerSize), + ENTRY(typeTerminatorPtr, 4, scopeTranslationUnit, mergeNo, + atomizePointerSize), + ENTRY(typeCompactUnwindInfo, 4, scopeTranslationUnit, mergeNo, + atomizeCU), + ENTRY(typeGOT, 4, scopeLinkageUnit, mergeByContent, + atomizePointerSize), + ENTRY(typeUnknown, 1, scopeGlobal, mergeNo, + atomizeAtSymbols) + }; + #undef ENTRY + const int tableLen = sizeof(parseInfo) / sizeof(ParseInfo); + for (int i=0; i < tableLen; ++i) { + if (parseInfo[i].atomType == atomType) { + sizeMultiple = parseInfo[i].sizeMultiple; + scope = parseInfo[i].scope; + merge = parseInfo[i].merge; + atomizeModel = parseInfo[i].atomizeModel; + return; + } + } + + // Unknown type is atomized by symbols. + sizeMultiple = 1; + scope = DefinedAtom::scopeGlobal; + merge = DefinedAtom::mergeNo; + atomizeModel = atomizeAtSymbols; +} + + +Atom::Scope atomScope(uint8_t scope) { + switch (scope) { + case N_EXT: + return Atom::scopeGlobal; + case N_PEXT: + case N_PEXT | N_EXT: + return Atom::scopeLinkageUnit; + case 0: + return Atom::scopeTranslationUnit; + } + llvm_unreachable("unknown scope value!"); +} + +void appendSymbolsInSection(const std::vector<Symbol> &inSymbols, + uint32_t sectionIndex, + SmallVector<const Symbol *, 64> &outSyms) { + for (const Symbol &sym : inSymbols) { + // Only look at definition symbols. 
+ if ((sym.type & N_TYPE) != N_SECT) + continue; + if (sym.sect != sectionIndex) + continue; + outSyms.push_back(&sym); + } +} + +/// Adds one atom to \p file covering the byte range +/// [symbolAddr, nextSymbolAddr) of \p section. +void atomFromSymbol(DefinedAtom::ContentType atomType, const Section &section, + MachOFile &file, uint64_t symbolAddr, StringRef symbolName, + uint16_t symbolDescFlags, Atom::Scope symbolScope, + uint64_t nextSymbolAddr, bool scatterable, bool copyRefs) { + // Mach-O symbol table does not have size in it. Instead the size is the + // difference between this and the next symbol. + uint64_t size = nextSymbolAddr - symbolAddr; + uint64_t offset = symbolAddr - section.address; + bool noDeadStrip = (symbolDescFlags & N_NO_DEAD_STRIP) || !scatterable; + if (section.type == llvm::MachO::S_ZEROFILL) { + file.addZeroFillDefinedAtom(symbolName, symbolScope, offset, size, + noDeadStrip, copyRefs, &section); + } else { + DefinedAtom::Merge merge = (symbolDescFlags & N_WEAK_DEF) + ? DefinedAtom::mergeAsWeak : DefinedAtom::mergeNo; + bool thumb = (symbolDescFlags & N_ARM_THUMB_DEF); + if (atomType == DefinedAtom::typeUnknown) { + // Mach-O needs a segment and section name. Concatenate those two + // with a / separator (e.g. "seg/sect") to fit into the lld model + // of just a section name. + std::string segSectName = section.segmentName.str() + + "/" + section.sectionName.str(); + file.addDefinedAtomInCustomSection(symbolName, symbolScope, atomType, + merge, thumb, noDeadStrip, offset, + size, segSectName, true, &section); + } else { + if ((atomType == lld::DefinedAtom::typeCode) && + (symbolDescFlags & N_SYMBOL_RESOLVER)) { + atomType = lld::DefinedAtom::typeResolver; + } + file.addDefinedAtom(symbolName, symbolScope, atomType, merge, + offset, size, thumb, noDeadStrip, copyRefs, &section); + } + } +} + +std::error_code processSymboledSection(DefinedAtom::ContentType atomType, + const Section &section, + const NormalizedFile &normalizedFile, + MachOFile &file, bool scatterable, + bool copyRefs) { + // Find section's index.
+ uint32_t sectIndex = 1; + for (auto &sect : normalizedFile.sections) { + if (&sect == &section) + break; + ++sectIndex; + } + + // Find all symbols in this section. + SmallVector<const Symbol *, 64> symbols; + appendSymbolsInSection(normalizedFile.globalSymbols, sectIndex, symbols); + appendSymbolsInSection(normalizedFile.localSymbols, sectIndex, symbols); + + // Sort symbols. + std::sort(symbols.begin(), symbols.end(), + [](const Symbol *lhs, const Symbol *rhs) -> bool { + if (lhs == rhs) + return false; + // First by address. + uint64_t lhsAddr = lhs->value; + uint64_t rhsAddr = rhs->value; + if (lhsAddr != rhsAddr) + return lhsAddr < rhsAddr; + // If same address, one is an alias so sort by scope. + Atom::Scope lScope = atomScope(lhs->scope); + Atom::Scope rScope = atomScope(rhs->scope); + if (lScope != rScope) + return lScope < rScope; + // If same address and scope, see if one might be better as + // the alias. startswith() is used rather than front() so that an + // empty symbol name is handled instead of being an invalid access. + bool lPrivate = lhs->name.startswith("l"); + bool rPrivate = rhs->name.startswith("l"); + if (lPrivate != rPrivate) + return lPrivate; + // If same address and scope, sort by name. + return lhs->name < rhs->name; + }); + + // Debug logging of symbols. + //for (const Symbol *sym : symbols) + // llvm::errs() << " sym: " + // << llvm::format("0x%08llx ", (uint64_t)sym->value) + // << ", " << sym->name << "\n"; + + // If section has no symbols and no content, there are no atoms. + if (symbols.empty() && section.content.empty()) + return std::error_code(); + + if (symbols.empty()) { + // Section has no symbols, put all content in one anonymous atom. + atomFromSymbol(atomType, section, file, section.address, StringRef(), + 0, Atom::scopeTranslationUnit, + section.address + section.content.size(), + scatterable, copyRefs); + } + else if (symbols.front()->value != section.address) { + // Section has anonymous content before first symbol. 
+ atomFromSymbol(atomType, section, file, section.address, StringRef(), + 0, Atom::scopeTranslationUnit, symbols.front()->value, + scatterable, copyRefs); + } + + // Each symbol's atom ends where the next symbol begins, so an atom is + // emitted for the previous symbol once the following one is known. + const Symbol *lastSym = nullptr; + for (const Symbol *sym : symbols) { + if (lastSym != nullptr) { + // Ignore any assembler added "ltmpNNN" symbol at start of section + // if there is another symbol at the start. + if ((lastSym->value != sym->value) + || lastSym->value != section.address + || !lastSym->name.startswith("ltmp")) { + atomFromSymbol(atomType, section, file, lastSym->value, lastSym->name, + lastSym->desc, atomScope(lastSym->scope), sym->value, + scatterable, copyRefs); + } + } + lastSym = sym; + } + // The final symbol's atom runs to the end of the section content. + if (lastSym != nullptr) { + atomFromSymbol(atomType, section, file, lastSym->value, lastSym->name, + lastSym->desc, atomScope(lastSym->scope), + section.address + section.content.size(), + scatterable, copyRefs); + } + + // If object built without .subsections_via_symbols, add reference chain. + if (!scatterable) { + MachODefinedAtom *prevAtom = nullptr; + file.eachAtomInSection(section, + [&](MachODefinedAtom *atom, uint64_t offset)->void { + if (prevAtom) + prevAtom->addReference(0, Reference::kindLayoutAfter, atom, 0, + Reference::KindArch::all, + Reference::KindNamespace::all); + prevAtom = atom; + }); + } + + return std::error_code(); +} + +/// Splits \p section into atoms using the size multiple, scope, merge and +/// atomize model that sectionParseInfo() reports for \p atomType. +std::error_code processSection(DefinedAtom::ContentType atomType, + const Section &section, + bool customSectionName, + const NormalizedFile &normalizedFile, + MachOFile &file, bool scatterable, + bool copyRefs) { + const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch); + const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); + + // Get info on how to atomize section. + unsigned int sizeMultiple; + DefinedAtom::Scope scope; + DefinedAtom::Merge merge; + AtomizeModel atomizeModel; + sectionParseInfo(atomType, sizeMultiple, scope, merge, atomizeModel); + + // Validate section size. 
+ if ((section.content.size() % sizeMultiple) != 0) + return make_dynamic_error_code(Twine("Section ") + section.segmentName + + "/" + section.sectionName + + " has size (" + + Twine(section.content.size()) + + ") which is not a multiple of " + + Twine(sizeMultiple) ); + + if (atomizeModel == atomizeAtSymbols) { + // Break section up into atoms at symbol boundaries. + return processSymboledSection(atomType, section, normalizedFile, file, + scatterable, copyRefs); + } else { + unsigned int size; + for (unsigned int offset = 0, e = section.content.size(); offset != e;) { + switch (atomizeModel) { + case atomizeFixedSize: + // Break section up into atoms each with a fixed size. + size = sizeMultiple; + break; + case atomizePointerSize: + // Break section up into atoms each the size of a pointer. + size = is64 ? 8 : 4; + break; + case atomizeUTF8: + // Break section up into zero terminated c-strings. + size = 0; + for (unsigned int i = offset; i < e; ++i) { + if (section.content[i] == 0) { + size = i + 1 - offset; + break; + } + } + break; + case atomizeUTF16: + // Break section up into zero terminated UTF16 strings. + // Stop before a trailing odd byte so content[i + 1] stays in bounds. + size = 0; + for (unsigned int i = offset; i + 1 < e; i += 2) { + if ((section.content[i] == 0) && (section.content[i + 1] == 0)) { + size = i + 2 - offset; + break; + } + } + break; + case atomizeCFI: + // Break section up into dwarf unwind CFIs (FDE or CIE). + // The 4-byte length field must lie inside the section before it is read. + if (e - offset < 4) { + return make_dynamic_error_code(Twine(Twine("Section ") + + section.segmentName + + "/" + section.sectionName + + " is malformed. CFI starting at " + "offset (" + + Twine(offset) + + ") is truncated.")); + } + size = read32(&section.content[offset], isBig) + 4; + if (offset+size > section.content.size()) { + return make_dynamic_error_code(Twine(Twine("Section ") + + section.segmentName + + "/" + section.sectionName + + " is malformed. Size of CFI " + "starting at offset (" + + Twine(offset) + + ") is past end of section.")); + } + break; + case atomizeCU: + // Break section up into compact unwind entries. + size = is64 ? 32 : 20; + break; + case atomizeCFString: + // Break section up into NS/CFString objects. + size = is64 ? 
32 : 16; + break; + case atomizeAtSymbols: + break; + } + if (size == 0) { + return make_dynamic_error_code(Twine("Section ") + section.segmentName + + "/" + section.sectionName + + " is malformed. The last atom is " + "not zero terminated."); + } + // An atom larger than the remaining bytes would step offset past e and + // the loop (which tests offset != e) would never terminate. + if (size > e - offset) { + return make_dynamic_error_code(Twine("Section ") + section.segmentName + + "/" + section.sectionName + + " is malformed. The last atom " + "extends past the end of the section."); + } + if (customSectionName) { + // Mach-O needs a segment and section name. Concatenate those two + // with a / separator (e.g. "seg/sect") to fit into the lld model + // of just a section name. + std::string segSectName = section.segmentName.str() + + "/" + section.sectionName.str(); + file.addDefinedAtomInCustomSection(StringRef(), scope, atomType, + merge, false, false, offset, + size, segSectName, true, &section); + } else { + file.addDefinedAtom(StringRef(), scope, atomType, merge, offset, size, + false, false, copyRefs, &section); + } + offset += size; + } + } + return std::error_code(); +} + +/// Returns the section whose [address, address + content size) range contains +/// \p address, or nullptr if there is none. +const Section* findSectionCoveringAddress(const NormalizedFile &normalizedFile, + uint64_t address) { + for (const Section &s : normalizedFile.sections) { + uint64_t sAddr = s.address; + if ((sAddr <= address) && (address < sAddr+s.content.size())) { + return &s; + } + } + return nullptr; +} + +/// Finds the atom containing \p addr and stores the offset reported by +/// MachOFile::findAtomCoveringAddress() in \p addend. Returns nullptr, leaving +/// \p addend untouched, when no section covers \p addr. +const MachODefinedAtom * +findAtomCoveringAddress(const NormalizedFile &normalizedFile, MachOFile &file, + uint64_t addr, Reference::Addend *addend) { + const Section *sect = nullptr; + sect = findSectionCoveringAddress(normalizedFile, addr); + if (!sect) + return nullptr; + + // Zero-initialized so *addend is well defined even if the lookup below + // does not write the offset. + uint32_t offsetInTarget = 0; + uint64_t offsetInSect = addr - sect->address; + auto atom = + file.findAtomCoveringAddress(*sect, offsetInSect, &offsetInTarget); + *addend = offsetInTarget; + return atom; +} + +// Walks all relocations for a section in a normalized .o file and +// creates corresponding lld::Reference objects. +std::error_code convertRelocs(const Section &section, + const NormalizedFile &normalizedFile, + bool scatterable, + MachOFile &file, + ArchHandler &handler) { + // Utility function for ArchHandler to find atom by its address. 
+ auto atomByAddr = [&] (uint32_t sectIndex, uint64_t addr, + const lld::Atom **atom, Reference::Addend *addend) + -> std::error_code { + if (sectIndex > normalizedFile.sections.size()) + return make_dynamic_error_code(Twine("out of range section " + "index (") + Twine(sectIndex) + ")"); + const Section *sect = nullptr; + if (sectIndex == 0) { + sect = findSectionCoveringAddress(normalizedFile, addr); + if (!sect) + return make_dynamic_error_code(Twine("address (" + Twine(addr) + + ") is not in any section")); + } else { + sect = &normalizedFile.sections[sectIndex-1]; + } + uint32_t offsetInTarget; + uint64_t offsetInSect = addr - sect->address; + *atom = file.findAtomCoveringAddress(*sect, offsetInSect, &offsetInTarget); + *addend = offsetInTarget; + return std::error_code(); + }; + + // Utility function for ArchHandler to find atom by its symbol index. + auto atomBySymbol = [&] (uint32_t symbolIndex, const lld::Atom **result) + -> std::error_code { + // Find symbol from index. + const Symbol *sym = nullptr; + uint32_t numLocal = normalizedFile.localSymbols.size(); + uint32_t numGlobal = normalizedFile.globalSymbols.size(); + uint32_t numUndef = normalizedFile.undefinedSymbols.size(); + if (symbolIndex < numLocal) { + sym = &normalizedFile.localSymbols[symbolIndex]; + } else if (symbolIndex < numLocal+numGlobal) { + sym = &normalizedFile.globalSymbols[symbolIndex-numLocal]; + } else if (symbolIndex < numLocal+numGlobal+numUndef) { + sym = &normalizedFile.undefinedSymbols[symbolIndex-numLocal-numGlobal]; + } else { + return make_dynamic_error_code(Twine("symbol index (") + + Twine(symbolIndex) + ") out of range"); + } + // Find atom from symbol. 
+ if ((sym->type & N_TYPE) == N_SECT) { + if (sym->sect > normalizedFile.sections.size()) + return make_dynamic_error_code(Twine("symbol section index (") + + Twine(sym->sect) + ") out of range "); + const Section &symSection = normalizedFile.sections[sym->sect-1]; + uint64_t targetOffsetInSect = sym->value - symSection.address; + MachODefinedAtom *target = file.findAtomCoveringAddress(symSection, + targetOffsetInSect); + if (target) { + *result = target; + return std::error_code(); + } + return make_dynamic_error_code(Twine("no atom found for defined symbol")); + } else if ((sym->type & N_TYPE) == N_UNDF) { + const lld::Atom *target = file.findUndefAtom(sym->name); + if (target) { + *result = target; + return std::error_code(); + } + return make_dynamic_error_code(Twine("no undefined atom found for sym")); + } else { + // Search undefs + return make_dynamic_error_code(Twine("no atom found for symbol")); + } + }; + + const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); + // Use old-school iterator so that paired relocations can be grouped. + for (auto it=section.relocations.begin(), e=section.relocations.end(); + it != e; ++it) { + const Relocation &reloc = *it; + // Find atom this relocation is in. + if (reloc.offset > section.content.size()) + return make_dynamic_error_code(Twine("r_address (") + Twine(reloc.offset) + + ") is larger than section size (" + + Twine(section.content.size()) + ")"); + uint32_t offsetInAtom; + MachODefinedAtom *inAtom = file.findAtomCoveringAddress(section, + reloc.offset, + &offsetInAtom); + assert(inAtom && "r_address in range, should have found atom"); + uint64_t fixupAddress = section.address + reloc.offset; + + const lld::Atom *target = nullptr; + Reference::Addend addend = 0; + Reference::KindValue kind; + std::error_code relocErr; + if (handler.isPairedReloc(reloc)) { + // Handle paired relocations together. 
+ // The second half of the pair is the next record; make sure it exists + // before dereferencing the iterator. + ++it; + if (it == e) + return make_dynamic_error_code( + Twine("paired relocation in section ") + + section.segmentName + "/" + section.sectionName + + " is missing its second half (r_address=" + + Twine::utohexstr(reloc.offset) + ")"); + relocErr = handler.getPairReferenceInfo( + reloc, *it, inAtom, offsetInAtom, fixupAddress, isBig, scatterable, + atomByAddr, atomBySymbol, &kind, &target, &addend); + } + else { + // Use ArchHandler to convert relocation record into information + // needed to instantiate an lld::Reference object. + relocErr = handler.getReferenceInfo( + reloc, inAtom, offsetInAtom, fixupAddress, isBig, atomByAddr, + atomBySymbol, &kind, &target, &addend); + } + if (relocErr) { + return make_dynamic_error_code( + Twine("bad relocation (") + relocErr.message() + + ") in section " + + section.segmentName + "/" + section.sectionName + + " (r_address=" + Twine::utohexstr(reloc.offset) + + ", r_type=" + Twine(reloc.type) + + ", r_extern=" + Twine(reloc.isExtern) + + ", r_length=" + Twine((int)reloc.length) + + ", r_pcrel=" + Twine(reloc.pcRel) + + (!reloc.scattered ? (Twine(", r_symbolnum=") + Twine(reloc.symbol)) + : (Twine(", r_scattered=1, r_value=") + + Twine(reloc.value))) + + ")" ); + } else { + // Instantiate an lld::Reference object and add to its atom. 
+ inAtom->addReference(offsetInAtom, kind, target, addend, + handler.kindArch()); + } + } + + return std::error_code(); +} + +bool isDebugInfoSection(const Section §ion) { + if ((section.attributes & S_ATTR_DEBUG) == 0) + return false; + return section.segmentName.equals("__DWARF"); +} + +static int64_t readSPtr(bool is64, bool isBig, const uint8_t *addr) { + if (is64) + return read64(addr, isBig); + + int32_t res = read32(addr, isBig); + return res; +} + +std::error_code addEHFrameReferences(const NormalizedFile &normalizedFile, + MachOFile &file, + mach_o::ArchHandler &handler) { + const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); + const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch); + + const Section *ehFrameSection = nullptr; + for (auto §ion : normalizedFile.sections) + if (section.segmentName == "__TEXT" && + section.sectionName == "__eh_frame") { + ehFrameSection = §ion; + break; + } + + // No __eh_frame so nothing to do. + if (!ehFrameSection) + return std::error_code(); + + file.eachAtomInSection(*ehFrameSection, + [&](MachODefinedAtom *atom, uint64_t offset) -> void { + assert(atom->contentType() == DefinedAtom::typeCFI); + + if (ArchHandler::isDwarfCIE(isBig, atom)) + return; + + // Compiler wasn't lazy and actually told us what it meant. + if (atom->begin() != atom->end()) + return; + + const uint8_t *frameData = atom->rawContent().data(); + uint32_t size = read32(frameData, isBig); + uint64_t cieFieldInFDE = size == 0xffffffffU + ? sizeof(uint32_t) + sizeof(uint64_t) + : sizeof(uint32_t); + + // Linker needs to fixup a reference from the FDE to its parent CIE (a + // 32-bit byte offset backwards in the __eh_frame section). 
+ uint32_t cieDelta = read32(frameData + cieFieldInFDE, isBig); + uint64_t cieAddress = ehFrameSection->address + offset + cieFieldInFDE; + cieAddress -= cieDelta; + + Reference::Addend addend; + const Atom *cie = + findAtomCoveringAddress(normalizedFile, file, cieAddress, &addend); + atom->addReference(cieFieldInFDE, handler.unwindRefToCIEKind(), cie, + addend, handler.kindArch()); + + // Linker needs to fixup reference from the FDE to the function it's + // describing. FIXME: there are actually different ways to do this, and the + // particular method used is specified in the CIE's augmentation fields + // (hopefully) + uint64_t rangeFieldInFDE = cieFieldInFDE + sizeof(uint32_t); + + int64_t functionFromFDE = readSPtr(is64, isBig, frameData + rangeFieldInFDE); + uint64_t rangeStart = ehFrameSection->address + offset + rangeFieldInFDE; + rangeStart += functionFromFDE; + + const Atom *func = + findAtomCoveringAddress(normalizedFile, file, rangeStart, &addend); + atom->addReference(rangeFieldInFDE, handler.unwindRefToFunctionKind(), func, + addend, handler.kindArch()); + }); + return std::error_code(); +} + + +/// Converts normalized mach-o file into an lld::File and lld::Atoms. +ErrorOr<std::unique_ptr<lld::File>> +objectToAtoms(const NormalizedFile &normalizedFile, StringRef path, + bool copyRefs) { + std::unique_ptr<MachOFile> file(new MachOFile(path)); + if (std::error_code ec = normalizedObjectToAtoms( + file.get(), normalizedFile, copyRefs)) + return ec; + return std::unique_ptr<File>(std::move(file)); +} + +ErrorOr<std::unique_ptr<lld::File>> +dylibToAtoms(const NormalizedFile &normalizedFile, StringRef path, + bool copyRefs) { + // Instantiate SharedLibraryFile object. 
+ std::unique_ptr<MachODylibFile> file(new MachODylibFile(path)); + normalizedDylibToAtoms(file.get(), normalizedFile, copyRefs); + return std::unique_ptr<File>(std::move(file)); +} + +} // anonymous namespace + +namespace normalized { + +std::error_code +normalizedObjectToAtoms(MachOFile *file, + const NormalizedFile &normalizedFile, + bool copyRefs) { + bool scatterable = ((normalizedFile.flags & MH_SUBSECTIONS_VIA_SYMBOLS) != 0); + + // Create atoms from each section. + for (auto § : normalizedFile.sections) { + if (isDebugInfoSection(sect)) + continue; + bool customSectionName; + DefinedAtom::ContentType atomType = atomTypeFromSection(sect, + customSectionName); + if (std::error_code ec = + processSection(atomType, sect, customSectionName, normalizedFile, + *file, scatterable, copyRefs)) + return ec; + } + // Create atoms from undefined symbols. + for (auto &sym : normalizedFile.undefinedSymbols) { + // Undefinded symbols with n_value != 0 are actually tentative definitions. + if (sym.value == Hex64(0)) { + file->addUndefinedAtom(sym.name, copyRefs); + } else { + file->addTentativeDefAtom(sym.name, atomScope(sym.scope), sym.value, + DefinedAtom::Alignment(sym.desc >> 8), copyRefs); + } + } + + // Convert mach-o relocations to References + std::unique_ptr<mach_o::ArchHandler> handler + = ArchHandler::create(normalizedFile.arch); + for (auto § : normalizedFile.sections) { + if (isDebugInfoSection(sect)) + continue; + if (std::error_code ec = convertRelocs(sect, normalizedFile, scatterable, + *file, *handler)) + return ec; + } + + // Add additional arch-specific References + file->eachDefinedAtom([&](MachODefinedAtom* atom) -> void { + handler->addAdditionalReferences(*atom); + }); + + // Each __eh_frame section needs references to both __text (the function we're + // providing unwind info for) and itself (FDE -> CIE). These aren't + // represented in the relocations on some architectures, so we have to add + // them back in manually there. 
+ if (std::error_code ec = addEHFrameReferences(normalizedFile, *file, *handler)) + return ec; + + // Process mach-o data-in-code regions array. That information is encoded in + // atoms as References at each transition point. + unsigned nextIndex = 0; + for (const DataInCode &entry : normalizedFile.dataInCode) { + ++nextIndex; + const Section* s = findSectionCoveringAddress(normalizedFile, entry.offset); + if (!s) { + return make_dynamic_error_code(Twine("LC_DATA_IN_CODE address (" + + Twine(entry.offset) + + ") is not in any section")); + } + uint64_t offsetInSect = entry.offset - s->address; + uint32_t offsetInAtom; + MachODefinedAtom *atom = file->findAtomCoveringAddress(*s, offsetInSect, + &offsetInAtom); + // The lookup can fail; report it rather than dereferencing a null atom. + if (!atom) { + return make_dynamic_error_code(Twine("LC_DATA_IN_CODE address (" + + Twine(entry.offset) + + ") is not in any atom")); + } + if (offsetInAtom + entry.length > atom->size()) { + return make_dynamic_error_code(Twine("LC_DATA_IN_CODE entry (offset=" + + Twine(entry.offset) + + ", length=" + + Twine(entry.length) + + ") crosses atom boundary.")); + } + // Add reference that marks start of data-in-code. + atom->addReference(offsetInAtom, + handler->dataInCodeTransitionStart(*atom), atom, + entry.kind, handler->kindArch()); + + // Peek at next entry, if it starts where this one ends, skip ending ref. + if (nextIndex < normalizedFile.dataInCode.size()) { + const DataInCode &nextEntry = normalizedFile.dataInCode[nextIndex]; + if (nextEntry.offset == (entry.offset + entry.length)) + continue; + } + + // If data goes to end of function, skip ending ref. + if ((offsetInAtom + entry.length) == atom->size()) + continue; + + // Add reference that marks end of data-in-code. + atom->addReference(offsetInAtom+entry.length, + handler->dataInCodeTransitionEnd(*atom), atom, 0, + handler->kindArch()); + } + + // Sort references in each atom to their canonical order. 
+ for (const DefinedAtom* defAtom : file->defined()) { + reinterpret_cast<const SimpleDefinedAtom*>(defAtom)->sortReferences(); + } + return std::error_code(); +} + +/// Copies the install name, versions, exported symbols and re-exported dylibs +/// from \p normalizedFile into \p file. Always returns success. +std::error_code +normalizedDylibToAtoms(MachODylibFile *file, + const NormalizedFile &normalizedFile, + bool copyRefs) { + file->setInstallName(normalizedFile.installName); + file->setCompatVersion(normalizedFile.compatVersion); + file->setCurrentVersion(normalizedFile.currentVersion); + + // Tell MachODylibFile object about all symbols it exports. + if (!normalizedFile.exportInfo.empty()) { + // If exports trie exists, use it instead of traditional symbol table. + for (const Export &exp : normalizedFile.exportInfo) { + bool weakDef = (exp.flags & EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION); + // StringRefs from export iterator are ephemeral, so force copy. + file->addExportedSymbol(exp.name, weakDef, true); + } + } else { + for (auto &sym : normalizedFile.globalSymbols) { + assert((sym.scope & N_EXT) && "only expect external symbols here"); + bool weakDef = (sym.desc & N_WEAK_DEF); + file->addExportedSymbol(sym.name, weakDef, copyRefs); + } + } + // Tell MachODylibFile object about all dylibs it re-exports. + for (const DependentDylib &dep : normalizedFile.dependentDylibs) { + if (dep.kind == llvm::MachO::LC_REEXPORT_DYLIB) + file->addReExportedDylib(dep.path); + } + return std::error_code(); +} + +void relocatableSectionInfoForContentType(DefinedAtom::ContentType atomType, + StringRef &segmentName, + StringRef &sectionName, + SectionType &sectionType, + SectionAttr &sectionAttrs) { + + for (const MachORelocatableSectionToAtomType *p = sectsToAtomType ; + p->atomType != DefinedAtom::typeUnknown; ++p) { + if (p->atomType != atomType) + continue; + // Wild carded entries are ignored for reverse lookups. 
+ if (p->segmentName.empty() || p->sectionName.empty()) + continue; + segmentName = p->segmentName; + sectionName = p->sectionName; + sectionType = p->sectionType; + sectionAttrs = 0; + if (atomType == DefinedAtom::typeCode) + sectionAttrs = S_ATTR_PURE_INSTRUCTIONS; + return; + } + llvm_unreachable("content type not yet supported"); +} + +ErrorOr<std::unique_ptr<lld::File>> +normalizedToAtoms(const NormalizedFile &normalizedFile, StringRef path, + bool copyRefs) { + switch (normalizedFile.fileType) { + case MH_DYLIB: + case MH_DYLIB_STUB: + return dylibToAtoms(normalizedFile, path, copyRefs); + case MH_OBJECT: + return objectToAtoms(normalizedFile, path, copyRefs); + default: + llvm_unreachable("unhandled MachO file type!"); + } +} + +} // namespace normalized +} // namespace mach_o +} // namespace lld diff --git a/lib/ReaderWriter/MachO/MachONormalizedFileYAML.cpp b/lib/ReaderWriter/MachO/MachONormalizedFileYAML.cpp new file mode 100644 index 000000000000..ae14d755e2b9 --- /dev/null +++ b/lib/ReaderWriter/MachO/MachONormalizedFileYAML.cpp @@ -0,0 +1,802 @@ +//===- lib/ReaderWriter/MachO/MachONormalizedFileYAML.cpp -----------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +/// +/// \file For mach-o object files, this implementation uses YAML I/O to +/// provide the convert between YAML and the normalized mach-o (NM). 
+/// +/// +------------+ +------+ +/// | normalized | <-> | yaml | +/// +------------+ +------+ + +#include "MachONormalizedFile.h" +#include "lld/Core/Error.h" +#include "lld/Core/LLVM.h" +#include "lld/ReaderWriter/YamlContext.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/MachO.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/YAMLTraits.h" +#include "llvm/Support/raw_ostream.h" +#include <system_error> + + +using llvm::StringRef; +using namespace llvm::yaml; +using namespace llvm::MachO; +using namespace lld::mach_o::normalized; +using lld::YamlContext; + +LLVM_YAML_IS_SEQUENCE_VECTOR(Segment) +LLVM_YAML_IS_SEQUENCE_VECTOR(DependentDylib) +LLVM_YAML_IS_SEQUENCE_VECTOR(RebaseLocation) +LLVM_YAML_IS_SEQUENCE_VECTOR(BindLocation) +LLVM_YAML_IS_SEQUENCE_VECTOR(Export) +LLVM_YAML_IS_SEQUENCE_VECTOR(StringRef) +LLVM_YAML_IS_SEQUENCE_VECTOR(DataInCode) + + +// for compatibility with gcc-4.7 in C++11 mode, add extra namespace +namespace llvm { +namespace yaml { + +// A vector of Sections is a sequence. +template<> +struct SequenceTraits< std::vector<Section> > { + static size_t size(IO &io, std::vector<Section> &seq) { + return seq.size(); + } + static Section& element(IO &io, std::vector<Section> &seq, size_t index) { + if ( index >= seq.size() ) + seq.resize(index+1); + return seq[index]; + } +}; + +template<> +struct SequenceTraits< std::vector<Symbol> > { + static size_t size(IO &io, std::vector<Symbol> &seq) { + return seq.size(); + } + static Symbol& element(IO &io, std::vector<Symbol> &seq, size_t index) { + if ( index >= seq.size() ) + seq.resize(index+1); + return seq[index]; + } +}; + +// A vector of Relocations is a sequence. 
+template<> +struct SequenceTraits< Relocations > { + static size_t size(IO &io, Relocations &seq) { + return seq.size(); + } + static Relocation& element(IO &io, Relocations &seq, size_t index) { + if ( index >= seq.size() ) + seq.resize(index+1); + return seq[index]; + } +}; + +// The content for a section is represented as a flow sequence of hex bytes. +template<> +struct SequenceTraits< ContentBytes > { + static size_t size(IO &io, ContentBytes &seq) { + return seq.size(); + } + static Hex8& element(IO &io, ContentBytes &seq, size_t index) { + if ( index >= seq.size() ) + seq.resize(index+1); + return seq[index]; + } + static const bool flow = true; +}; + +// The indirect symbols for a section is represented as a flow sequence +// of numbers (symbol table indexes). +template<> +struct SequenceTraits< IndirectSymbols > { + static size_t size(IO &io, IndirectSymbols &seq) { + return seq.size(); + } + static uint32_t& element(IO &io, IndirectSymbols &seq, size_t index) { + if ( index >= seq.size() ) + seq.resize(index+1); + return seq[index]; + } + static const bool flow = true; +}; + +template <> +struct ScalarEnumerationTraits<lld::MachOLinkingContext::Arch> { + static void enumeration(IO &io, lld::MachOLinkingContext::Arch &value) { + io.enumCase(value, "unknown",lld::MachOLinkingContext::arch_unknown); + io.enumCase(value, "ppc", lld::MachOLinkingContext::arch_ppc); + io.enumCase(value, "x86", lld::MachOLinkingContext::arch_x86); + io.enumCase(value, "x86_64", lld::MachOLinkingContext::arch_x86_64); + io.enumCase(value, "armv6", lld::MachOLinkingContext::arch_armv6); + io.enumCase(value, "armv7", lld::MachOLinkingContext::arch_armv7); + io.enumCase(value, "armv7s", lld::MachOLinkingContext::arch_armv7s); + io.enumCase(value, "arm64", lld::MachOLinkingContext::arch_arm64); + } +}; + +template <> +struct ScalarEnumerationTraits<lld::MachOLinkingContext::OS> { + static void enumeration(IO &io, lld::MachOLinkingContext::OS &value) { + io.enumCase(value, "unknown", 
+ lld::MachOLinkingContext::OS::unknown); + io.enumCase(value, "Mac OS X", + lld::MachOLinkingContext::OS::macOSX); + io.enumCase(value, "iOS", + lld::MachOLinkingContext::OS::iOS); + io.enumCase(value, "iOS Simulator", + lld::MachOLinkingContext::OS::iOS_simulator); + } +}; + + +template <> +struct ScalarEnumerationTraits<HeaderFileType> { + static void enumeration(IO &io, HeaderFileType &value) { + io.enumCase(value, "MH_OBJECT", llvm::MachO::MH_OBJECT); + io.enumCase(value, "MH_DYLIB", llvm::MachO::MH_DYLIB); + io.enumCase(value, "MH_EXECUTE", llvm::MachO::MH_EXECUTE); + io.enumCase(value, "MH_BUNDLE", llvm::MachO::MH_BUNDLE); + } +}; + + +template <> +struct ScalarBitSetTraits<FileFlags> { + static void bitset(IO &io, FileFlags &value) { + io.bitSetCase(value, "MH_TWOLEVEL", + llvm::MachO::MH_TWOLEVEL); + io.bitSetCase(value, "MH_SUBSECTIONS_VIA_SYMBOLS", + llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); + } +}; + + +template <> +struct ScalarEnumerationTraits<SectionType> { + static void enumeration(IO &io, SectionType &value) { + io.enumCase(value, "S_REGULAR", + llvm::MachO::S_REGULAR); + io.enumCase(value, "S_ZEROFILL", + llvm::MachO::S_ZEROFILL); + io.enumCase(value, "S_CSTRING_LITERALS", + llvm::MachO::S_CSTRING_LITERALS); + io.enumCase(value, "S_4BYTE_LITERALS", + llvm::MachO::S_4BYTE_LITERALS); + io.enumCase(value, "S_8BYTE_LITERALS", + llvm::MachO::S_8BYTE_LITERALS); + io.enumCase(value, "S_LITERAL_POINTERS", + llvm::MachO::S_LITERAL_POINTERS); + io.enumCase(value, "S_NON_LAZY_SYMBOL_POINTERS", + llvm::MachO::S_NON_LAZY_SYMBOL_POINTERS); + io.enumCase(value, "S_LAZY_SYMBOL_POINTERS", + llvm::MachO::S_LAZY_SYMBOL_POINTERS); + io.enumCase(value, "S_SYMBOL_STUBS", + llvm::MachO::S_SYMBOL_STUBS); + io.enumCase(value, "S_MOD_INIT_FUNC_POINTERS", + llvm::MachO::S_MOD_INIT_FUNC_POINTERS); + io.enumCase(value, "S_MOD_TERM_FUNC_POINTERS", + llvm::MachO::S_MOD_TERM_FUNC_POINTERS); + io.enumCase(value, "S_COALESCED", + llvm::MachO::S_COALESCED); + io.enumCase(value, 
"S_GB_ZEROFILL", + llvm::MachO::S_GB_ZEROFILL); + io.enumCase(value, "S_INTERPOSING", + llvm::MachO::S_INTERPOSING); + io.enumCase(value, "S_16BYTE_LITERALS", + llvm::MachO::S_16BYTE_LITERALS); + io.enumCase(value, "S_DTRACE_DOF", + llvm::MachO::S_DTRACE_DOF); + io.enumCase(value, "S_LAZY_DYLIB_SYMBOL_POINTERS", + llvm::MachO::S_LAZY_DYLIB_SYMBOL_POINTERS); + io.enumCase(value, "S_THREAD_LOCAL_REGULAR", + llvm::MachO::S_THREAD_LOCAL_REGULAR); + io.enumCase(value, "S_THREAD_LOCAL_ZEROFILL", + llvm::MachO::S_THREAD_LOCAL_ZEROFILL); + io.enumCase(value, "S_THREAD_LOCAL_VARIABLES", + llvm::MachO::S_THREAD_LOCAL_VARIABLES); + io.enumCase(value, "S_THREAD_LOCAL_VARIABLE_POINTERS", + llvm::MachO::S_THREAD_LOCAL_VARIABLE_POINTERS); + io.enumCase(value, "S_THREAD_LOCAL_INIT_FUNCTION_POINTERS", + llvm::MachO::S_THREAD_LOCAL_INIT_FUNCTION_POINTERS); + } +}; + +template <> +struct ScalarBitSetTraits<SectionAttr> { + static void bitset(IO &io, SectionAttr &value) { + io.bitSetCase(value, "S_ATTR_PURE_INSTRUCTIONS", + llvm::MachO::S_ATTR_PURE_INSTRUCTIONS); + io.bitSetCase(value, "S_ATTR_SOME_INSTRUCTIONS", + llvm::MachO::S_ATTR_SOME_INSTRUCTIONS); + io.bitSetCase(value, "S_ATTR_NO_DEAD_STRIP", + llvm::MachO::S_ATTR_NO_DEAD_STRIP); + io.bitSetCase(value, "S_ATTR_EXT_RELOC", + llvm::MachO::S_ATTR_EXT_RELOC); + io.bitSetCase(value, "S_ATTR_LOC_RELOC", + llvm::MachO::S_ATTR_LOC_RELOC); + } +}; + +template <> +struct ScalarEnumerationTraits<NListType> { + static void enumeration(IO &io, NListType &value) { + io.enumCase(value, "N_UNDF", llvm::MachO::N_UNDF); + io.enumCase(value, "N_ABS", llvm::MachO::N_ABS); + io.enumCase(value, "N_SECT", llvm::MachO::N_SECT); + io.enumCase(value, "N_PBUD", llvm::MachO::N_PBUD); + io.enumCase(value, "N_INDR", llvm::MachO::N_INDR); + } +}; + +template <> +struct ScalarBitSetTraits<SymbolScope> { + static void bitset(IO &io, SymbolScope &value) { + io.bitSetCase(value, "N_EXT", llvm::MachO::N_EXT); + io.bitSetCase(value, "N_PEXT", 
llvm::MachO::N_PEXT); + } +}; + +template <> +struct ScalarBitSetTraits<SymbolDesc> { + static void bitset(IO &io, SymbolDesc &value) { + io.bitSetCase(value, "N_NO_DEAD_STRIP", llvm::MachO::N_NO_DEAD_STRIP); + io.bitSetCase(value, "N_WEAK_REF", llvm::MachO::N_WEAK_REF); + io.bitSetCase(value, "N_WEAK_DEF", llvm::MachO::N_WEAK_DEF); + io.bitSetCase(value, "N_ARM_THUMB_DEF", llvm::MachO::N_ARM_THUMB_DEF); + io.bitSetCase(value, "N_SYMBOL_RESOLVER", llvm::MachO::N_SYMBOL_RESOLVER); + } +}; + + +// YAML key/value mapping for one Section. +template <> +struct MappingTraits<Section> { + struct NormalizedContent; + static void mapping(IO &io, Section &sect) { + io.mapRequired("segment", sect.segmentName); + io.mapRequired("section", sect.sectionName); + io.mapRequired("type", sect.type); + io.mapOptional("attributes", sect.attributes); + io.mapOptional("alignment", sect.alignment, 0U); + io.mapRequired("address", sect.address); + if (sect.type == llvm::MachO::S_ZEROFILL) { + // S_ZEROFILL sections use "size:" instead of "content:" + uint64_t size = sect.content.size(); + io.mapOptional("size", size); + if (!io.outputting()) { + // When reading, record only the size: content is a null pointer + // paired with the length. + uint8_t *bytes = nullptr; + sect.content = makeArrayRef(bytes, size); + } + } else { + MappingNormalization<NormalizedContent, ArrayRef<uint8_t>> content( + io, sect.content); + io.mapOptional("content", content->_normalizedContent); + } + io.mapOptional("relocations", sect.relocations); + io.mapOptional("indirect-syms", sect.indirectSymbols); + } + + struct NormalizedContent { + NormalizedContent(IO &io) : _io(io) {} + NormalizedContent(IO &io, ArrayRef<uint8_t> content) : _io(io) { + // When writing yaml, copy content byte array to Hex8 vector. + for (auto &c : content) { + _normalizedContent.push_back(c); + } + } + ArrayRef<uint8_t> denormalize(IO &io) { + // When reading yaml, allocate byte array owned by NormalizedFile and + // copy Hex8 vector to byte array. 
+ YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext()); + assert(info != nullptr); + NormalizedFile *file = info->_normalizeMachOFile; + assert(file != nullptr); + size_t size = _normalizedContent.size(); + uint8_t *bytes = file->ownedAllocations.Allocate<uint8_t>(size); + std::copy(_normalizedContent.begin(), _normalizedContent.end(), bytes); + return makeArrayRef(bytes, size); + } + + IO &_io; + ContentBytes _normalizedContent; + }; +}; + + +template <> +struct MappingTraits<Relocation> { + static void mapping(IO &io, Relocation &reloc) { + io.mapRequired("offset", reloc.offset); + io.mapOptional("scattered", reloc.scattered, false); + io.mapRequired("type", reloc.type); + io.mapRequired("length", reloc.length); + io.mapRequired("pc-rel", reloc.pcRel); + if ( !reloc.scattered ) + io.mapRequired("extern", reloc.isExtern); + if ( reloc.scattered ) + io.mapRequired("value", reloc.value); + if ( !reloc.scattered ) + io.mapRequired("symbol", reloc.symbol); + } +}; + + +template <> +struct ScalarEnumerationTraits<RelocationInfoType> { + static void enumeration(IO &io, RelocationInfoType &value) { + YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext()); + assert(info != nullptr); + NormalizedFile *file = info->_normalizeMachOFile; + assert(file != nullptr); + switch (file->arch) { + case lld::MachOLinkingContext::arch_x86_64: + io.enumCase(value, "X86_64_RELOC_UNSIGNED", + llvm::MachO::X86_64_RELOC_UNSIGNED); + io.enumCase(value, "X86_64_RELOC_SIGNED", + llvm::MachO::X86_64_RELOC_SIGNED); + io.enumCase(value, "X86_64_RELOC_BRANCH", + llvm::MachO::X86_64_RELOC_BRANCH); + io.enumCase(value, "X86_64_RELOC_GOT_LOAD", + llvm::MachO::X86_64_RELOC_GOT_LOAD); + io.enumCase(value, "X86_64_RELOC_GOT", + llvm::MachO::X86_64_RELOC_GOT); + io.enumCase(value, "X86_64_RELOC_SUBTRACTOR", + llvm::MachO::X86_64_RELOC_SUBTRACTOR); + io.enumCase(value, "X86_64_RELOC_SIGNED_1", + llvm::MachO::X86_64_RELOC_SIGNED_1); + io.enumCase(value, 
"X86_64_RELOC_SIGNED_2", + llvm::MachO::X86_64_RELOC_SIGNED_2); + io.enumCase(value, "X86_64_RELOC_SIGNED_4", + llvm::MachO::X86_64_RELOC_SIGNED_4); + io.enumCase(value, "X86_64_RELOC_TLV", + llvm::MachO::X86_64_RELOC_TLV); + break; + case lld::MachOLinkingContext::arch_x86: + io.enumCase(value, "GENERIC_RELOC_VANILLA", + llvm::MachO::GENERIC_RELOC_VANILLA); + io.enumCase(value, "GENERIC_RELOC_PAIR", + llvm::MachO::GENERIC_RELOC_PAIR); + io.enumCase(value, "GENERIC_RELOC_SECTDIFF", + llvm::MachO::GENERIC_RELOC_SECTDIFF); + io.enumCase(value, "GENERIC_RELOC_LOCAL_SECTDIFF", + llvm::MachO::GENERIC_RELOC_LOCAL_SECTDIFF); + io.enumCase(value, "GENERIC_RELOC_TLV", + llvm::MachO::GENERIC_RELOC_TLV); + break; + case lld::MachOLinkingContext::arch_armv6: + case lld::MachOLinkingContext::arch_armv7: + case lld::MachOLinkingContext::arch_armv7s: + io.enumCase(value, "ARM_RELOC_VANILLA", + llvm::MachO::ARM_RELOC_VANILLA); + io.enumCase(value, "ARM_RELOC_PAIR", + llvm::MachO::ARM_RELOC_PAIR); + io.enumCase(value, "ARM_RELOC_SECTDIFF", + llvm::MachO::ARM_RELOC_SECTDIFF); + io.enumCase(value, "ARM_RELOC_LOCAL_SECTDIFF", + llvm::MachO::ARM_RELOC_LOCAL_SECTDIFF); + io.enumCase(value, "ARM_RELOC_BR24", + llvm::MachO::ARM_RELOC_BR24); + io.enumCase(value, "ARM_THUMB_RELOC_BR22", + llvm::MachO::ARM_THUMB_RELOC_BR22); + io.enumCase(value, "ARM_RELOC_HALF", + llvm::MachO::ARM_RELOC_HALF); + io.enumCase(value, "ARM_RELOC_HALF_SECTDIFF", + llvm::MachO::ARM_RELOC_HALF_SECTDIFF); + break; + case lld::MachOLinkingContext::arch_arm64: + io.enumCase(value, "ARM64_RELOC_UNSIGNED", + llvm::MachO::ARM64_RELOC_UNSIGNED); + io.enumCase(value, "ARM64_RELOC_SUBTRACTOR", + llvm::MachO::ARM64_RELOC_SUBTRACTOR); + io.enumCase(value, "ARM64_RELOC_BRANCH26", + llvm::MachO::ARM64_RELOC_BRANCH26); + io.enumCase(value, "ARM64_RELOC_PAGE21", + llvm::MachO::ARM64_RELOC_PAGE21); + io.enumCase(value, "ARM64_RELOC_PAGEOFF12", + llvm::MachO::ARM64_RELOC_PAGEOFF12); + io.enumCase(value, 
"ARM64_RELOC_GOT_LOAD_PAGE21", + llvm::MachO::ARM64_RELOC_GOT_LOAD_PAGE21); + io.enumCase(value, "ARM64_RELOC_GOT_LOAD_PAGEOFF12", + llvm::MachO::ARM64_RELOC_GOT_LOAD_PAGEOFF12); + io.enumCase(value, "ARM64_RELOC_POINTER_TO_GOT", + llvm::MachO::ARM64_RELOC_POINTER_TO_GOT); + io.enumCase(value, "ARM64_RELOC_TLVP_LOAD_PAGE21", + llvm::MachO::ARM64_RELOC_TLVP_LOAD_PAGE21); + io.enumCase(value, "ARM64_RELOC_TLVP_LOAD_PAGEOFF12", + llvm::MachO::ARM64_RELOC_TLVP_LOAD_PAGEOFF12); + io.enumCase(value, "ARM64_RELOC_ADDEND", + llvm::MachO::ARM64_RELOC_ADDEND); + break; + default: + llvm_unreachable("unknown architecture"); + } + } +}; + + +template <> +struct MappingTraits<Symbol> { + static void mapping(IO &io, Symbol& sym) { + io.mapRequired("name", sym.name); + io.mapRequired("type", sym.type); + io.mapOptional("scope", sym.scope, SymbolScope(0)); + io.mapOptional("sect", sym.sect, (uint8_t)0); + if (sym.type == llvm::MachO::N_UNDF) { + // In undef symbols, desc field contains alignment/ordinal info + // which is better represented as a hex vaule. + uint16_t t1 = sym.desc; + Hex16 t2 = t1; + io.mapOptional("desc", t2, Hex16(0)); + sym.desc = t2; + } else { + // In defined symbols, desc fit is a set of option bits. + io.mapOptional("desc", sym.desc, SymbolDesc(0)); + } + io.mapRequired("value", sym.value); + } +}; + +// Custom mapping for VMProtect (e.g. "r-x"). +template <> +struct ScalarTraits<VMProtect> { + static void output(const VMProtect &value, void*, raw_ostream &out) { + out << ( (value & llvm::MachO::VM_PROT_READ) ? 'r' : '-'); + out << ( (value & llvm::MachO::VM_PROT_WRITE) ? 'w' : '-'); + out << ( (value & llvm::MachO::VM_PROT_EXECUTE) ? 'x' : '-'); + } + static StringRef input(StringRef scalar, void*, VMProtect &value) { + value = 0; + if (scalar.size() != 3) + return "segment access protection must be three chars (e.g. 
\"r-x\")"; + switch (scalar[0]) { + case 'r': + value = llvm::MachO::VM_PROT_READ; + break; + case '-': + break; + default: + return "segment access protection first char must be 'r' or '-'"; + } + switch (scalar[1]) { + case 'w': + value = value | llvm::MachO::VM_PROT_WRITE; + break; + case '-': + break; + default: + return "segment access protection second char must be 'w' or '-'"; + } + switch (scalar[2]) { + case 'x': + value = value | llvm::MachO::VM_PROT_EXECUTE; + break; + case '-': + break; + default: + return "segment access protection third char must be 'x' or '-'"; + } + // Return the empty string on success, + return StringRef(); + } + static bool mustQuote(StringRef) { return false; } +}; + + +template <> +struct MappingTraits<Segment> { + static void mapping(IO &io, Segment& seg) { + io.mapRequired("name", seg.name); + io.mapRequired("address", seg.address); + io.mapRequired("size", seg.size); + io.mapRequired("access", seg.access); + } +}; + +template <> +struct ScalarEnumerationTraits<LoadCommandType> { + static void enumeration(IO &io, LoadCommandType &value) { + io.enumCase(value, "LC_LOAD_DYLIB", + llvm::MachO::LC_LOAD_DYLIB); + io.enumCase(value, "LC_LOAD_WEAK_DYLIB", + llvm::MachO::LC_LOAD_WEAK_DYLIB); + io.enumCase(value, "LC_REEXPORT_DYLIB", + llvm::MachO::LC_REEXPORT_DYLIB); + io.enumCase(value, "LC_LOAD_UPWARD_DYLIB", + llvm::MachO::LC_LOAD_UPWARD_DYLIB); + io.enumCase(value, "LC_LAZY_LOAD_DYLIB", + llvm::MachO::LC_LAZY_LOAD_DYLIB); + } +}; + +template <> +struct MappingTraits<DependentDylib> { + static void mapping(IO &io, DependentDylib& dylib) { + io.mapRequired("path", dylib.path); + io.mapOptional("kind", dylib.kind, + llvm::MachO::LC_LOAD_DYLIB); + io.mapOptional("compat-version", dylib.compatVersion, + PackedVersion(0x10000)); + io.mapOptional("current-version", dylib.currentVersion, + PackedVersion(0x10000)); + } +}; + +template <> +struct ScalarEnumerationTraits<RebaseType> { + static void enumeration(IO &io, RebaseType &value) { + 
io.enumCase(value, "REBASE_TYPE_POINTER", + llvm::MachO::REBASE_TYPE_POINTER); + io.enumCase(value, "REBASE_TYPE_TEXT_PCREL32", + llvm::MachO::REBASE_TYPE_TEXT_PCREL32); + io.enumCase(value, "REBASE_TYPE_TEXT_ABSOLUTE32", + llvm::MachO::REBASE_TYPE_TEXT_ABSOLUTE32); + } +}; + + +template <> +struct MappingTraits<RebaseLocation> { + static void mapping(IO &io, RebaseLocation& rebase) { + io.mapRequired("segment-index", rebase.segIndex); + io.mapRequired("segment-offset", rebase.segOffset); + io.mapOptional("kind", rebase.kind, + llvm::MachO::REBASE_TYPE_POINTER); + } +}; + + + +template <> +struct ScalarEnumerationTraits<BindType> { + static void enumeration(IO &io, BindType &value) { + io.enumCase(value, "BIND_TYPE_POINTER", + llvm::MachO::BIND_TYPE_POINTER); + io.enumCase(value, "BIND_TYPE_TEXT_ABSOLUTE32", + llvm::MachO::BIND_TYPE_TEXT_ABSOLUTE32); + io.enumCase(value, "BIND_TYPE_TEXT_PCREL32", + llvm::MachO::BIND_TYPE_TEXT_PCREL32); + } +}; + +template <> +struct MappingTraits<BindLocation> { + static void mapping(IO &io, BindLocation &bind) { + io.mapRequired("segment-index", bind.segIndex); + io.mapRequired("segment-offset", bind.segOffset); + io.mapOptional("kind", bind.kind, + llvm::MachO::BIND_TYPE_POINTER); + io.mapOptional("can-be-null", bind.canBeNull, false); + io.mapRequired("ordinal", bind.ordinal); + io.mapRequired("symbol-name", bind.symbolName); + io.mapOptional("addend", bind.addend, Hex64(0)); + } +}; + + +template <> +struct ScalarEnumerationTraits<ExportSymbolKind> { + static void enumeration(IO &io, ExportSymbolKind &value) { + io.enumCase(value, "EXPORT_SYMBOL_FLAGS_KIND_REGULAR", + llvm::MachO::EXPORT_SYMBOL_FLAGS_KIND_REGULAR); + io.enumCase(value, "EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL", + llvm::MachO::EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL); + io.enumCase(value, "EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE", + llvm::MachO::EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE); + } +}; + +template <> +struct ScalarBitSetTraits<ExportFlags> { + static void bitset(IO 
&io, ExportFlags &value) { + io.bitSetCase(value, "EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION", + llvm::MachO::EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION); + io.bitSetCase(value, "EXPORT_SYMBOL_FLAGS_REEXPORT", + llvm::MachO::EXPORT_SYMBOL_FLAGS_REEXPORT); + io.bitSetCase(value, "EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER", + llvm::MachO::EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER); + } +}; + + +template <> +struct MappingTraits<Export> { + static void mapping(IO &io, Export &exp) { + io.mapRequired("name", exp.name); + io.mapOptional("offset", exp.offset); + io.mapOptional("kind", exp.kind, + llvm::MachO::EXPORT_SYMBOL_FLAGS_KIND_REGULAR); + if (!io.outputting() || exp.flags) + io.mapOptional("flags", exp.flags); + io.mapOptional("other", exp.otherOffset, Hex32(0)); + io.mapOptional("other-name", exp.otherName, StringRef()); + } +}; + +template <> +struct ScalarEnumerationTraits<DataRegionType> { + static void enumeration(IO &io, DataRegionType &value) { + io.enumCase(value, "DICE_KIND_DATA", + llvm::MachO::DICE_KIND_DATA); + io.enumCase(value, "DICE_KIND_JUMP_TABLE8", + llvm::MachO::DICE_KIND_JUMP_TABLE8); + io.enumCase(value, "DICE_KIND_JUMP_TABLE16", + llvm::MachO::DICE_KIND_JUMP_TABLE16); + io.enumCase(value, "DICE_KIND_JUMP_TABLE32", + llvm::MachO::DICE_KIND_JUMP_TABLE32); + io.enumCase(value, "DICE_KIND_ABS_JUMP_TABLE32", + llvm::MachO::DICE_KIND_ABS_JUMP_TABLE32); + } +}; + +template <> +struct MappingTraits<DataInCode> { + static void mapping(IO &io, DataInCode &entry) { + io.mapRequired("offset", entry.offset); + io.mapRequired("length", entry.length); + io.mapRequired("kind", entry.kind); + } +}; + +template <> +struct ScalarTraits<PackedVersion> { + static void output(const PackedVersion &value, void*, raw_ostream &out) { + out << llvm::format("%d.%d", (value >> 16), (value >> 8) & 0xFF); + if (value & 0xFF) { + out << llvm::format(".%d", (value & 0xFF)); + } + } + static StringRef input(StringRef scalar, void*, PackedVersion &result) { + uint32_t value; + if 
(lld::MachOLinkingContext::parsePackedVersion(scalar, value)) + return "malformed version number"; + result = value; + // Return the empty string on success, + return StringRef(); + } + static bool mustQuote(StringRef) { return false; } +}; + +template <> +struct MappingTraits<NormalizedFile> { + static void mapping(IO &io, NormalizedFile &file) { + io.mapRequired("arch", file.arch); + io.mapRequired("file-type", file.fileType); + io.mapOptional("flags", file.flags); + io.mapOptional("dependents", file.dependentDylibs); + io.mapOptional("install-name", file.installName, StringRef()); + io.mapOptional("compat-version", file.compatVersion, PackedVersion(0x10000)); + io.mapOptional("current-version", file.currentVersion, PackedVersion(0x10000)); + io.mapOptional("has-UUID", file.hasUUID, true); + io.mapOptional("rpaths", file.rpaths); + io.mapOptional("entry-point", file.entryAddress, Hex64(0)); + io.mapOptional("source-version", file.sourceVersion, Hex64(0)); + io.mapOptional("OS", file.os); + io.mapOptional("min-os-version", file.minOSverson, PackedVersion(0)); + io.mapOptional("sdk-version", file.sdkVersion, PackedVersion(0)); + io.mapOptional("segments", file.segments); + io.mapOptional("sections", file.sections); + io.mapOptional("local-symbols", file.localSymbols); + io.mapOptional("global-symbols", file.globalSymbols); + io.mapOptional("undefined-symbols",file.undefinedSymbols); + io.mapOptional("page-size", file.pageSize, Hex32(4096)); + io.mapOptional("rebasings", file.rebasingInfo); + io.mapOptional("bindings", file.bindingInfo); + io.mapOptional("weak-bindings", file.weakBindingInfo); + io.mapOptional("lazy-bindings", file.lazyBindingInfo); + io.mapOptional("exports", file.exportInfo); + io.mapOptional("dataInCode", file.dataInCode); + } + static StringRef validate(IO &io, NormalizedFile &file) { + return StringRef(); + } +}; + +} // namespace llvm +} // namespace yaml + + +namespace lld { +namespace mach_o { + +/// Handles !mach-o tagged yaml documents. 
+bool MachOYamlIOTaggedDocumentHandler::handledDocTag(llvm::yaml::IO &io, + const lld::File *&file) const { + if (!io.mapTag("!mach-o")) + return false; + // Step 1: parse yaml into normalized mach-o struct. + NormalizedFile nf; + YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext()); + assert(info != nullptr); + assert(info->_normalizeMachOFile == nullptr); + info->_normalizeMachOFile = &nf; + MappingTraits<NormalizedFile>::mapping(io, nf); + // Step 2: parse normalized mach-o struct into atoms. + ErrorOr<std::unique_ptr<lld::File>> foe = normalizedToAtoms(nf, info->_path, + true); + if (nf.arch != _arch) { + io.setError(Twine("file is wrong architecture. Expected (" + + MachOLinkingContext::nameFromArch(_arch) + + ") found (" + + MachOLinkingContext::nameFromArch(nf.arch) + + ")")); + return false; + } + info->_normalizeMachOFile = nullptr; + + if (foe) { + // Transfer ownership to "out" File parameter. + std::unique_ptr<lld::File> f = std::move(foe.get()); + file = f.release(); + return true; + } else { + io.setError(foe.getError().message()); + return false; + } +} + + + +namespace normalized { + +/// Parses a yaml encoded mach-o file to produce an in-memory normalized view. +ErrorOr<std::unique_ptr<NormalizedFile>> +readYaml(std::unique_ptr<MemoryBuffer> &mb) { + // Make empty NormalizedFile. + std::unique_ptr<NormalizedFile> f(new NormalizedFile()); + + // Create YAML Input parser. + YamlContext yamlContext; + yamlContext._normalizeMachOFile = f.get(); + llvm::yaml::Input yin(mb->getBuffer(), &yamlContext); + + // Fill NormalizedFile by parsing yaml. + yin >> *f; + + // Return error if there were parsing problems. + if (yin.error()) + return make_error_code(lld::YamlReaderError::illegal_value); + + // Hand ownership of instantiated NormalizedFile to caller. + return std::move(f); +} + + +/// Writes a yaml encoded mach-o files from an in-memory normalized view. 
+std::error_code writeYaml(const NormalizedFile &file, raw_ostream &out) { + // YAML I/O is not const aware, so need to cast away ;-( + NormalizedFile *f = const_cast<NormalizedFile*>(&file); + + // Create yaml Output writer, using yaml options for context. + YamlContext yamlContext; + yamlContext._normalizeMachOFile = f; + llvm::yaml::Output yout(out, &yamlContext); + + // Stream out yaml. + yout << *f; + + return std::error_code(); +} + +} // namespace normalized +} // namespace mach_o +} // namespace lld + diff --git a/lib/ReaderWriter/MachO/MachOPasses.h b/lib/ReaderWriter/MachO/MachOPasses.h new file mode 100644 index 000000000000..86f4bc0f5d54 --- /dev/null +++ b/lib/ReaderWriter/MachO/MachOPasses.h @@ -0,0 +1,28 @@ +//===- lib/ReaderWriter/MachO/MachOPasses.h -------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_MACHO_PASSES_H +#define LLD_READER_WRITER_MACHO_PASSES_H + +#include "lld/Core/PassManager.h" +#include "lld/ReaderWriter/MachOLinkingContext.h" + +namespace lld { +namespace mach_o { + +void addLayoutPass(PassManager &pm, const MachOLinkingContext &ctx); +void addStubsPass(PassManager &pm, const MachOLinkingContext &ctx); +void addGOTPass(PassManager &pm, const MachOLinkingContext &ctx); +void addCompactUnwindPass(PassManager &pm, const MachOLinkingContext &ctx); +void addShimPass(PassManager &pm, const MachOLinkingContext &ctx); + +} // namespace mach_o +} // namespace lld + +#endif // LLD_READER_WRITER_MACHO_PASSES_H diff --git a/lib/ReaderWriter/MachO/Makefile b/lib/ReaderWriter/MachO/Makefile new file mode 100644 index 000000000000..1acd578ba9d3 --- /dev/null +++ b/lib/ReaderWriter/MachO/Makefile @@ -0,0 +1,14 @@ +##===- lld/lib/ReaderWriter/MachO/Makefile --------------*- Makefile -*-===## +# +# The LLVM 
Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LLD_LEVEL := ../../.. +LIBRARYNAME := lldMachO +USEDLIBS = lldCore.a + +include $(LLD_LEVEL)/Makefile diff --git a/lib/ReaderWriter/MachO/ShimPass.cpp b/lib/ReaderWriter/MachO/ShimPass.cpp new file mode 100644 index 000000000000..a8c69f8ceace --- /dev/null +++ b/lib/ReaderWriter/MachO/ShimPass.cpp @@ -0,0 +1,129 @@ +//===- lib/ReaderWriter/MachO/ShimPass.cpp -------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This linker pass updates branch-sites whose target is a different mode +// (thumb vs arm). +// +// Arm code has two instruction encodings thumb and arm. When branching from +// one code encoding to another, you need to use an instruction that switches +// the instruction mode. Usually the transition only happens at call sites, and +// the linker can transform a BL instruction in BLX (or vice versa). But if the +// compiler did a tail call optimization and a function ends with a branch (not +// branch and link), there is no pc-rel BX instruction. +// +// The ShimPass looks for pc-rel B instructions that will need to switch mode. +// For those cases it synthesizes a shim which does the transition, then +// modifies the original atom with the B instruction to target to the shim atom. 
+// +//===----------------------------------------------------------------------===// + +#include "ArchHandler.h" +#include "File.h" +#include "MachOPasses.h" +#include "lld/Core/DefinedAtom.h" +#include "lld/Core/File.h" +#include "lld/Core/LLVM.h" +#include "lld/Core/Reference.h" +#include "lld/Core/Simple.h" +#include "lld/ReaderWriter/MachOLinkingContext.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" + +namespace lld { +namespace mach_o { + +class ShimPass : public Pass { +public: + ShimPass(const MachOLinkingContext &context) + : _context(context) + , _archHandler(_context.archHandler()) + , _stubInfo(_archHandler.stubInfo()) + , _file("<mach-o shim pass>") { + } + + + void perform(std::unique_ptr<MutableFile> &mergedFile) override { + // Scan all references in all atoms. + for (const DefinedAtom *atom : mergedFile->defined()) { + for (const Reference *ref : *atom) { + // Look at non-call branches. + if (!_archHandler.isNonCallBranch(*ref)) + continue; + const Atom *target = ref->target(); + assert(target != nullptr); + if (const lld::DefinedAtom *daTarget = dyn_cast<DefinedAtom>(target)) { + bool atomIsThumb = _archHandler.isThumbFunction(*atom); + bool targetIsThumb = _archHandler.isThumbFunction(*daTarget); + if (atomIsThumb != targetIsThumb) + updateBranchToUseShim(atomIsThumb, *daTarget, ref); + } + } + } + // Exit early if no shims needed. + if (_targetToShim.empty()) + return; + + // Sort shim atoms so the layout order is stable. + std::vector<const DefinedAtom *> shims; + shims.reserve(_targetToShim.size()); + for (auto element : _targetToShim) { + shims.push_back(element.second); + } + std::sort(shims.begin(), shims.end(), + [](const DefinedAtom *l, const DefinedAtom *r) { + return (l->name() < r->name()); + }); + + // Add all shims to master file. 
+ for (const DefinedAtom *shim : shims) { + mergedFile->addAtom(*shim); + } + } + +private: + + void updateBranchToUseShim(bool thumbToArm, const DefinedAtom& target, + const Reference *ref) { + // Make file-format specific stub and other support atoms. + const DefinedAtom *shim = this->getShim(thumbToArm, target); + assert(shim != nullptr); + // Switch branch site to target shim atom. + const_cast<Reference *>(ref)->setTarget(shim); + } + + const DefinedAtom* getShim(bool thumbToArm, const DefinedAtom& target) { + auto pos = _targetToShim.find(&target); + if ( pos != _targetToShim.end() ) { + // Reuse an existing shim. + assert(pos->second != nullptr); + return pos->second; + } else { + // There is no existing shim, so create a new one. + const DefinedAtom *shim = _archHandler.createShim(_file, thumbToArm, + target); + _targetToShim[&target] = shim; + return shim; + } + } + + const MachOLinkingContext &_context; + mach_o::ArchHandler &_archHandler; + const ArchHandler::StubInfo &_stubInfo; + MachOFile _file; + llvm::DenseMap<const Atom*, const DefinedAtom*> _targetToShim; +}; + + + +void addShimPass(PassManager &pm, const MachOLinkingContext &ctx) { + pm.add(llvm::make_unique<ShimPass>(ctx)); +} + +} // end namespace mach_o +} // end namespace lld diff --git a/lib/ReaderWriter/MachO/StubsPass.cpp b/lib/ReaderWriter/MachO/StubsPass.cpp new file mode 100644 index 000000000000..bc4d9c2087f3 --- /dev/null +++ b/lib/ReaderWriter/MachO/StubsPass.cpp @@ -0,0 +1,373 @@ +//===- lib/ReaderWriter/MachO/StubsPass.cpp -------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This linker pass updates call-sites which have references to shared library +// atoms to instead have a reference to a stub (PLT entry) for the specified +// symbol. 
Each file format defines a subclass of StubsPass which implements +// the abstract methods for creating the file format specific StubAtoms. +// +//===----------------------------------------------------------------------===// + +#include "ArchHandler.h" +#include "File.h" +#include "MachOPasses.h" +#include "lld/Core/DefinedAtom.h" +#include "lld/Core/File.h" +#include "lld/Core/LLVM.h" +#include "lld/Core/Reference.h" +#include "lld/Core/Simple.h" +#include "lld/ReaderWriter/MachOLinkingContext.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" + + +namespace lld { +namespace mach_o { + + +// +// Lazy Pointer Atom created by the stubs pass. +// +class LazyPointerAtom : public SimpleDefinedAtom { +public: + LazyPointerAtom(const File &file, bool is64) + : SimpleDefinedAtom(file), _is64(is64) { } + + ContentType contentType() const override { + return DefinedAtom::typeLazyPointer; + } + + Alignment alignment() const override { + return Alignment(_is64 ? 3 : 2); + } + + uint64_t size() const override { + return _is64 ? 8 : 4; + } + + ContentPermissions permissions() const override { + return DefinedAtom::permRW_; + } + + ArrayRef<uint8_t> rawContent() const override { + static const uint8_t zeros[] = + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; + return llvm::makeArrayRef(zeros, size()); + } + +private: + const bool _is64; +}; + + +// +// NonLazyPointer (GOT) Atom created by the stubs pass. +// +class NonLazyPointerAtom : public SimpleDefinedAtom { +public: + NonLazyPointerAtom(const File &file, bool is64) + : SimpleDefinedAtom(file), _is64(is64) { } + + ContentType contentType() const override { + return DefinedAtom::typeGOT; + } + + Alignment alignment() const override { + return Alignment(_is64 ? 3 : 2); + } + + uint64_t size() const override { + return _is64 ? 
8 : 4; + } + + ContentPermissions permissions() const override { + return DefinedAtom::permRW_; + } + + ArrayRef<uint8_t> rawContent() const override { + static const uint8_t zeros[] = + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; + return llvm::makeArrayRef(zeros, size()); + } + +private: + const bool _is64; +}; + + + +// +// Stub Atom created by the stubs pass. +// +class StubAtom : public SimpleDefinedAtom { +public: + StubAtom(const File &file, const ArchHandler::StubInfo &stubInfo) + : SimpleDefinedAtom(file), _stubInfo(stubInfo){ } + + ContentType contentType() const override { + return DefinedAtom::typeStub; + } + + Alignment alignment() const override { + return Alignment(_stubInfo.codeAlignment); + } + + uint64_t size() const override { + return _stubInfo.stubSize; + } + + ContentPermissions permissions() const override { + return DefinedAtom::permR_X; + } + + ArrayRef<uint8_t> rawContent() const override { + return llvm::makeArrayRef(_stubInfo.stubBytes, _stubInfo.stubSize); + } + +private: + const ArchHandler::StubInfo &_stubInfo; +}; + + +// +// Stub Helper Atom created by the stubs pass. +// +class StubHelperAtom : public SimpleDefinedAtom { +public: + StubHelperAtom(const File &file, const ArchHandler::StubInfo &stubInfo) + : SimpleDefinedAtom(file), _stubInfo(stubInfo) { } + + ContentType contentType() const override { + return DefinedAtom::typeStubHelper; + } + + Alignment alignment() const override { + return Alignment(_stubInfo.codeAlignment); + } + + uint64_t size() const override { + return _stubInfo.stubHelperSize; + } + + ContentPermissions permissions() const override { + return DefinedAtom::permR_X; + } + + ArrayRef<uint8_t> rawContent() const override { + return llvm::makeArrayRef(_stubInfo.stubHelperBytes, + _stubInfo.stubHelperSize); + } + +private: + const ArchHandler::StubInfo &_stubInfo; +}; + + +// +// Stub Helper Common Atom created by the stubs pass. 
+// +class StubHelperCommonAtom : public SimpleDefinedAtom { +public: + StubHelperCommonAtom(const File &file, const ArchHandler::StubInfo &stubInfo) + : SimpleDefinedAtom(file), _stubInfo(stubInfo) { } + + ContentType contentType() const override { + return DefinedAtom::typeStubHelper; + } + + Alignment alignment() const override { + return Alignment(_stubInfo.codeAlignment); + } + + uint64_t size() const override { + return _stubInfo.stubHelperCommonSize; + } + + ContentPermissions permissions() const override { + return DefinedAtom::permR_X; + } + + ArrayRef<uint8_t> rawContent() const override { + return llvm::makeArrayRef(_stubInfo.stubHelperCommonBytes, + _stubInfo.stubHelperCommonSize); + } + +private: + const ArchHandler::StubInfo &_stubInfo; +}; + + +class StubsPass : public Pass { +public: + StubsPass(const MachOLinkingContext &context) + : _context(context), _archHandler(_context.archHandler()), + _stubInfo(_archHandler.stubInfo()), _file("<mach-o Stubs pass>") { } + + + void perform(std::unique_ptr<MutableFile> &mergedFile) override { + // Skip this pass if output format uses text relocations instead of stubs. + if (!this->noTextRelocs()) + return; + + // Scan all references in all atoms. + for (const DefinedAtom *atom : mergedFile->defined()) { + for (const Reference *ref : *atom) { + // Look at call-sites. + if (!this->isCallSite(*ref)) + continue; + const Atom *target = ref->target(); + assert(target != nullptr); + if (isa<SharedLibraryAtom>(target)) { + // Calls to shared libraries go through stubs. + _targetToUses[target].push_back(ref); + continue; + } + const DefinedAtom *defTarget = dyn_cast<DefinedAtom>(target); + if (defTarget && defTarget->interposable() != DefinedAtom::interposeNo){ + // Calls to interposable functions in same linkage unit must also go + // through a stub. + assert(defTarget->scope() != DefinedAtom::scopeTranslationUnit); + _targetToUses[target].push_back(ref); + } + } + } + + // Exit early if no stubs needed. 
+ if (_targetToUses.empty()) + return; + + // First add help-common and GOT slots used by lazy binding. + SimpleDefinedAtom *helperCommonAtom = + new (_file.allocator()) StubHelperCommonAtom(_file, _stubInfo); + SimpleDefinedAtom *helperCacheNLPAtom = + new (_file.allocator()) NonLazyPointerAtom(_file, _context.is64Bit()); + SimpleDefinedAtom *helperBinderNLPAtom = + new (_file.allocator()) NonLazyPointerAtom(_file, _context.is64Bit()); + addReference(helperCommonAtom, _stubInfo.stubHelperCommonReferenceToCache, + helperCacheNLPAtom); + addOptReference( + helperCommonAtom, _stubInfo.stubHelperCommonReferenceToCache, + _stubInfo.optStubHelperCommonReferenceToCache, helperCacheNLPAtom); + addReference(helperCommonAtom, _stubInfo.stubHelperCommonReferenceToBinder, + helperBinderNLPAtom); + addOptReference( + helperCommonAtom, _stubInfo.stubHelperCommonReferenceToBinder, + _stubInfo.optStubHelperCommonReferenceToBinder, helperBinderNLPAtom); + mergedFile->addAtom(*helperCommonAtom); + mergedFile->addAtom(*helperBinderNLPAtom); + mergedFile->addAtom(*helperCacheNLPAtom); + + // Add reference to dyld_stub_binder in libSystem.dylib + auto I = std::find_if( + mergedFile->sharedLibrary().begin(), mergedFile->sharedLibrary().end(), + [&](const SharedLibraryAtom *atom) { + return atom->name().equals(_stubInfo.binderSymbolName); + }); + assert(I != mergedFile->sharedLibrary().end() && "dyld_stub_binder not found"); + addReference(helperBinderNLPAtom, _stubInfo.nonLazyPointerReferenceToBinder, *I); + + // Sort targets by name, so stubs and lazy pointers are consistent + std::vector<const Atom *> targetsNeedingStubs; + for (auto it : _targetToUses) + targetsNeedingStubs.push_back(it.first); + std::sort(targetsNeedingStubs.begin(), targetsNeedingStubs.end(), + [](const Atom * left, const Atom * right) { + return (left->name().compare(right->name()) < 0); + }); + + // Make and append stubs, lazy pointers, and helpers in alphabetical order. 
+ unsigned lazyOffset = 0; + for (const Atom *target : targetsNeedingStubs) { + StubAtom *stub = new (_file.allocator()) StubAtom(_file, _stubInfo); + LazyPointerAtom *lp = + new (_file.allocator()) LazyPointerAtom(_file, _context.is64Bit()); + StubHelperAtom *helper = + new (_file.allocator()) StubHelperAtom(_file, _stubInfo); + + addReference(stub, _stubInfo.stubReferenceToLP, lp); + addOptReference(stub, _stubInfo.stubReferenceToLP, + _stubInfo.optStubReferenceToLP, lp); + addReference(lp, _stubInfo.lazyPointerReferenceToHelper, helper); + addReference(lp, _stubInfo.lazyPointerReferenceToFinal, target); + addReference(helper, _stubInfo.stubHelperReferenceToImm, helper); + addReferenceAddend(helper, _stubInfo.stubHelperReferenceToImm, helper, + lazyOffset); + addReference(helper, _stubInfo.stubHelperReferenceToHelperCommon, + helperCommonAtom); + + mergedFile->addAtom(*stub); + mergedFile->addAtom(*lp); + mergedFile->addAtom(*helper); + + // Update each reference to use stub. + for (const Reference *ref : _targetToUses[target]) { + assert(ref->target() == target); + // Switch call site to reference stub atom instead. 
+ const_cast<Reference *>(ref)->setTarget(stub); + } + + // Calculate new offset + lazyOffset += target->name().size() + 12; + } + } + +private: + + bool noTextRelocs() { + return true; + } + + bool isCallSite(const Reference &ref) { + return _archHandler.isCallSite(ref); + } + + void addReference(SimpleDefinedAtom* atom, + const ArchHandler::ReferenceInfo &refInfo, + const lld::Atom* target) { + atom->addReference(Reference::KindNamespace::mach_o, + refInfo.arch, refInfo.kind, refInfo.offset, + target, refInfo.addend); + } + + void addReferenceAddend(SimpleDefinedAtom *atom, + const ArchHandler::ReferenceInfo &refInfo, + const lld::Atom *target, uint64_t addend) { + atom->addReference(Reference::KindNamespace::mach_o, refInfo.arch, + refInfo.kind, refInfo.offset, target, addend); + } + + void addOptReference(SimpleDefinedAtom* atom, + const ArchHandler::ReferenceInfo &refInfo, + const ArchHandler::OptionalRefInfo &optRef, + const lld::Atom* target) { + if (!optRef.used) + return; + atom->addReference(Reference::KindNamespace::mach_o, + refInfo.arch, optRef.kind, optRef.offset, + target, optRef.addend); + } + + typedef llvm::DenseMap<const Atom*, + llvm::SmallVector<const Reference *, 8>> TargetToUses; + + const MachOLinkingContext &_context; + mach_o::ArchHandler &_archHandler; + const ArchHandler::StubInfo &_stubInfo; + MachOFile _file; + TargetToUses _targetToUses; +}; + + + +void addStubsPass(PassManager &pm, const MachOLinkingContext &ctx) { + pm.add(std::unique_ptr<Pass>(new StubsPass(ctx))); +} + +} // end namespace mach_o +} // end namespace lld diff --git a/lib/ReaderWriter/MachO/WriterMachO.cpp b/lib/ReaderWriter/MachO/WriterMachO.cpp new file mode 100644 index 000000000000..de1c0e38063b --- /dev/null +++ b/lib/ReaderWriter/MachO/WriterMachO.cpp @@ -0,0 +1,72 @@ +//===- lib/ReaderWriter/MachO/WriterMachO.cpp -----------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "ExecutableAtoms.hpp" +#include "MachONormalizedFile.h" +#include "lld/Core/File.h" +#include "lld/Core/Writer.h" +#include "lld/ReaderWriter/MachOLinkingContext.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FileOutputBuffer.h" +#include "llvm/Support/MachO.h" +#include "llvm/Support/raw_ostream.h" +#include <system_error> + +using lld::mach_o::normalized::NormalizedFile; + +namespace lld { +namespace mach_o { + +class MachOWriter : public Writer { +public: + MachOWriter(const MachOLinkingContext &ctxt) : _context(ctxt) { } + + std::error_code writeFile(const lld::File &file, StringRef path) override { + // Construct empty normalized file from atoms. + ErrorOr<std::unique_ptr<NormalizedFile>> nFile = + normalized::normalizedFromAtoms(file, _context); + if (std::error_code ec = nFile.getError()) + return ec; + + // For testing, write out yaml form of normalized file. + if (_context.printAtoms()) { + std::unique_ptr<Writer> yamlWriter = createWriterYAML(_context); + yamlWriter->writeFile(file, "-"); + } + + // Write normalized file as mach-o binary. + return writeBinary(*nFile->get(), path); + } + + bool createImplicitFiles(std::vector<std::unique_ptr<File> > &r) override { + // When building main executables, add _main as required entry point. + if (_context.outputTypeHasEntry()) + r.emplace_back(new CEntryFile(_context)); + // If this can link with dylibs, need helper function (dyld_stub_binder). + if (_context.needsStubsPass()) + r.emplace_back(new StubHelperFile(_context)); + // Final linked images can access a symbol for their mach_header. 
+ if (_context.outputMachOType() != llvm::MachO::MH_OBJECT) + r.emplace_back(new MachHeaderAliasFile(_context)); + + return true; + } +private: + const MachOLinkingContext &_context; + }; + + +} // namespace mach_o + +std::unique_ptr<Writer> createWriterMachO(const MachOLinkingContext &context) { + return std::unique_ptr<Writer>(new lld::mach_o::MachOWriter(context)); +} + +} // namespace lld diff --git a/lib/ReaderWriter/Makefile b/lib/ReaderWriter/Makefile new file mode 100644 index 000000000000..23587440805f --- /dev/null +++ b/lib/ReaderWriter/Makefile @@ -0,0 +1,16 @@ +##===- lld/lib/ReaderWriter/Makefile ---------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LLD_LEVEL := ../.. +LIBRARYNAME := lldReaderWriter + +# these link against this lib +PARALLEL_DIRS := ELF MachO Native PECOFF YAML + +include $(LLD_LEVEL)/Makefile diff --git a/lib/ReaderWriter/Native/CMakeLists.txt b/lib/ReaderWriter/Native/CMakeLists.txt new file mode 100644 index 000000000000..e15f3d60e89c --- /dev/null +++ b/lib/ReaderWriter/Native/CMakeLists.txt @@ -0,0 +1,7 @@ +add_llvm_library(lldNative + ReaderNative.cpp + WriterNative.cpp + LINK_LIBS + lldCore + LLVMSupport + ) diff --git a/lib/ReaderWriter/Native/Makefile b/lib/ReaderWriter/Native/Makefile new file mode 100644 index 000000000000..6aba37868900 --- /dev/null +++ b/lib/ReaderWriter/Native/Makefile @@ -0,0 +1,14 @@ +##===- lld/lib/ReaderWriter/Native/Makefile --------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LLD_LEVEL := ../../.. 
+LIBRARYNAME := lldNative +USEDLIBS = lldCore.a + +include $(LLD_LEVEL)/Makefile diff --git a/lib/ReaderWriter/Native/NativeFileFormat.h b/lib/ReaderWriter/Native/NativeFileFormat.h new file mode 100644 index 000000000000..535072fe2314 --- /dev/null +++ b/lib/ReaderWriter/Native/NativeFileFormat.h @@ -0,0 +1,258 @@ +//===- lib/ReaderWriter/Native/NativeFileFormat.h -------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_NATIVE_NATIVE_FILE_FORMAT_H +#define LLD_READER_WRITER_NATIVE_NATIVE_FILE_FORMAT_H + +#include "llvm/Support/DataTypes.h" +#include <cstdint> + +namespace lld { + +// +// Overview: +// +// The number one design goal of this file format is to enable the linker to +// read object files into in-memory Atom objects extremely quickly. +// The second design goal is to enable future modifications to the +// Atom attribute model. +// +// The llvm native object file format is not like traditional object file +// formats (e.g. ELF, COFF, mach-o). There is no symbol table and no +// sections. Instead the file is essentially an array of archived Atoms. +// It is *not* serialized Atoms which would require deserialization into +// in memory objects. Instead it is an array of read-only info about each +// Atom. The NativeReader bulk creates in-memory Atoms which just have +// an ivar which points to the read-only info for that Atom. No additional +// processing is done to construct the in-memory Atoms. All Atom attribute +// getter methods are virtual calls which dig up the info they need from the +// ivar data. +// +// To support the gradual evolution of Atom attributes, the Atom read-only +// data is versioned. The NativeReader chooses which in-memory Atom class +// to use based on the version. 
What this means is that if new attributes +// are added (or changed) in the Atom model, a new native atom class and +// read-only atom info struct need to be defined. Then, all the existing +// native reader atom classes need to be modified to do their best effort +// to map their old style read-only data to the new Atom model. At some point +// some classes to support old versions may be dropped. +// +// +// Details: +// +// The native object file format consists of a header that specifies the +// endianness of the file and the architecture along with a list of "chunks" +// in the file. A Chunk is simply a tagged range of the file. There is +// one chunk for the array of atom infos. There is another chunk for the +// string pool, and another for the content pool. +// +// It turns out that most atoms have very similar sets of attributes, only +// the name and content attributes vary. To exploit this fact to reduce the file +// size, the atom read-only info contains just the name and content info plus +// a reference to which attribute set it uses. The attribute sets are stored +// in another chunk. +// + + +// +// An entry in the NativeFileHeader that describes one chunk of the file. 
//
// One entry of the chunk table that follows the NativeFileHeader. It tags a
// range of the file with a signature and an element count.
//
struct NativeChunk {
  uint32_t signature;    // one of NativeChunkSignatures
  uint32_t fileOffset;   // where the chunk starts in the file
  uint32_t fileSize;     // size of the chunk in bytes
  uint32_t elementCount; // number of elements stored in the chunk
};

//
// Header at the very start of a native object file. The array of
// NativeChunk entries follows it directly.
//
struct NativeFileHeader {
  uint8_t magic[16]; // NATIVE_FILE_HEADER_MAGIC
  uint32_t endian;
  uint32_t architecture;
  uint32_t fileSize;
  uint32_t chunkCount;
  // NativeChunk chunks[]
};

//
// Values of NativeChunk.signature, listed in numeric order.
//
enum NativeChunkSignatures {
  NCS_DefinedAtomsV1 = 1,
  NCS_AttributesArrayV1 = 2,
  NCS_UndefinedAtomsV1 = 3,
  NCS_SharedLibraryAtomsV1 = 4,
  NCS_AbsoluteAtomsV1 = 5,
  NCS_Strings = 6,
  NCS_ReferencesArrayV1 = 7,
  NCS_ReferencesArrayV2 = 8,
  NCS_TargetsTable = 9,
  NCS_AddendsTable = 10,
  NCS_Content = 11,
  NCS_AbsoluteAttributesV1 = 12,
};

//
// The 16 bytes found at the start of every native object file.
//
#define NATIVE_FILE_HEADER_MAGIC "llvm nat obj v1 "

//
// Values of NativeFileHeader.endian
//
enum {
  NFH_BigEndian = 0x42696745,
  NFH_LittleEndian = 0x4574696c
};

//
// Values of NativeFileHeader.architecture
//
enum {
  NFA_x86 = 1,
  NFA_x86_64 = 2,
  NFA_armv6 = 3,
  NFA_armv7 = 4,
};

//
// Element type of the NCS_DefinedAtomsV1 chunk.
//
struct NativeDefinedAtomIvarsV1 {
  uint32_t nameOffset;
  uint32_t attributesOffset;
  uint32_t referencesStartIndex;
  uint32_t referencesCount;
  uint32_t contentOffset;
  uint32_t contentSize;
  uint64_t sectionSize;
};

//
// Element type of the NCS_AttributesArrayV1 chunk.
//
struct NativeAtomAttributesV1 {
  uint32_t sectionNameOffset;
  uint16_t align2;
  uint16_t alignModulus;
  uint8_t scope;
  uint8_t interposable;
  uint8_t merge;
  uint8_t contentType;
  uint8_t sectionChoice;
  uint8_t deadStrip;
  uint8_t dynamicExport;
  uint8_t permissions;
  uint8_t alias;
  uint8_t codeModel;
};

//
// Element type of the NCS_UndefinedAtomsV1 chunk.
//
struct NativeUndefinedAtomIvarsV1 {
  uint32_t nameOffset;
  uint32_t flags;
  uint32_t fallbackNameOffset;
};

//
// Element type of the NCS_SharedLibraryAtomsV1 chunk.
//
struct NativeSharedLibraryAtomIvarsV1 {
  uint64_t size;
  uint32_t nameOffset;
  uint32_t loadNameOffset;
  uint32_t type;
  uint32_t flags;
};

//
// Element type of the NCS_AbsoluteAtomsV1 chunk.
//
struct NativeAbsoluteAtomIvarsV1 {
  uint32_t nameOffset;
  uint32_t attributesOffset;
  uint32_t reserved;
  uint64_t value;
};

//
// Element type of the NCS_ReferencesArrayV1 chunk.
//
struct NativeReferenceIvarsV1 {
  enum {
    noTarget = UINT16_MAX
  };
  uint32_t offsetInAtom;
  uint16_t kindValue;
  uint8_t kindNamespace;
  uint8_t kindArch;
  uint16_t targetIndex;
  uint16_t addendIndex;
};

//
// Element type of the NCS_ReferencesArrayV2 chunk.
//
struct NativeReferenceIvarsV2 {
  enum : unsigned {
    noTarget = UINT32_MAX
  };
  uint64_t offsetInAtom;
  int64_t addend;
  uint16_t kindValue;
  uint8_t kindNamespace;
  uint8_t kindArch;
  uint32_t targetIndex;
  uint32_t tag;
};

//
// The NCS_TargetsTable chunk contains an array of uint32_t entries.
// The C++ class Reference has a target() method that returns a
// pointer to another Atom. We can't have pointers in object files,
// so instead NativeReferenceIvarsV1 contains an index to the target.
// The index is into this NCS_TargetsTable of uint32_t entries.
// The values in this table are the index of the (target) atom in this file.
// For DefinedAtoms the value is from 0 to NCS_DefinedAtomsV1.elementCount.
// For UndefinedAtoms the value is from NCS_DefinedAtomsV1.elementCount to
// NCS_DefinedAtomsV1.elementCount+NCS_UndefinedAtomsV1.elementCount.
// Shared library atoms and then absolute atoms continue the numbering
// after the undefined atoms.
//

//
// The NCS_AddendsTable chunk contains an array of int64_t entries.
+// If we allocated space for addends directly in NativeReferenceIvarsV1 +// it would double the size of that struct. But since addends are rare, +// we instead just keep a pool of addends and have NativeReferenceIvarsV1 +// (if it needs an addend) just store the index (into the pool) of the +// addend it needs. +// + + + +} // namespace lld + +#endif // LLD_READER_WRITER_NATIVE_NATIVE_FILE_FORMAT_H diff --git a/lib/ReaderWriter/Native/ReaderNative.cpp b/lib/ReaderWriter/Native/ReaderNative.cpp new file mode 100644 index 000000000000..84cdb4b997e8 --- /dev/null +++ b/lib/ReaderWriter/Native/ReaderNative.cpp @@ -0,0 +1,1013 @@ +//===- lib/ReaderWriter/Native/ReaderNative.cpp ---------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "NativeFileFormat.h" +#include "lld/Core/Atom.h" +#include "lld/Core/Error.h" +#include "lld/Core/File.h" +#include "lld/Core/Reader.h" +#include "lld/Core/Simple.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/raw_ostream.h" +#include <memory> +#include <vector> + +namespace lld { +namespace native { + +// forward reference +class File; + +// +// An object of this class is instantiated for each NativeDefinedAtomIvarsV1 +// struct in the NCS_DefinedAtomsV1 chunk. 
+// +class NativeDefinedAtomV1 : public DefinedAtom { +public: + NativeDefinedAtomV1(const File& f, + const NativeDefinedAtomIvarsV1* ivarData) + : _file(&f), _ivarData(ivarData) { } + + const lld::File& file() const override; + + uint64_t ordinal() const override; + + StringRef name() const override; + + uint64_t size() const override { return _ivarData->contentSize; } + + uint64_t sectionSize() const override { return _ivarData->sectionSize; } + + DefinedAtom::Scope scope() const override { + return (DefinedAtom::Scope)(attributes().scope); + } + + DefinedAtom::Interposable interposable() const override { + return (DefinedAtom::Interposable)(attributes().interposable); + } + + DefinedAtom::Merge merge() const override { + return (DefinedAtom::Merge)(attributes().merge); + } + + DefinedAtom::ContentType contentType() const override { + const NativeAtomAttributesV1& attr = attributes(); + return (DefinedAtom::ContentType)(attr.contentType); + } + + DefinedAtom::Alignment alignment() const override { + return DefinedAtom::Alignment(attributes().align2, attributes().alignModulus); + } + + DefinedAtom::SectionChoice sectionChoice() const override { + return (DefinedAtom::SectionChoice)(attributes().sectionChoice); + } + + StringRef customSectionName() const override; + + DefinedAtom::DeadStripKind deadStrip() const override { + return (DefinedAtom::DeadStripKind)(attributes().deadStrip); + } + + DynamicExport dynamicExport() const override { + return (DynamicExport)attributes().dynamicExport; + } + + DefinedAtom::CodeModel codeModel() const override { + return DefinedAtom::CodeModel(attributes().codeModel); + } + + DefinedAtom::ContentPermissions permissions() const override { + return (DefinedAtom::ContentPermissions)(attributes().permissions); + } + + ArrayRef<uint8_t> rawContent() const override; + + reference_iterator begin() const override; + + reference_iterator end() const override; + + const Reference* derefIterator(const void*) const override; + + void 
incrementIterator(const void*& it) const override; + +private: + const NativeAtomAttributesV1& attributes() const; + + const File *_file; + const NativeDefinedAtomIvarsV1 *_ivarData; +}; + + + +// +// An object of this class is instantied for each NativeUndefinedAtomIvarsV1 +// struct in the NCS_UndefinedAtomsV1 chunk. +// +class NativeUndefinedAtomV1 : public UndefinedAtom { +public: + NativeUndefinedAtomV1(const File& f, + const NativeUndefinedAtomIvarsV1* ivarData) + : _file(&f), _ivarData(ivarData) { } + + const lld::File& file() const override; + StringRef name() const override; + + CanBeNull canBeNull() const override { + return (CanBeNull)(_ivarData->flags & 0x3); + } + + const UndefinedAtom *fallback() const override; + +private: + const File *_file; + const NativeUndefinedAtomIvarsV1 *_ivarData; + mutable std::unique_ptr<const SimpleUndefinedAtom> _fallback; +}; + + +// +// An object of this class is instantied for each NativeUndefinedAtomIvarsV1 +// struct in the NCS_SharedLibraryAtomsV1 chunk. +// +class NativeSharedLibraryAtomV1 : public SharedLibraryAtom { +public: + NativeSharedLibraryAtomV1(const File& f, + const NativeSharedLibraryAtomIvarsV1* ivarData) + : _file(&f), _ivarData(ivarData) { } + + const lld::File& file() const override; + StringRef name() const override; + StringRef loadName() const override; + + bool canBeNullAtRuntime() const override { + return (_ivarData->flags & 0x1); + } + + Type type() const override { + return (Type)_ivarData->type; + } + + uint64_t size() const override { + return _ivarData->size; + } + +private: + const File *_file; + const NativeSharedLibraryAtomIvarsV1 *_ivarData; +}; + + +// +// An object of this class is instantied for each NativeAbsoluteAtomIvarsV1 +// struct in the NCS_AbsoluteAtomsV1 chunk. 
+// +class NativeAbsoluteAtomV1 : public AbsoluteAtom { +public: + NativeAbsoluteAtomV1(const File& f, + const NativeAbsoluteAtomIvarsV1* ivarData) + : _file(&f), _ivarData(ivarData) { } + + const lld::File& file() const override; + StringRef name() const override; + Scope scope() const override { + const NativeAtomAttributesV1& attr = absAttributes(); + return (Scope)(attr.scope); + } + uint64_t value() const override { + return _ivarData->value; + } + +private: + const NativeAtomAttributesV1& absAttributes() const; + const File *_file; + const NativeAbsoluteAtomIvarsV1 *_ivarData; +}; + + +// +// An object of this class is instantied for each NativeReferenceIvarsV1 +// struct in the NCS_ReferencesArrayV1 chunk. +// +class NativeReferenceV1 : public Reference { +public: + NativeReferenceV1(const File &f, const NativeReferenceIvarsV1 *ivarData) + : Reference((KindNamespace)ivarData->kindNamespace, + (KindArch)ivarData->kindArch, ivarData->kindValue), + _file(&f), _ivarData(ivarData) {} + + uint64_t offsetInAtom() const override { + return _ivarData->offsetInAtom; + } + + const Atom* target() const override; + Addend addend() const override; + void setTarget(const Atom* newAtom) override; + void setAddend(Addend a) override; + +private: + const File *_file; + const NativeReferenceIvarsV1 *_ivarData; +}; + + +// +// An object of this class is instantied for each NativeReferenceIvarsV1 +// struct in the NCS_ReferencesArrayV1 chunk. 
+// +class NativeReferenceV2 : public Reference { +public: + NativeReferenceV2(const File &f, const NativeReferenceIvarsV2 *ivarData) + : Reference((KindNamespace)ivarData->kindNamespace, + (KindArch)ivarData->kindArch, ivarData->kindValue), + _file(&f), _ivarData(ivarData) {} + + uint64_t offsetInAtom() const override { + return _ivarData->offsetInAtom; + } + + const Atom* target() const override; + Addend addend() const override; + void setTarget(const Atom* newAtom) override; + void setAddend(Addend a) override; + uint32_t tag() const override; + +private: + const File *_file; + const NativeReferenceIvarsV2 *_ivarData; +}; + + +// +// lld::File object for native llvm object file +// +class File : public lld::File { +public: + File(std::unique_ptr<MemoryBuffer> mb) + : lld::File(mb->getBufferIdentifier(), kindObject), + _mb(std::move(mb)), // Reader now takes ownership of buffer + _header(nullptr), _targetsTable(nullptr), _targetsTableCount(0), + _strings(nullptr), _stringsMaxOffset(0), _addends(nullptr), + _addendsMaxIndex(0), _contentStart(nullptr), _contentEnd(nullptr) { + _header = + reinterpret_cast<const NativeFileHeader *>(_mb->getBufferStart()); + } + + /// Parses a File object from a native object file. 
+ std::error_code doParse() override { + const uint8_t *const base = + reinterpret_cast<const uint8_t *>(_mb->getBufferStart()); + StringRef path(_mb->getBufferIdentifier()); + const NativeFileHeader *const header = + reinterpret_cast<const NativeFileHeader *>(base); + const NativeChunk *const chunks = + reinterpret_cast<const NativeChunk *>(base + sizeof(NativeFileHeader)); + // make sure magic matches + if (memcmp(header->magic, NATIVE_FILE_HEADER_MAGIC, + sizeof(header->magic)) != 0) + return make_error_code(NativeReaderError::unknown_file_format); + + // make sure mapped file contains all needed data + const size_t fileSize = _mb->getBufferSize(); + if (header->fileSize > fileSize) + return make_error_code(NativeReaderError::file_too_short); + + DEBUG_WITH_TYPE("ReaderNative", + llvm::dbgs() << " Native File Header:" << " fileSize=" + << header->fileSize << " chunkCount=" + << header->chunkCount << "\n"); + + // process each chunk + for (uint32_t i = 0; i < header->chunkCount; ++i) { + std::error_code ec; + const NativeChunk* chunk = &chunks[i]; + // sanity check chunk is within file + if ( chunk->fileOffset > fileSize ) + return make_error_code(NativeReaderError::file_malformed); + if ( (chunk->fileOffset + chunk->fileSize) > fileSize) + return make_error_code(NativeReaderError::file_malformed); + // process chunk, based on signature + switch ( chunk->signature ) { + case NCS_DefinedAtomsV1: + ec = processDefinedAtomsV1(base, chunk); + break; + case NCS_AttributesArrayV1: + ec = processAttributesV1(base, chunk); + break; + case NCS_UndefinedAtomsV1: + ec = processUndefinedAtomsV1(base, chunk); + break; + case NCS_SharedLibraryAtomsV1: + ec = processSharedLibraryAtomsV1(base, chunk); + break; + case NCS_AbsoluteAtomsV1: + ec = processAbsoluteAtomsV1(base, chunk); + break; + case NCS_AbsoluteAttributesV1: + ec = processAbsoluteAttributesV1(base, chunk); + break; + case NCS_ReferencesArrayV1: + ec = processReferencesV1(base, chunk); + break; + case 
NCS_ReferencesArrayV2: + ec = processReferencesV2(base, chunk); + break; + case NCS_TargetsTable: + ec = processTargetsTable(base, chunk); + break; + case NCS_AddendsTable: + ec = processAddendsTable(base, chunk); + break; + case NCS_Content: + ec = processContent(base, chunk); + break; + case NCS_Strings: + ec = processStrings(base, chunk); + break; + default: + return make_error_code(NativeReaderError::unknown_chunk_type); + } + if ( ec ) { + return ec; + } + } + // TO DO: validate enough chunks were used + + DEBUG_WITH_TYPE("ReaderNative", { + llvm::dbgs() << " ReaderNative DefinedAtoms:\n"; + for (const DefinedAtom *a : defined()) { + llvm::dbgs() << llvm::format(" 0x%09lX", a) + << ", name=" << a->name() + << ", size=" << a->size() << "\n"; + for (const Reference *r : *a) { + llvm::dbgs() << " offset=" + << llvm::format("0x%03X", r->offsetInAtom()) + << ", kind=" << r->kindValue() + << ", target=" << r->target() << "\n"; + } + } + }); + return make_error_code(NativeReaderError::success); + } + + virtual ~File() { + // _mb is automatically deleted because of std::unique_ptr<> + + // All other ivar pointers are pointers into the MemoryBuffer, except + // the _definedAtoms array which was allocated to contain an array + // of Atom objects. The atoms have empty destructors, so it is ok + // to just delete the memory. 
+ delete _definedAtoms._arrayStart; + delete _undefinedAtoms._arrayStart; + delete _sharedLibraryAtoms._arrayStart; + delete _absoluteAtoms._arrayStart; + delete _referencesV1.arrayStart; + delete _referencesV2.arrayStart; + delete [] _targetsTable; + } + + const atom_collection<DefinedAtom>& defined() const override { + return _definedAtoms; + } + const atom_collection<UndefinedAtom>& undefined() const override { + return _undefinedAtoms; + } + const atom_collection<SharedLibraryAtom>& sharedLibrary() const override { + return _sharedLibraryAtoms; + } + const atom_collection<AbsoluteAtom> &absolute() const override { + return _absoluteAtoms; + } + +private: + friend NativeDefinedAtomV1; + friend NativeUndefinedAtomV1; + friend NativeSharedLibraryAtomV1; + friend NativeAbsoluteAtomV1; + friend NativeReferenceV1; + friend NativeReferenceV2; + + // instantiate array of DefinedAtoms from v1 ivar data in file + std::error_code processDefinedAtomsV1(const uint8_t *base, + const NativeChunk *chunk) { + const size_t atomSize = sizeof(NativeDefinedAtomV1); + size_t atomsArraySize = chunk->elementCount * atomSize; + uint8_t* atomsStart = reinterpret_cast<uint8_t*> + (operator new(atomsArraySize, std::nothrow)); + if (atomsStart == nullptr) + return make_error_code(NativeReaderError::memory_error); + const size_t ivarElementSize = chunk->fileSize + / chunk->elementCount; + if ( ivarElementSize != sizeof(NativeDefinedAtomIvarsV1) ) + return make_error_code(NativeReaderError::file_malformed); + uint8_t* atomsEnd = atomsStart + atomsArraySize; + const NativeDefinedAtomIvarsV1* ivarData = + reinterpret_cast<const NativeDefinedAtomIvarsV1*> + (base + chunk->fileOffset); + for(uint8_t* s = atomsStart; s != atomsEnd; s += atomSize) { + NativeDefinedAtomV1* atomAllocSpace = + reinterpret_cast<NativeDefinedAtomV1*>(s); + new (atomAllocSpace) NativeDefinedAtomV1(*this, ivarData); + ++ivarData; + } + this->_definedAtoms._arrayStart = atomsStart; + this->_definedAtoms._arrayEnd = 
atomsEnd; + this->_definedAtoms._elementSize = atomSize; + this->_definedAtoms._elementCount = chunk->elementCount; + DEBUG_WITH_TYPE("ReaderNative", llvm::dbgs() + << " chunk DefinedAtomsV1: " + << " count=" << chunk->elementCount + << " chunkSize=" << chunk->fileSize + << "\n"); + return make_error_code(NativeReaderError::success); + } + + + + // set up pointers to attributes array + std::error_code processAttributesV1(const uint8_t *base, + const NativeChunk *chunk) { + this->_attributes = base + chunk->fileOffset; + this->_attributesMaxOffset = chunk->fileSize; + DEBUG_WITH_TYPE("ReaderNative", llvm::dbgs() + << " chunk AttributesV1: " + << " count=" << chunk->elementCount + << " chunkSize=" << chunk->fileSize + << "\n"); + return make_error_code(NativeReaderError::success); + } + + // set up pointers to attributes array + std::error_code processAbsoluteAttributesV1(const uint8_t *base, + const NativeChunk *chunk) { + this->_absAttributes = base + chunk->fileOffset; + this->_absAbsoluteMaxOffset = chunk->fileSize; + DEBUG_WITH_TYPE("ReaderNative", llvm::dbgs() + << " chunk AbsoluteAttributesV1: " + << " count=" << chunk->elementCount + << " chunkSize=" << chunk->fileSize + << "\n"); + return make_error_code(NativeReaderError::success); + } + + // instantiate array of UndefinedAtoms from v1 ivar data in file + std::error_code processUndefinedAtomsV1(const uint8_t *base, + const NativeChunk *chunk) { + const size_t atomSize = sizeof(NativeUndefinedAtomV1); + size_t atomsArraySize = chunk->elementCount * atomSize; + uint8_t* atomsStart = reinterpret_cast<uint8_t*> + (operator new(atomsArraySize, std::nothrow)); + if (atomsStart == nullptr) + return make_error_code(NativeReaderError::memory_error); + const size_t ivarElementSize = chunk->fileSize + / chunk->elementCount; + if ( ivarElementSize != sizeof(NativeUndefinedAtomIvarsV1) ) + return make_error_code(NativeReaderError::file_malformed); + uint8_t* atomsEnd = atomsStart + atomsArraySize; + const 
NativeUndefinedAtomIvarsV1* ivarData = + reinterpret_cast<const NativeUndefinedAtomIvarsV1*> + (base + chunk->fileOffset); + for(uint8_t* s = atomsStart; s != atomsEnd; s += atomSize) { + NativeUndefinedAtomV1* atomAllocSpace = + reinterpret_cast<NativeUndefinedAtomV1*>(s); + new (atomAllocSpace) NativeUndefinedAtomV1(*this, ivarData); + ++ivarData; + } + this->_undefinedAtoms._arrayStart = atomsStart; + this->_undefinedAtoms._arrayEnd = atomsEnd; + this->_undefinedAtoms._elementSize = atomSize; + this->_undefinedAtoms._elementCount = chunk->elementCount; + DEBUG_WITH_TYPE("ReaderNative", llvm::dbgs() + << " chunk UndefinedAtomsV1:" + << " count=" << chunk->elementCount + << " chunkSize=" << chunk->fileSize + << "\n"); + return make_error_code(NativeReaderError::success); + } + + + // instantiate array of ShareLibraryAtoms from v1 ivar data in file + std::error_code processSharedLibraryAtomsV1(const uint8_t *base, + const NativeChunk *chunk) { + const size_t atomSize = sizeof(NativeSharedLibraryAtomV1); + size_t atomsArraySize = chunk->elementCount * atomSize; + uint8_t* atomsStart = reinterpret_cast<uint8_t*> + (operator new(atomsArraySize, std::nothrow)); + if (atomsStart == nullptr) + return make_error_code(NativeReaderError::memory_error); + const size_t ivarElementSize = chunk->fileSize + / chunk->elementCount; + if ( ivarElementSize != sizeof(NativeSharedLibraryAtomIvarsV1) ) + return make_error_code(NativeReaderError::file_malformed); + uint8_t* atomsEnd = atomsStart + atomsArraySize; + const NativeSharedLibraryAtomIvarsV1* ivarData = + reinterpret_cast<const NativeSharedLibraryAtomIvarsV1*> + (base + chunk->fileOffset); + for(uint8_t* s = atomsStart; s != atomsEnd; s += atomSize) { + NativeSharedLibraryAtomV1* atomAllocSpace = + reinterpret_cast<NativeSharedLibraryAtomV1*>(s); + new (atomAllocSpace) NativeSharedLibraryAtomV1(*this, ivarData); + ++ivarData; + } + this->_sharedLibraryAtoms._arrayStart = atomsStart; + this->_sharedLibraryAtoms._arrayEnd = 
atomsEnd; + this->_sharedLibraryAtoms._elementSize = atomSize; + this->_sharedLibraryAtoms._elementCount = chunk->elementCount; + DEBUG_WITH_TYPE("ReaderNative", llvm::dbgs() + << " chunk SharedLibraryAtomsV1:" + << " count=" << chunk->elementCount + << " chunkSize=" << chunk->fileSize + << "\n"); + return make_error_code(NativeReaderError::success); + } + + + // instantiate array of AbsoluteAtoms from v1 ivar data in file + std::error_code processAbsoluteAtomsV1(const uint8_t *base, + const NativeChunk *chunk) { + const size_t atomSize = sizeof(NativeAbsoluteAtomV1); + size_t atomsArraySize = chunk->elementCount * atomSize; + uint8_t* atomsStart = reinterpret_cast<uint8_t*> + (operator new(atomsArraySize, std::nothrow)); + if (atomsStart == nullptr) + return make_error_code(NativeReaderError::memory_error); + const size_t ivarElementSize = chunk->fileSize + / chunk->elementCount; + if ( ivarElementSize != sizeof(NativeAbsoluteAtomIvarsV1) ) + return make_error_code(NativeReaderError::file_malformed); + uint8_t* atomsEnd = atomsStart + atomsArraySize; + const NativeAbsoluteAtomIvarsV1* ivarData = + reinterpret_cast<const NativeAbsoluteAtomIvarsV1*> + (base + chunk->fileOffset); + for(uint8_t* s = atomsStart; s != atomsEnd; s += atomSize) { + NativeAbsoluteAtomV1* atomAllocSpace = + reinterpret_cast<NativeAbsoluteAtomV1*>(s); + new (atomAllocSpace) NativeAbsoluteAtomV1(*this, ivarData); + ++ivarData; + } + this->_absoluteAtoms._arrayStart = atomsStart; + this->_absoluteAtoms._arrayEnd = atomsEnd; + this->_absoluteAtoms._elementSize = atomSize; + this->_absoluteAtoms._elementCount = chunk->elementCount; + DEBUG_WITH_TYPE("ReaderNative", llvm::dbgs() + << " chunk AbsoluteAtomsV1: " + << " count=" << chunk->elementCount + << " chunkSize=" << chunk->fileSize + << "\n"); + return make_error_code(NativeReaderError::success); + } + + template <class T, class U> + std::error_code + processReferences(const uint8_t *base, const NativeChunk *chunk, + uint8_t *&refsStart, 
uint8_t *&refsEnd) const { + if (chunk->elementCount == 0) + return make_error_code(NativeReaderError::success); + size_t refsArraySize = chunk->elementCount * sizeof(T); + refsStart = reinterpret_cast<uint8_t *>( + operator new(refsArraySize, std::nothrow)); + if (refsStart == nullptr) + return make_error_code(NativeReaderError::memory_error); + const size_t ivarElementSize = chunk->fileSize / chunk->elementCount; + if (ivarElementSize != sizeof(U)) + return make_error_code(NativeReaderError::file_malformed); + refsEnd = refsStart + refsArraySize; + const U* ivarData = reinterpret_cast<const U *>(base + chunk->fileOffset); + for (uint8_t *s = refsStart; s != refsEnd; s += sizeof(T), ++ivarData) { + T *atomAllocSpace = reinterpret_cast<T *>(s); + new (atomAllocSpace) T(*this, ivarData); + } + return make_error_code(NativeReaderError::success); + } + + // instantiate array of References from v1 ivar data in file + std::error_code processReferencesV1(const uint8_t *base, + const NativeChunk *chunk) { + uint8_t *refsStart, *refsEnd; + if (std::error_code ec = + processReferences<NativeReferenceV1, NativeReferenceIvarsV1>( + base, chunk, refsStart, refsEnd)) + return ec; + this->_referencesV1.arrayStart = refsStart; + this->_referencesV1.arrayEnd = refsEnd; + this->_referencesV1.elementSize = sizeof(NativeReferenceV1); + this->_referencesV1.elementCount = chunk->elementCount; + DEBUG_WITH_TYPE("ReaderNative", { + llvm::dbgs() << " chunk ReferencesV1: " + << " count=" << chunk->elementCount + << " chunkSize=" << chunk->fileSize << "\n"; + }); + return make_error_code(NativeReaderError::success); + } + + // instantiate array of References from v2 ivar data in file + std::error_code processReferencesV2(const uint8_t *base, + const NativeChunk *chunk) { + uint8_t *refsStart, *refsEnd; + if (std::error_code ec = + processReferences<NativeReferenceV2, NativeReferenceIvarsV2>( + base, chunk, refsStart, refsEnd)) + return ec; + this->_referencesV2.arrayStart = refsStart; + 
this->_referencesV2.arrayEnd = refsEnd; + this->_referencesV2.elementSize = sizeof(NativeReferenceV2); + this->_referencesV2.elementCount = chunk->elementCount; + DEBUG_WITH_TYPE("ReaderNative", { + llvm::dbgs() << " chunk ReferencesV2: " + << " count=" << chunk->elementCount + << " chunkSize=" << chunk->fileSize << "\n"; + }); + return make_error_code(NativeReaderError::success); + } + + // set up pointers to target table + std::error_code processTargetsTable(const uint8_t *base, + const NativeChunk *chunk) { + const uint32_t* targetIndexes = reinterpret_cast<const uint32_t*> + (base + chunk->fileOffset); + this->_targetsTableCount = chunk->elementCount; + this->_targetsTable = new const Atom*[chunk->elementCount]; + for (uint32_t i=0; i < chunk->elementCount; ++i) { + const uint32_t index = targetIndexes[i]; + if ( index < _definedAtoms._elementCount ) { + const uint8_t* p = _definedAtoms._arrayStart + + index * _definedAtoms._elementSize; + this->_targetsTable[i] = reinterpret_cast<const DefinedAtom*>(p); + continue; + } + const uint32_t undefIndex = index - _definedAtoms._elementCount; + if ( undefIndex < _undefinedAtoms._elementCount ) { + const uint8_t* p = _undefinedAtoms._arrayStart + + undefIndex * _undefinedAtoms._elementSize; + this->_targetsTable[i] = reinterpret_cast<const UndefinedAtom*>(p); + continue; + } + const uint32_t slIndex = index - _definedAtoms._elementCount + - _undefinedAtoms._elementCount; + if ( slIndex < _sharedLibraryAtoms._elementCount ) { + const uint8_t* p = _sharedLibraryAtoms._arrayStart + + slIndex * _sharedLibraryAtoms._elementSize; + this->_targetsTable[i] = reinterpret_cast<const SharedLibraryAtom*>(p); + continue; + } + const uint32_t abIndex = index - _definedAtoms._elementCount + - _undefinedAtoms._elementCount + - _sharedLibraryAtoms._elementCount; + if ( abIndex < _absoluteAtoms._elementCount ) { + const uint8_t* p = _absoluteAtoms._arrayStart + + abIndex * _absoluteAtoms._elementSize; + this->_targetsTable[i] = 
reinterpret_cast<const AbsoluteAtom*>(p); + continue; + } + return make_error_code(NativeReaderError::file_malformed); + } + DEBUG_WITH_TYPE("ReaderNative", llvm::dbgs() + << " chunk Targets Table: " + << " count=" << chunk->elementCount + << " chunkSize=" << chunk->fileSize + << "\n"); + return make_error_code(NativeReaderError::success); + } + + + // set up pointers to addend pool in file + std::error_code processAddendsTable(const uint8_t *base, + const NativeChunk *chunk) { + this->_addends = reinterpret_cast<const Reference::Addend*> + (base + chunk->fileOffset); + this->_addendsMaxIndex = chunk->elementCount; + DEBUG_WITH_TYPE("ReaderNative", llvm::dbgs() + << " chunk Addends: " + << " count=" << chunk->elementCount + << " chunkSize=" << chunk->fileSize + << "\n"); + return make_error_code(NativeReaderError::success); + } + + // set up pointers to string pool in file + std::error_code processStrings(const uint8_t *base, + const NativeChunk *chunk) { + this->_strings = reinterpret_cast<const char*>(base + chunk->fileOffset); + this->_stringsMaxOffset = chunk->fileSize; + DEBUG_WITH_TYPE("ReaderNative", llvm::dbgs() + << " chunk Strings: " + << " chunkSize=" << chunk->fileSize + << "\n"); + return make_error_code(NativeReaderError::success); + } + + // set up pointers to content area in file + std::error_code processContent(const uint8_t *base, + const NativeChunk *chunk) { + this->_contentStart = base + chunk->fileOffset; + this->_contentEnd = base + chunk->fileOffset + chunk->fileSize; + DEBUG_WITH_TYPE("ReaderNative", llvm::dbgs() + << " chunk content: " + << " chunkSize=" << chunk->fileSize + << "\n"); + return make_error_code(NativeReaderError::success); + } + + StringRef string(uint32_t offset) const { + assert(offset < _stringsMaxOffset); + return StringRef(&_strings[offset]); + } + + Reference::Addend addend(uint32_t index) const { + if ( index == 0 ) + return 0; // addend index zero is used to mean "no addend" + assert(index <= _addendsMaxIndex); + 
return _addends[index-1]; // one-based indexing + } + + const NativeAtomAttributesV1& attribute(uint32_t off) const { + assert(off < _attributesMaxOffset); + return *reinterpret_cast<const NativeAtomAttributesV1*>(_attributes + off); + } + + const NativeAtomAttributesV1& absAttribute(uint32_t off) const { + assert(off < _absAbsoluteMaxOffset); + return *reinterpret_cast<const NativeAtomAttributesV1*>(_absAttributes + off); + } + + const uint8_t* content(uint32_t offset, uint32_t size) const { + const uint8_t* result = _contentStart + offset; + assert((result+size) <= _contentEnd); + return result; + } + + const Reference* referenceByIndex(uintptr_t index) const { + if (index < _referencesV1.elementCount) { + return reinterpret_cast<const NativeReferenceV1*>( + _referencesV1.arrayStart + index * _referencesV1.elementSize); + } + assert(index < _referencesV2.elementCount); + return reinterpret_cast<const NativeReferenceV2*>( + _referencesV2.arrayStart + index * _referencesV2.elementSize); + } + + const Atom* targetV1(uint16_t index) const { + if ( index == NativeReferenceIvarsV1::noTarget ) + return nullptr; + assert(index < _targetsTableCount); + return _targetsTable[index]; + } + + void setTargetV1(uint16_t index, const Atom* newAtom) const { + assert(index != NativeReferenceIvarsV1::noTarget); + assert(index > _targetsTableCount); + _targetsTable[index] = newAtom; + } + + const Atom* targetV2(uint32_t index) const { + if (index == NativeReferenceIvarsV2::noTarget) + return nullptr; + assert(index < _targetsTableCount); + return _targetsTable[index]; + } + + void setTargetV2(uint32_t index, const Atom* newAtom) const { + assert(index != NativeReferenceIvarsV2::noTarget); + assert(index > _targetsTableCount); + _targetsTable[index] = newAtom; + } + + template <typename T> + class AtomArray : public File::atom_collection<T> { + public: + AtomArray() : _arrayStart(nullptr), _arrayEnd(nullptr), + _elementSize(0), _elementCount(0) { } + + virtual atom_iterator<T> 
begin() const { + return atom_iterator<T>(*this, reinterpret_cast<const void*>(_arrayStart)); + } + virtual atom_iterator<T> end() const{ + return atom_iterator<T>(*this, reinterpret_cast<const void*>(_arrayEnd)); + } + virtual const T* deref(const void* it) const { + return reinterpret_cast<const T*>(it); + } + virtual void next(const void*& it) const { + const uint8_t* p = reinterpret_cast<const uint8_t*>(it); + p += _elementSize; + it = reinterpret_cast<const void*>(p); + } + virtual uint64_t size() const { return _elementCount; } + const uint8_t *_arrayStart; + const uint8_t *_arrayEnd; + uint32_t _elementSize; + uint32_t _elementCount; + }; + + struct IvarArray { + IvarArray() : + arrayStart(nullptr), + arrayEnd(nullptr), + elementSize(0), + elementCount(0) { } + + const uint8_t* arrayStart; + const uint8_t* arrayEnd; + uint32_t elementSize; + uint32_t elementCount; + }; + + std::unique_ptr<MemoryBuffer> _mb; + const NativeFileHeader* _header; + AtomArray<DefinedAtom> _definedAtoms; + AtomArray<UndefinedAtom> _undefinedAtoms; + AtomArray<SharedLibraryAtom> _sharedLibraryAtoms; + AtomArray<AbsoluteAtom> _absoluteAtoms; + const uint8_t* _absAttributes; + uint32_t _absAbsoluteMaxOffset; + const uint8_t* _attributes; + uint32_t _attributesMaxOffset; + IvarArray _referencesV1; + IvarArray _referencesV2; + const Atom** _targetsTable; + uint32_t _targetsTableCount; + const char* _strings; + uint32_t _stringsMaxOffset; + const Reference::Addend* _addends; + uint32_t _addendsMaxIndex; + const uint8_t *_contentStart; + const uint8_t *_contentEnd; +}; + +inline const lld::File &NativeDefinedAtomV1::file() const { + return *_file; +} + +inline uint64_t NativeDefinedAtomV1:: ordinal() const { + const uint8_t* p = reinterpret_cast<const uint8_t*>(_ivarData); + return p - _file->_definedAtoms._arrayStart; +} + +inline StringRef NativeDefinedAtomV1::name() const { + return _file->string(_ivarData->nameOffset); +} + +inline const NativeAtomAttributesV1& 
NativeDefinedAtomV1::attributes() const { + return _file->attribute(_ivarData->attributesOffset); +} + +inline ArrayRef<uint8_t> NativeDefinedAtomV1::rawContent() const { + if (!occupiesDiskSpace()) + return ArrayRef<uint8_t>(); + const uint8_t* p = _file->content(_ivarData->contentOffset, + _ivarData->contentSize); + return ArrayRef<uint8_t>(p, _ivarData->contentSize); +} + +inline StringRef NativeDefinedAtomV1::customSectionName() const { + uint32_t offset = attributes().sectionNameOffset; + return _file->string(offset); +} + +DefinedAtom::reference_iterator NativeDefinedAtomV1::begin() const { + uintptr_t index = _ivarData->referencesStartIndex; + const void* it = reinterpret_cast<const void*>(index); + return reference_iterator(*this, it); +} + +DefinedAtom::reference_iterator NativeDefinedAtomV1::end() const { + uintptr_t index = _ivarData->referencesStartIndex+_ivarData->referencesCount; + const void* it = reinterpret_cast<const void*>(index); + return reference_iterator(*this, it); +} + +const Reference* NativeDefinedAtomV1::derefIterator(const void* it) const { + uintptr_t index = reinterpret_cast<uintptr_t>(it); + return _file->referenceByIndex(index); +} + +void NativeDefinedAtomV1::incrementIterator(const void*& it) const { + uintptr_t index = reinterpret_cast<uintptr_t>(it); + ++index; + it = reinterpret_cast<const void*>(index); +} + +inline const lld::File& NativeUndefinedAtomV1::file() const { + return *_file; +} + +inline StringRef NativeUndefinedAtomV1::name() const { + return _file->string(_ivarData->nameOffset); +} + +inline const UndefinedAtom *NativeUndefinedAtomV1::fallback() const { + if (!_ivarData->fallbackNameOffset) + return nullptr; + if (!_fallback) + _fallback.reset(new SimpleUndefinedAtom( + *_file, _file->string(_ivarData->fallbackNameOffset))); + return _fallback.get(); +} + +inline const lld::File& NativeSharedLibraryAtomV1::file() const { + return *_file; +} + +inline StringRef NativeSharedLibraryAtomV1::name() const { + return 
_file->string(_ivarData->nameOffset); +} + +inline StringRef NativeSharedLibraryAtomV1::loadName() const { + return _file->string(_ivarData->loadNameOffset); +} + + + +inline const lld::File& NativeAbsoluteAtomV1::file() const { + return *_file; +} + +inline StringRef NativeAbsoluteAtomV1::name() const { + return _file->string(_ivarData->nameOffset); +} + +inline const NativeAtomAttributesV1& NativeAbsoluteAtomV1::absAttributes() const { + return _file->absAttribute(_ivarData->attributesOffset); +} + +inline const Atom* NativeReferenceV1::target() const { + return _file->targetV1(_ivarData->targetIndex); +} + +inline Reference::Addend NativeReferenceV1::addend() const { + return _file->addend(_ivarData->addendIndex); +} + +inline void NativeReferenceV1::setTarget(const Atom* newAtom) { + return _file->setTargetV1(_ivarData->targetIndex, newAtom); +} + +inline void NativeReferenceV1::setAddend(Addend a) { + // Do nothing if addend value is not being changed. + if (addend() == a) + return; + llvm_unreachable("setAddend() not supported"); +} + +inline const Atom* NativeReferenceV2::target() const { + return _file->targetV2(_ivarData->targetIndex); +} + +inline Reference::Addend NativeReferenceV2::addend() const { + return _ivarData->addend; +} + +inline void NativeReferenceV2::setTarget(const Atom* newAtom) { + return _file->setTargetV2(_ivarData->targetIndex, newAtom); +} + +inline void NativeReferenceV2::setAddend(Addend a) { + // Do nothing if addend value is not being changed. 
+ if (addend() == a) + return; + llvm_unreachable("setAddend() not supported"); +} + +uint32_t NativeReferenceV2::tag() const { return _ivarData->tag; } + +} // end namespace native + +namespace { + +class NativeReader : public Reader { +public: + virtual bool canParse(file_magic magic, StringRef, + const MemoryBuffer &mb) const override { + const NativeFileHeader *const header = + reinterpret_cast<const NativeFileHeader *>(mb.getBufferStart()); + return (memcmp(header->magic, NATIVE_FILE_HEADER_MAGIC, + sizeof(header->magic)) == 0); + } + + virtual std::error_code + loadFile(std::unique_ptr<MemoryBuffer> mb, const class Registry &, + std::vector<std::unique_ptr<File>> &result) const override { + auto *file = new lld::native::File(std::move(mb)); + result.push_back(std::unique_ptr<File>(file)); + return std::error_code(); + } +}; + +} + +void Registry::addSupportNativeObjects() { + add(std::unique_ptr<Reader>(new NativeReader())); +} + +} // end namespace lld diff --git a/lib/ReaderWriter/Native/WriterNative.cpp b/lib/ReaderWriter/Native/WriterNative.cpp new file mode 100644 index 000000000000..5e01a6ce1c7c --- /dev/null +++ b/lib/ReaderWriter/Native/WriterNative.cpp @@ -0,0 +1,566 @@ +//===- lib/ReaderWriter/Native/WriterNative.cpp ---------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "NativeFileFormat.h" +#include "lld/Core/File.h" +#include "lld/Core/LinkingContext.h" +#include "lld/Core/Writer.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/raw_ostream.h" +#include <cstdint> +#include <set> +#include <system_error> +#include <vector> + +namespace lld { +namespace native { + +/// +/// Class for writing native object files. 
+/// +class Writer : public lld::Writer { +public: + std::error_code writeFile(const lld::File &file, StringRef outPath) override { + // reserve first byte for unnamed atoms + _stringPool.push_back('\0'); + // visit all atoms + for ( const DefinedAtom *defAtom : file.defined() ) { + this->addIVarsForDefinedAtom(*defAtom); + // We are trying to process all atoms, but the defined() iterator does not + // return group children. So, when a group parent is found, we need to + // handle each child atom. + if (defAtom->isGroupParent()) { + for (const Reference *r : *defAtom) { + if (r->kindNamespace() != lld::Reference::KindNamespace::all) + continue; + if (r->kindValue() == lld::Reference::kindGroupChild) { + const DefinedAtom *target = dyn_cast<DefinedAtom>(r->target()); + assert(target && "Internal Error: kindGroupChild references need " + "to be associated with Defined Atoms only"); + this->addIVarsForDefinedAtom(*target); + } + } + } + } + for ( const UndefinedAtom *undefAtom : file.undefined() ) { + this->addIVarsForUndefinedAtom(*undefAtom); + } + for ( const SharedLibraryAtom *shlibAtom : file.sharedLibrary() ) { + this->addIVarsForSharedLibraryAtom(*shlibAtom); + } + for ( const AbsoluteAtom *absAtom : file.absolute() ) { + this->addIVarsForAbsoluteAtom(*absAtom); + } + + maybeConvertReferencesToV1(); + + // construct file header based on atom information accumulated + this->makeHeader(); + + std::error_code ec; + llvm::raw_fd_ostream out(outPath, ec, llvm::sys::fs::F_None); + if (ec) + return ec; + + this->write(out); + + return std::error_code(); + } + + virtual ~Writer() { + } + +private: + + // write the lld::File in native format to the specified stream + void write(raw_ostream &out) { + assert(out.tell() == 0); + out.write((char*)_headerBuffer, _headerBufferSize); + + writeChunk(out, _definedAtomIvars, NCS_DefinedAtomsV1); + writeChunk(out, _attributes, NCS_AttributesArrayV1); + writeChunk(out, _undefinedAtomIvars, NCS_UndefinedAtomsV1); + writeChunk(out, 
_sharedLibraryAtomIvars, NCS_SharedLibraryAtomsV1); + writeChunk(out, _absoluteAtomIvars, NCS_AbsoluteAtomsV1); + writeChunk(out, _absAttributes, NCS_AbsoluteAttributesV1); + writeChunk(out, _stringPool, NCS_Strings); + writeChunk(out, _referencesV1, NCS_ReferencesArrayV1); + writeChunk(out, _referencesV2, NCS_ReferencesArrayV2); + + if (!_targetsTableIndex.empty()) { + assert(out.tell() == findChunk(NCS_TargetsTable).fileOffset); + writeTargetTable(out); + } + + if (!_addendsTableIndex.empty()) { + assert(out.tell() == findChunk(NCS_AddendsTable).fileOffset); + writeAddendTable(out); + } + + writeChunk(out, _contentPool, NCS_Content); + } + + template<class T> + void writeChunk(raw_ostream &out, std::vector<T> &vector, uint32_t signature) { + if (vector.empty()) + return; + assert(out.tell() == findChunk(signature).fileOffset); + out.write((char*)&vector[0], vector.size() * sizeof(T)); + } + + void addIVarsForDefinedAtom(const DefinedAtom& atom) { + _definedAtomIndex[&atom] = _definedAtomIvars.size(); + NativeDefinedAtomIvarsV1 ivar; + unsigned refsCount; + ivar.nameOffset = getNameOffset(atom); + ivar.attributesOffset = getAttributeOffset(atom); + ivar.referencesStartIndex = getReferencesIndex(atom, refsCount); + ivar.referencesCount = refsCount; + ivar.contentOffset = getContentOffset(atom); + ivar.contentSize = atom.size(); + ivar.sectionSize = atom.sectionSize(); + _definedAtomIvars.push_back(ivar); + } + + void addIVarsForUndefinedAtom(const UndefinedAtom& atom) { + _undefinedAtomIndex[&atom] = _undefinedAtomIvars.size(); + NativeUndefinedAtomIvarsV1 ivar; + ivar.nameOffset = getNameOffset(atom); + ivar.flags = (atom.canBeNull() & 0x03); + ivar.fallbackNameOffset = 0; + if (atom.fallback()) + ivar.fallbackNameOffset = getNameOffset(*atom.fallback()); + _undefinedAtomIvars.push_back(ivar); + } + + void addIVarsForSharedLibraryAtom(const SharedLibraryAtom& atom) { + _sharedLibraryAtomIndex[&atom] = _sharedLibraryAtomIvars.size(); + 
NativeSharedLibraryAtomIvarsV1 ivar; + ivar.size = atom.size(); + ivar.nameOffset = getNameOffset(atom); + ivar.loadNameOffset = getSharedLibraryNameOffset(atom.loadName()); + ivar.type = (uint32_t)atom.type(); + ivar.flags = atom.canBeNullAtRuntime(); + _sharedLibraryAtomIvars.push_back(ivar); + } + + void addIVarsForAbsoluteAtom(const AbsoluteAtom& atom) { + _absoluteAtomIndex[&atom] = _absoluteAtomIvars.size(); + NativeAbsoluteAtomIvarsV1 ivar; + ivar.nameOffset = getNameOffset(atom); + ivar.attributesOffset = getAttributeOffset(atom); + ivar.reserved = 0; + ivar.value = atom.value(); + _absoluteAtomIvars.push_back(ivar); + } + + void convertReferencesToV1() { + for (const NativeReferenceIvarsV2 &v2 : _referencesV2) { + NativeReferenceIvarsV1 v1; + v1.offsetInAtom = v2.offsetInAtom; + v1.kindNamespace = v2.kindNamespace; + v1.kindArch = v2.kindArch; + v1.kindValue = v2.kindValue; + v1.targetIndex = (v2.targetIndex == NativeReferenceIvarsV2::noTarget) ? + (uint16_t)NativeReferenceIvarsV1::noTarget : v2.targetIndex; + v1.addendIndex = this->getAddendIndex(v2.addend); + _referencesV1.push_back(v1); + } + _referencesV2.clear(); + } + + bool canConvertReferenceToV1(const NativeReferenceIvarsV2 &ref) { + bool validOffset = (ref.offsetInAtom == NativeReferenceIvarsV2::noTarget) || + ref.offsetInAtom < NativeReferenceIvarsV1::noTarget; + return validOffset && ref.targetIndex < UINT16_MAX; + } + + // Convert vector of NativeReferenceIvarsV2 to NativeReferenceIvarsV1 if + // possible. 
+ void maybeConvertReferencesToV1() { + std::set<int64_t> addends; + for (const NativeReferenceIvarsV2 &ref : _referencesV2) { + if (!canConvertReferenceToV1(ref)) + return; + addends.insert(ref.addend); + if (addends.size() >= UINT16_MAX) + return; + } + convertReferencesToV1(); + } + + // fill out native file header and chunk directory + void makeHeader() { + const bool hasDefines = !_definedAtomIvars.empty(); + const bool hasUndefines = !_undefinedAtomIvars.empty(); + const bool hasSharedLibraries = !_sharedLibraryAtomIvars.empty(); + const bool hasAbsolutes = !_absoluteAtomIvars.empty(); + const bool hasReferencesV1 = !_referencesV1.empty(); + const bool hasReferencesV2 = !_referencesV2.empty(); + const bool hasTargetsTable = !_targetsTableIndex.empty(); + const bool hasAddendTable = !_addendsTableIndex.empty(); + const bool hasContent = !_contentPool.empty(); + + int chunkCount = 1; // always have string pool chunk + if ( hasDefines ) chunkCount += 2; + if ( hasUndefines ) ++chunkCount; + if ( hasSharedLibraries ) ++chunkCount; + if ( hasAbsolutes ) chunkCount += 2; + if ( hasReferencesV1 ) ++chunkCount; + if ( hasReferencesV2 ) ++chunkCount; + if ( hasTargetsTable ) ++chunkCount; + if ( hasAddendTable ) ++chunkCount; + if ( hasContent ) ++chunkCount; + + _headerBufferSize = sizeof(NativeFileHeader) + + chunkCount*sizeof(NativeChunk); + _headerBuffer = reinterpret_cast<NativeFileHeader*> + (operator new(_headerBufferSize, std::nothrow)); + NativeChunk *chunks = + reinterpret_cast<NativeChunk*>(reinterpret_cast<char*>(_headerBuffer) + + sizeof(NativeFileHeader)); + memcpy(_headerBuffer->magic, NATIVE_FILE_HEADER_MAGIC, + sizeof(_headerBuffer->magic)); + _headerBuffer->endian = NFH_LittleEndian; + _headerBuffer->architecture = 0; + _headerBuffer->fileSize = 0; + _headerBuffer->chunkCount = chunkCount; + + // create chunk for defined atom ivar array + int nextIndex = 0; + uint32_t nextFileOffset = _headerBufferSize; + if (hasDefines) { + 
fillChunkHeader(chunks[nextIndex++], nextFileOffset, _definedAtomIvars, + NCS_DefinedAtomsV1); + + // create chunk for attributes + fillChunkHeader(chunks[nextIndex++], nextFileOffset, _attributes, + NCS_AttributesArrayV1); + } + + // create chunk for undefined atom array + if (hasUndefines) + fillChunkHeader(chunks[nextIndex++], nextFileOffset, _undefinedAtomIvars, + NCS_UndefinedAtomsV1); + + // create chunk for shared library atom array + if (hasSharedLibraries) + fillChunkHeader(chunks[nextIndex++], nextFileOffset, + _sharedLibraryAtomIvars, NCS_SharedLibraryAtomsV1); + + // create chunk for shared library atom array + if (hasAbsolutes) { + fillChunkHeader(chunks[nextIndex++], nextFileOffset, _absoluteAtomIvars, + NCS_AbsoluteAtomsV1); + + // create chunk for attributes + fillChunkHeader(chunks[nextIndex++], nextFileOffset, _absAttributes, + NCS_AbsoluteAttributesV1); + } + + // create chunk for symbol strings + // pad end of string pool to 4-bytes + while ((_stringPool.size() % 4) != 0) + _stringPool.push_back('\0'); + fillChunkHeader(chunks[nextIndex++], nextFileOffset, _stringPool, + NCS_Strings); + + // create chunk for referencesV2 + if (hasReferencesV1) + fillChunkHeader(chunks[nextIndex++], nextFileOffset, _referencesV1, + NCS_ReferencesArrayV1); + + // create chunk for referencesV2 + if (hasReferencesV2) + fillChunkHeader(chunks[nextIndex++], nextFileOffset, _referencesV2, + NCS_ReferencesArrayV2); + + // create chunk for target table + if (hasTargetsTable) { + NativeChunk& cht = chunks[nextIndex++]; + cht.signature = NCS_TargetsTable; + cht.fileOffset = nextFileOffset; + cht.fileSize = _targetsTableIndex.size() * sizeof(uint32_t); + cht.elementCount = _targetsTableIndex.size(); + nextFileOffset = cht.fileOffset + cht.fileSize; + } + + // create chunk for addend table + if (hasAddendTable) { + NativeChunk& chad = chunks[nextIndex++]; + chad.signature = NCS_AddendsTable; + chad.fileOffset = nextFileOffset; + chad.fileSize = _addendsTableIndex.size() * 
sizeof(Reference::Addend); + chad.elementCount = _addendsTableIndex.size(); + nextFileOffset = chad.fileOffset + chad.fileSize; + } + + // create chunk for content + if (hasContent) + fillChunkHeader(chunks[nextIndex++], nextFileOffset, _contentPool, + NCS_Content); + + _headerBuffer->fileSize = nextFileOffset; + } + + template<class T> + void fillChunkHeader(NativeChunk &chunk, uint32_t &nextFileOffset, + const std::vector<T> &data, uint32_t signature) { + chunk.signature = signature; + chunk.fileOffset = nextFileOffset; + chunk.fileSize = data.size() * sizeof(T); + chunk.elementCount = data.size(); + nextFileOffset = chunk.fileOffset + chunk.fileSize; + } + + // scan header to find particular chunk + NativeChunk& findChunk(uint32_t signature) { + const uint32_t chunkCount = _headerBuffer->chunkCount; + NativeChunk* chunks = + reinterpret_cast<NativeChunk*>(reinterpret_cast<char*>(_headerBuffer) + + sizeof(NativeFileHeader)); + for (uint32_t i=0; i < chunkCount; ++i) { + if ( chunks[i].signature == signature ) + return chunks[i]; + } + llvm_unreachable("findChunk() signature not found"); + } + + // append atom name to string pool and return offset + uint32_t getNameOffset(const Atom& atom) { + return this->getNameOffset(atom.name()); + } + + // check if name is already in pool or append and return offset + uint32_t getSharedLibraryNameOffset(StringRef name) { + assert(!name.empty()); + // look to see if this library name was used by another atom + for (auto &it : _sharedLibraryNames) + if (name.equals(it.first)) + return it.second; + // first use of this library name + uint32_t result = this->getNameOffset(name); + _sharedLibraryNames.push_back(std::make_pair(name, result)); + return result; + } + + // append atom name to string pool and return offset + uint32_t getNameOffset(StringRef name) { + if ( name.empty() ) + return 0; + uint32_t result = _stringPool.size(); + _stringPool.insert(_stringPool.end(), name.begin(), name.end()); + _stringPool.push_back(0); + 
return result; + } + + // append atom cotent to content pool and return offset + uint32_t getContentOffset(const DefinedAtom& atom) { + if (!atom.occupiesDiskSpace()) + return 0; + uint32_t result = _contentPool.size(); + ArrayRef<uint8_t> cont = atom.rawContent(); + _contentPool.insert(_contentPool.end(), cont.begin(), cont.end()); + return result; + } + + // reuse existing attributes entry or create a new one and return offet + uint32_t getAttributeOffset(const DefinedAtom& atom) { + NativeAtomAttributesV1 attrs = computeAttributesV1(atom); + return getOrPushAttribute(_attributes, attrs); + } + + uint32_t getAttributeOffset(const AbsoluteAtom& atom) { + NativeAtomAttributesV1 attrs = computeAbsoluteAttributes(atom); + return getOrPushAttribute(_absAttributes, attrs); + } + + uint32_t getOrPushAttribute(std::vector<NativeAtomAttributesV1> &dest, + const NativeAtomAttributesV1 &attrs) { + for (size_t i = 0, e = dest.size(); i < e; ++i) { + if (!memcmp(&dest[i], &attrs, sizeof(attrs))) { + // found that this set of attributes already used, so re-use + return i * sizeof(attrs); + } + } + // append new attribute set to end + uint32_t result = dest.size() * sizeof(attrs); + dest.push_back(attrs); + return result; + } + + uint32_t sectionNameOffset(const DefinedAtom& atom) { + // if section based on content, then no custom section name available + if (atom.sectionChoice() == DefinedAtom::sectionBasedOnContent) + return 0; + StringRef name = atom.customSectionName(); + assert(!name.empty()); + // look to see if this section name was used by another atom + for (auto &it : _sectionNames) + if (name.equals(it.first)) + return it.second; + // first use of this section name + uint32_t result = this->getNameOffset(name); + _sectionNames.push_back(std::make_pair(name, result)); + return result; + } + + NativeAtomAttributesV1 computeAttributesV1(const DefinedAtom& atom) { + NativeAtomAttributesV1 attrs; + attrs.sectionNameOffset = sectionNameOffset(atom); + attrs.align2 = 
atom.alignment().powerOf2; + attrs.alignModulus = atom.alignment().modulus; + attrs.scope = atom.scope(); + attrs.interposable = atom.interposable(); + attrs.merge = atom.merge(); + attrs.contentType = atom.contentType(); + attrs.sectionChoice = atom.sectionChoice(); + attrs.deadStrip = atom.deadStrip(); + attrs.dynamicExport = atom.dynamicExport(); + attrs.codeModel = atom.codeModel(); + attrs.permissions = atom.permissions(); + return attrs; + } + + NativeAtomAttributesV1 computeAbsoluteAttributes(const AbsoluteAtom& atom) { + NativeAtomAttributesV1 attrs; + attrs.scope = atom.scope(); + return attrs; + } + + // add references for this atom in a contiguous block in NCS_ReferencesArrayV2 + uint32_t getReferencesIndex(const DefinedAtom& atom, unsigned& refsCount) { + size_t startRefSize = _referencesV2.size(); + uint32_t result = startRefSize; + for (const Reference *ref : atom) { + NativeReferenceIvarsV2 nref; + nref.offsetInAtom = ref->offsetInAtom(); + nref.kindNamespace = (uint8_t)ref->kindNamespace(); + nref.kindArch = (uint8_t)ref->kindArch(); + nref.kindValue = ref->kindValue(); + nref.targetIndex = this->getTargetIndex(ref->target()); + nref.addend = ref->addend(); + nref.tag = ref->tag(); + _referencesV2.push_back(nref); + } + refsCount = _referencesV2.size() - startRefSize; + return (refsCount == 0) ? 
0 : result; + } + + uint32_t getTargetIndex(const Atom* target) { + if ( target == nullptr ) + return NativeReferenceIvarsV2::noTarget; + TargetToIndex::const_iterator pos = _targetsTableIndex.find(target); + if ( pos != _targetsTableIndex.end() ) { + return pos->second; + } + uint32_t result = _targetsTableIndex.size(); + _targetsTableIndex[target] = result; + return result; + } + + void writeTargetTable(raw_ostream &out) { + // Build table of target indexes + uint32_t maxTargetIndex = _targetsTableIndex.size(); + assert(maxTargetIndex > 0); + std::vector<uint32_t> targetIndexes(maxTargetIndex); + for (auto &it : _targetsTableIndex) { + const Atom* atom = it.first; + uint32_t targetIndex = it.second; + assert(targetIndex < maxTargetIndex); + + TargetToIndex::iterator pos = _definedAtomIndex.find(atom); + if (pos != _definedAtomIndex.end()) { + targetIndexes[targetIndex] = pos->second; + continue; + } + uint32_t base = _definedAtomIvars.size(); + + pos = _undefinedAtomIndex.find(atom); + if (pos != _undefinedAtomIndex.end()) { + targetIndexes[targetIndex] = pos->second + base; + continue; + } + base += _undefinedAtomIndex.size(); + + pos = _sharedLibraryAtomIndex.find(atom); + if (pos != _sharedLibraryAtomIndex.end()) { + targetIndexes[targetIndex] = pos->second + base; + continue; + } + base += _sharedLibraryAtomIndex.size(); + + pos = _absoluteAtomIndex.find(atom); + assert(pos != _absoluteAtomIndex.end()); + targetIndexes[targetIndex] = pos->second + base; + } + // write table + out.write((char*)&targetIndexes[0], maxTargetIndex * sizeof(uint32_t)); + } + + uint32_t getAddendIndex(Reference::Addend addend) { + if ( addend == 0 ) + return 0; // addend index zero is used to mean "no addend" + AddendToIndex::const_iterator pos = _addendsTableIndex.find(addend); + if ( pos != _addendsTableIndex.end() ) { + return pos->second; + } + uint32_t result = _addendsTableIndex.size() + 1; // one-based index + _addendsTableIndex[addend] = result; + return result; + } + + void 
writeAddendTable(raw_ostream &out) { + // Build table of addends + uint32_t maxAddendIndex = _addendsTableIndex.size(); + std::vector<Reference::Addend> addends(maxAddendIndex); + for (auto &it : _addendsTableIndex) { + Reference::Addend addend = it.first; + uint32_t index = it.second; + assert(index <= maxAddendIndex); + addends[index-1] = addend; + } + // write table + out.write((char*)&addends[0], maxAddendIndex*sizeof(Reference::Addend)); + } + + typedef std::vector<std::pair<StringRef, uint32_t>> NameToOffsetVector; + + typedef llvm::DenseMap<const Atom*, uint32_t> TargetToIndex; + typedef llvm::DenseMap<Reference::Addend, uint32_t> AddendToIndex; + + NativeFileHeader* _headerBuffer; + size_t _headerBufferSize; + std::vector<char> _stringPool; + std::vector<uint8_t> _contentPool; + std::vector<NativeDefinedAtomIvarsV1> _definedAtomIvars; + std::vector<NativeAtomAttributesV1> _attributes; + std::vector<NativeAtomAttributesV1> _absAttributes; + std::vector<NativeUndefinedAtomIvarsV1> _undefinedAtomIvars; + std::vector<NativeSharedLibraryAtomIvarsV1> _sharedLibraryAtomIvars; + std::vector<NativeAbsoluteAtomIvarsV1> _absoluteAtomIvars; + std::vector<NativeReferenceIvarsV1> _referencesV1; + std::vector<NativeReferenceIvarsV2> _referencesV2; + TargetToIndex _targetsTableIndex; + TargetToIndex _definedAtomIndex; + TargetToIndex _undefinedAtomIndex; + TargetToIndex _sharedLibraryAtomIndex; + TargetToIndex _absoluteAtomIndex; + AddendToIndex _addendsTableIndex; + NameToOffsetVector _sectionNames; + NameToOffsetVector _sharedLibraryNames; +}; +} // end namespace native + +std::unique_ptr<Writer> createWriterNative() { + return std::unique_ptr<Writer>(new native::Writer()); +} +} // end namespace lld diff --git a/lib/ReaderWriter/PECOFF/Atoms.h b/lib/ReaderWriter/PECOFF/Atoms.h new file mode 100644 index 000000000000..257edc17884b --- /dev/null +++ b/lib/ReaderWriter/PECOFF/Atoms.h @@ -0,0 +1,312 @@ +//===- lib/ReaderWriter/PECOFF/Atoms.h 
------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_PE_COFF_ATOMS_H +#define LLD_READER_WRITER_PE_COFF_ATOMS_H + +#include "lld/Core/File.h" +#include "lld/Core/Simple.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/Object/COFF.h" +#include <vector> + +namespace lld { +namespace pecoff { +class COFFDefinedAtom; + +class COFFUndefinedAtom : public UndefinedAtom { +public: + COFFUndefinedAtom(const File &file, StringRef name, + const UndefinedAtom *fallback = nullptr) + : _owningFile(file), _name(name), _fallback(fallback) {} + + const File &file() const override { return _owningFile; } + StringRef name() const override { return _name; } + CanBeNull canBeNull() const override { return CanBeNull::canBeNullNever; } + const UndefinedAtom *fallback() const override { return _fallback; } + +private: + const File &_owningFile; + StringRef _name; + const UndefinedAtom *_fallback; +}; + +/// The base class of all COFF defined atoms. A derived class of +/// COFFBaseDefinedAtom may represent atoms read from a file or atoms created +/// by the linker. An example of the latter case is the jump table for symbols +/// in a DLL. 
+class COFFBaseDefinedAtom : public DefinedAtom { +public: + enum class Kind { + File, + Internal + }; + + const File &file() const override { return _file; } + StringRef name() const override { return _name; } + Interposable interposable() const override { return interposeNo; } + Merge merge() const override { return mergeNo; } + Alignment alignment() const override { return Alignment(0); } + StringRef customSectionName() const override { return ""; } + DeadStripKind deadStrip() const override { return deadStripNormal; } + + Kind getKind() const { return _kind; } + + void addReference(std::unique_ptr<SimpleReference> reference) { + _references.push_back(std::move(reference)); + } + + reference_iterator begin() const override { + return reference_iterator(*this, reinterpret_cast<const void *>(0)); + } + + reference_iterator end() const override { + return reference_iterator( + *this, reinterpret_cast<const void *>(_references.size())); + } + +protected: + COFFBaseDefinedAtom(const File &file, StringRef name, Kind kind) + : _file(file), _name(name), _kind(kind) {} + +private: + const Reference *derefIterator(const void *iter) const override { + size_t index = reinterpret_cast<size_t>(iter); + return _references[index].get(); + } + + void incrementIterator(const void *&iter) const override { + size_t index = reinterpret_cast<size_t>(iter); + iter = reinterpret_cast<const void *>(index + 1); + } + + const File &_file; + StringRef _name; + Kind _kind; + std::vector<std::unique_ptr<SimpleReference>> _references; +}; + +/// This is the root class of the atom read from a file. This class have two +/// subclasses; one for the regular atom and another for the BSS atom. 
+class COFFDefinedFileAtom : public COFFBaseDefinedAtom { +public: + COFFDefinedFileAtom(const File &file, StringRef name, StringRef sectionName, + uint64_t sectionSize, Scope scope, + ContentType contentType, ContentPermissions perms, + uint64_t ordinal) + : COFFBaseDefinedAtom(file, name, Kind::File), _sectionName(sectionName), + _sectionSize(sectionSize), _scope(scope), _contentType(contentType), + _permissions(perms), _ordinal(ordinal), _alignment(0) {} + + static bool classof(const COFFBaseDefinedAtom *atom) { + return atom->getKind() == Kind::File; + } + + void setAlignment(Alignment val) { _alignment = val; } + SectionChoice sectionChoice() const override { return sectionCustomRequired; } + StringRef customSectionName() const override { return _sectionName; } + uint64_t sectionSize() const override { return _sectionSize; } + Scope scope() const override { return _scope; } + ContentType contentType() const override { return _contentType; } + ContentPermissions permissions() const override { return _permissions; } + uint64_t ordinal() const override { return _ordinal; } + Alignment alignment() const override { return _alignment; } + + void addAssociate(const DefinedAtom *other) { + auto *ref = new SimpleReference(Reference::KindNamespace::all, + Reference::KindArch::all, + lld::Reference::kindAssociate, 0, other, 0); + addReference(std::unique_ptr<SimpleReference>(ref)); + } + +private: + StringRef _sectionName; + uint64_t _sectionSize; + Scope _scope; + ContentType _contentType; + ContentPermissions _permissions; + uint64_t _ordinal; + Alignment _alignment; + std::vector<std::unique_ptr<SimpleReference>> _references; +}; + +// A COFFDefinedAtom represents an atom read from a file and has contents. 
+class COFFDefinedAtom : public COFFDefinedFileAtom { +public: + COFFDefinedAtom(const File &file, StringRef name, StringRef sectionName, + uint64_t sectionSize, Scope scope, ContentType type, + bool isComdat, ContentPermissions perms, Merge merge, + ArrayRef<uint8_t> data, uint64_t ordinal) + : COFFDefinedFileAtom(file, name, sectionName, sectionSize, + scope, type, perms, ordinal), + _isComdat(isComdat), _merge(merge), _dataref(data) {} + + Merge merge() const override { return _merge; } + uint64_t size() const override { return _dataref.size(); } + ArrayRef<uint8_t> rawContent() const override { return _dataref; } + + DeadStripKind deadStrip() const override { + // Only COMDAT symbols would be dead-stripped. + return _isComdat ? deadStripNormal : deadStripNever; + } + +private: + bool _isComdat; + Merge _merge; + ArrayRef<uint8_t> _dataref; +}; + +// A COFFDefinedAtom represents an atom for BSS section. +class COFFBSSAtom : public COFFDefinedFileAtom { +public: + COFFBSSAtom(const File &file, StringRef name, Scope scope, + ContentPermissions perms, Merge merge, uint32_t size, + uint64_t ordinal) + : COFFDefinedFileAtom(file, name, ".bss", 0, scope, typeZeroFill, + perms, ordinal), + _merge(merge), _size(size) {} + + Merge merge() const override { return _merge; } + uint64_t size() const override { return _size; } + ArrayRef<uint8_t> rawContent() const override { return _contents; } + +private: + Merge _merge; + uint32_t _size; + std::vector<uint8_t> _contents; +}; + +/// A COFFLinkerInternalAtom represents a defined atom created by the linker, +/// not read from file. 
+class COFFLinkerInternalAtom : public COFFBaseDefinedAtom { +public: + SectionChoice sectionChoice() const override { return sectionBasedOnContent; } + uint64_t ordinal() const override { return _ordinal; } + Scope scope() const override { return scopeGlobal; } + Alignment alignment() const override { return Alignment(0); } + uint64_t size() const override { return _data.size(); } + ArrayRef<uint8_t> rawContent() const override { return _data; } + +protected: + COFFLinkerInternalAtom(const File &file, uint64_t ordinal, + std::vector<uint8_t> data, StringRef symbolName = "") + : COFFBaseDefinedAtom(file, symbolName, Kind::Internal), + _ordinal(ordinal), _data(std::move(data)) {} + +private: + uint64_t _ordinal; + std::vector<uint8_t> _data; +}; + +class COFFStringAtom : public COFFLinkerInternalAtom { +public: + COFFStringAtom(const File &file, uint64_t ordinal, StringRef sectionName, + StringRef contents) + : COFFLinkerInternalAtom(file, ordinal, stringRefToVector(contents)), + _sectionName(sectionName) {} + + SectionChoice sectionChoice() const override { return sectionCustomRequired; } + StringRef customSectionName() const override { return _sectionName; } + ContentType contentType() const override { return typeData; } + ContentPermissions permissions() const override { return permR__; } + +private: + StringRef _sectionName; + + std::vector<uint8_t> stringRefToVector(StringRef name) const { + std::vector<uint8_t> ret(name.size() + 1); + memcpy(&ret[0], name.data(), name.size()); + ret[name.size()] = 0; + return ret; + } +}; + +// A COFFSharedLibraryAtom represents a symbol for data in an import library. A +// reference to a COFFSharedLibraryAtom will be transformed to a real reference +// to an import address table entry in Idata pass. 
+class COFFSharedLibraryAtom : public SharedLibraryAtom { +public: + COFFSharedLibraryAtom(const File &file, uint16_t hint, StringRef symbolName, + StringRef importName, StringRef dllName) + : _file(file), _hint(hint), _mangledName(addImpPrefix(symbolName)), + _importName(importName), _dllName(dllName), _importTableEntry(nullptr) { + } + + const File &file() const override { return _file; } + uint16_t hint() const { return _hint; } + + /// Returns the symbol name to be used by the core linker. + StringRef name() const override { return _mangledName; } + + /// Returns the symbol name to be used in the import description table in the + /// COFF header. + virtual StringRef importName() const { return _importName; } + + StringRef loadName() const override { return _dllName; } + bool canBeNullAtRuntime() const override { return false; } + Type type() const override { return Type::Unknown; } + uint64_t size() const override { return 0; } + + void setImportTableEntry(const DefinedAtom *atom) { + _importTableEntry = atom; + } + + const DefinedAtom *getImportTableEntry() const { return _importTableEntry; } + +private: + /// Mangle the symbol name by adding "__imp_" prefix. See the file comment of + /// ReaderImportHeader.cpp for details about the prefix. + std::string addImpPrefix(StringRef symbolName) { + std::string ret("__imp_"); + ret.append(symbolName); + return ret; + } + + const File &_file; + uint16_t _hint; + std::string _mangledName; + std::string _importName; + StringRef _dllName; + const DefinedAtom *_importTableEntry; +}; + +// An instance of this class represents "input file" for atoms created in a +// pass. Atoms need to be associated to an input file even if it's not read from +// a file, so we use this class for that. 
+class VirtualFile : public SimpleFile { +public: + VirtualFile(const LinkingContext &ctx) + : SimpleFile("<virtual-file>"), _nextOrdinal(0) { + setOrdinal(ctx.getNextOrdinalAndIncrement()); + } + + uint64_t getNextOrdinal() { return _nextOrdinal++; } + +private: + uint64_t _nextOrdinal; +}; + +//===----------------------------------------------------------------------===// +// +// Utility functions to handle layout edges. +// +//===----------------------------------------------------------------------===// + +template <typename T, typename U> +void addLayoutEdge(T *a, U *b, uint32_t which) { + auto ref = new SimpleReference(Reference::KindNamespace::all, + Reference::KindArch::all, + which, 0, b, 0); + a->addReference(std::unique_ptr<SimpleReference>(ref)); +} + +} // namespace pecoff +} // namespace lld + +#endif diff --git a/lib/ReaderWriter/PECOFF/CMakeLists.txt b/lib/ReaderWriter/PECOFF/CMakeLists.txt new file mode 100644 index 000000000000..86b49b79f194 --- /dev/null +++ b/lib/ReaderWriter/PECOFF/CMakeLists.txt @@ -0,0 +1,16 @@ +add_llvm_library(lldPECOFF + EdataPass.cpp + IdataPass.cpp + LinkerGeneratedSymbolFile.cpp + LoadConfigPass.cpp + PECOFFLinkingContext.cpp + Pass.cpp + ReaderCOFF.cpp + ReaderImportHeader.cpp + WriterImportLibrary.cpp + WriterPECOFF.cpp + LINK_LIBS + lldCore + LLVMObject + LLVMSupport + ) diff --git a/lib/ReaderWriter/PECOFF/EdataPass.cpp b/lib/ReaderWriter/PECOFF/EdataPass.cpp new file mode 100644 index 000000000000..ad79f171f3c9 --- /dev/null +++ b/lib/ReaderWriter/PECOFF/EdataPass.cpp @@ -0,0 +1,227 @@ +//===- lib/ReaderWriter/PECOFF/EdataPass.cpp ------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "Pass.h" +#include "EdataPass.h" +#include "lld/Core/File.h" +#include "lld/Core/Pass.h" +#include "lld/Core/Simple.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Path.h" +#include <climits> +#include <ctime> +#include <utility> + +using lld::pecoff::edata::EdataAtom; +using lld::pecoff::edata::TableEntry; +using llvm::object::export_address_table_entry; +using llvm::object::export_directory_table_entry; + +namespace lld { +namespace pecoff { + +typedef PECOFFLinkingContext::ExportDesc ExportDesc; + +// dedupExports removes duplicate export entries. If two exports are +// referring the same symbol, they are considered duplicates. +// This could happen if the same symbol name is specified as an argument +// to /export more than once, or an unmangled and mangled name of the +// same symbol are given to /export. In the latter case, we choose +// unmangled (shorter) name. +static void dedupExports(PECOFFLinkingContext &ctx) { + std::vector<ExportDesc> &exports = ctx.getDllExports(); + // Pass 1: find duplicate entries + std::set<const ExportDesc *> dup; + std::map<StringRef, ExportDesc *> map; + for (ExportDesc &exp : exports) { + if (!exp.externalName.empty()) + continue; + StringRef symbol = exp.getRealName(); + auto it = map.find(symbol); + if (it == map.end()) { + map[symbol] = &exp; + } else if (symbol.size() < it->second->getRealName().size()) { + map[symbol] = &exp; + dup.insert(it->second); + } else { + dup.insert(&exp); + } + } + // Pass 2: remove duplicate entries + auto pred = [&](const ExportDesc &exp) { + return dup.count(&exp) == 1; + }; + exports.erase(std::remove_if(exports.begin(), exports.end(), pred), + exports.end()); +} + +static void assignOrdinals(PECOFFLinkingContext &ctx) { + std::vector<ExportDesc> &exports = ctx.getDllExports(); + int maxOrdinal = -1; + for (ExportDesc &desc : exports) + maxOrdinal = std::max(maxOrdinal, 
desc.ordinal); + + std::sort(exports.begin(), exports.end(), + [](const ExportDesc &a, const ExportDesc &b) { + return a.getExternalName().compare(b.getExternalName()) < 0; + }); + + int nextOrdinal = (maxOrdinal == -1) ? 1 : (maxOrdinal + 1); + for (ExportDesc &desc : exports) + if (desc.ordinal == -1) + desc.ordinal = nextOrdinal++; +} + +static bool getExportedAtoms(PECOFFLinkingContext &ctx, MutableFile *file, + std::vector<TableEntry> &ret) { + std::map<StringRef, const DefinedAtom *> definedAtoms; + for (const DefinedAtom *atom : file->defined()) + definedAtoms[atom->name()] = atom; + + for (PECOFFLinkingContext::ExportDesc &desc : ctx.getDllExports()) { + auto it = definedAtoms.find(desc.getRealName()); + if (it == definedAtoms.end()) { + llvm::errs() << "Symbol <" << desc.name + << "> is exported but not defined.\n"; + return false; + } + const DefinedAtom *atom = it->second; + + // One can export a symbol with a different name than the symbol + // name used in DLL. If such name is specified, use it in the + // .edata section. 
+ ret.push_back(TableEntry(ctx.undecorateSymbol(desc.getExternalName()), + desc.ordinal, atom, desc.noname)); + } + std::sort(ret.begin(), ret.end(), + [](const TableEntry &a, const TableEntry &b) { + return a.exportName.compare(b.exportName) < 0; + }); + + return true; +} + +static std::pair<int, int> getOrdinalBase(std::vector<TableEntry> &entries) { + int ordinalBase = INT_MAX; + int maxOrdinal = -1; + for (TableEntry &e : entries) { + ordinalBase = std::min(ordinalBase, e.ordinal); + maxOrdinal = std::max(maxOrdinal, e.ordinal); + } + return std::pair<int, int>(ordinalBase, maxOrdinal); +} + +edata::EdataAtom * +EdataPass::createAddressTable(const std::vector<TableEntry> &entries, + int ordinalBase, int maxOrdinal) { + EdataAtom *addressTable = + new (_alloc) EdataAtom(_file, sizeof(export_address_table_entry) * + (maxOrdinal - ordinalBase + 1)); + + for (const TableEntry &e : entries) { + int index = e.ordinal - ordinalBase; + size_t offset = index * sizeof(export_address_table_entry); + addDir32NBReloc(addressTable, e.atom, _ctx.getMachineType(), offset); + } + return addressTable; +} + +edata::EdataAtom * +EdataPass::createNamePointerTable(const PECOFFLinkingContext &ctx, + const std::vector<TableEntry> &entries, + MutableFile *file) { + EdataAtom *table = + new (_alloc) EdataAtom(_file, sizeof(uint32_t) * entries.size()); + + size_t offset = 0; + for (const TableEntry &e : entries) { + auto *stringAtom = new (_alloc) COFFStringAtom( + _file, _stringOrdinal++, ".edata", e.exportName); + file->addAtom(*stringAtom); + addDir32NBReloc(table, stringAtom, _ctx.getMachineType(), offset); + offset += sizeof(uint32_t); + } + return table; +} + +edata::EdataAtom *EdataPass::createExportDirectoryTable( + const std::vector<edata::TableEntry> &namedEntries, int ordinalBase, + int maxOrdinal) { + EdataAtom *ret = + new (_alloc) EdataAtom(_file, sizeof(export_directory_table_entry)); + auto *data = ret->getContents<export_directory_table_entry>(); + data->TimeDateStamp = 
time(nullptr); + data->OrdinalBase = ordinalBase; + data->AddressTableEntries = maxOrdinal - ordinalBase + 1; + data->NumberOfNamePointers = namedEntries.size(); + return ret; +} + +edata::EdataAtom * +EdataPass::createOrdinalTable(const std::vector<TableEntry> &entries, + int ordinalBase) { + EdataAtom *ret = + new (_alloc) EdataAtom(_file, sizeof(uint16_t) * entries.size()); + uint16_t *data = ret->getContents<uint16_t>(); + int i = 0; + for (const TableEntry &e : entries) + data[i++] = e.ordinal - ordinalBase; + return ret; +} + +void EdataPass::perform(std::unique_ptr<MutableFile> &file) { + dedupExports(_ctx); + assignOrdinals(_ctx); + + std::vector<TableEntry> entries; + if (!getExportedAtoms(_ctx, file.get(), entries)) + return; + if (entries.empty()) + return; + + int ordinalBase, maxOrdinal; + std::tie(ordinalBase, maxOrdinal) = getOrdinalBase(entries); + + std::vector<TableEntry> namedEntries; + for (TableEntry &e : entries) + if (!e.noname) + namedEntries.push_back(e); + + EdataAtom *table = + createExportDirectoryTable(namedEntries, ordinalBase, maxOrdinal); + file->addAtom(*table); + + COFFStringAtom *dllName = + new (_alloc) COFFStringAtom(_file, _stringOrdinal++, ".edata", + llvm::sys::path::filename(_ctx.outputPath())); + file->addAtom(*dllName); + addDir32NBReloc(table, dllName, _ctx.getMachineType(), + offsetof(export_directory_table_entry, NameRVA)); + + EdataAtom *addressTable = + createAddressTable(entries, ordinalBase, maxOrdinal); + file->addAtom(*addressTable); + addDir32NBReloc( + table, addressTable, _ctx.getMachineType(), + offsetof(export_directory_table_entry, ExportAddressTableRVA)); + + EdataAtom *namePointerTable = + createNamePointerTable(_ctx, namedEntries, file.get()); + file->addAtom(*namePointerTable); + addDir32NBReloc(table, namePointerTable, _ctx.getMachineType(), + offsetof(export_directory_table_entry, NamePointerRVA)); + + EdataAtom *ordinalTable = createOrdinalTable(namedEntries, ordinalBase); + 
file->addAtom(*ordinalTable); + addDir32NBReloc(table, ordinalTable, _ctx.getMachineType(), + offsetof(export_directory_table_entry, OrdinalTableRVA)); +} + +} // namespace pecoff +} // namespace lld diff --git a/lib/ReaderWriter/PECOFF/EdataPass.h b/lib/ReaderWriter/PECOFF/EdataPass.h new file mode 100644 index 000000000000..442be3ca24aa --- /dev/null +++ b/lib/ReaderWriter/PECOFF/EdataPass.h @@ -0,0 +1,99 @@ +//===- lib/ReaderWriter/PECOFF/EdataPass.h --------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file \brief This linker pass creates atoms for the DLL export +/// information. The defined atoms constructed in this pass will go into .edata +/// section. +/// +/// For the details of the .edata section format, see Microsoft PE/COFF +/// Specification section 5.3, The .edata Section. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_PE_COFF_EDATA_PASS_H +#define LLD_READER_WRITER_PE_COFF_EDATA_PASS_H + +#include "Atoms.h" +#include "lld/Core/File.h" +#include "lld/Core/Pass.h" +#include "lld/Core/Simple.h" +#include "lld/ReaderWriter/PECOFFLinkingContext.h" +#include "llvm/Support/COFF.h" +#include <map> + +using llvm::COFF::ImportDirectoryTableEntry; + +namespace lld { +namespace pecoff { +namespace edata { + +struct TableEntry { + TableEntry(StringRef exp, int ord, const DefinedAtom *a, bool n) + : exportName(exp), ordinal(ord), atom(a), noname(n) {} + std::string exportName; + int ordinal; + const DefinedAtom *atom; + bool noname; +}; + +/// The root class of all edata atoms. 
+class EdataAtom : public COFFLinkerInternalAtom { +public: + EdataAtom(VirtualFile &file, size_t size) + : COFFLinkerInternalAtom(file, file.getNextOrdinal(), + std::vector<uint8_t>(size)) {} + + SectionChoice sectionChoice() const override { return sectionCustomRequired; } + StringRef customSectionName() const override { return ".edata"; } + ContentType contentType() const override { return typeData; } + ContentPermissions permissions() const override { return permR__; } + + template <typename T> T *getContents() const { + return (T *)const_cast<uint8_t *>(rawContent().data()); + } +}; + +} // namespace edata + +class EdataPass : public lld::Pass { +public: + EdataPass(PECOFFLinkingContext &ctx) + : _ctx(ctx), _file(ctx), _is64(ctx.is64Bit()), _stringOrdinal(1024) {} + + void perform(std::unique_ptr<MutableFile> &file) override; + +private: + edata::EdataAtom * + createExportDirectoryTable(const std::vector<edata::TableEntry> &namedEntries, + int ordinalBase, int maxOrdinal); + + edata::EdataAtom * + createAddressTable(const std::vector<edata::TableEntry> &entries, + int ordinalBase, int maxOrdinal); + + edata::EdataAtom * + createNamePointerTable(const PECOFFLinkingContext &ctx, + const std::vector<edata::TableEntry> &entries, + MutableFile *file); + + edata::EdataAtom * + createOrdinalTable(const std::vector<edata::TableEntry> &entries, + int ordinalBase); + + PECOFFLinkingContext &_ctx; + VirtualFile _file; + bool _is64; + int _stringOrdinal; + mutable llvm::BumpPtrAllocator _alloc; +}; + +} // namespace pecoff +} // namespace lld + +#endif diff --git a/lib/ReaderWriter/PECOFF/IdataPass.cpp b/lib/ReaderWriter/PECOFF/IdataPass.cpp new file mode 100644 index 000000000000..d41ef581f7fa --- /dev/null +++ b/lib/ReaderWriter/PECOFF/IdataPass.cpp @@ -0,0 +1,345 @@ +//===- lib/ReaderWriter/PECOFF/IdataPass.cpp ------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "IdataPass.h" +#include "Pass.h" +#include "lld/Core/File.h" +#include "lld/Core/Pass.h" +#include "lld/Core/Simple.h" +#include "llvm/Support/COFF.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Endian.h" +#include <algorithm> +#include <cstddef> +#include <cstring> +#include <map> +#include <vector> + +using namespace llvm::support::endian; +using llvm::object::delay_import_directory_table_entry; + +namespace lld { +namespace pecoff { +namespace idata { + +IdataAtom::IdataAtom(IdataContext &context, std::vector<uint8_t> data) + : COFFLinkerInternalAtom(context.dummyFile, + context.dummyFile.getNextOrdinal(), data) { + context.file.addAtom(*this); +} + +HintNameAtom::HintNameAtom(IdataContext &context, uint16_t hint, + StringRef importName) + : IdataAtom(context, assembleRawContent(hint, importName)), + _importName(importName) {} + +std::vector<uint8_t> HintNameAtom::assembleRawContent(uint16_t hint, + StringRef importName) { + size_t size = + llvm::RoundUpToAlignment(sizeof(hint) + importName.size() + 1, 2); + std::vector<uint8_t> ret(size); + ret[importName.size()] = 0; + ret[importName.size() - 1] = 0; + write16le(&ret[0], hint); + std::memcpy(&ret[2], importName.data(), importName.size()); + return ret; +} + +std::vector<uint8_t> +ImportTableEntryAtom::assembleRawContent(uint64_t rva, bool is64) { + // The element size of the import table is 32 bit in PE and 64 bit + // in PE+. In PE+, bits 62-31 are filled with zero. 
+ if (is64) { + std::vector<uint8_t> ret(8); + write64le(&ret[0], rva); + return ret; + } + std::vector<uint8_t> ret(4); + write32le(&ret[0], rva); + return ret; +} + +static std::vector<ImportTableEntryAtom *> +createImportTableAtoms(IdataContext &context, + const std::vector<COFFSharedLibraryAtom *> &sharedAtoms, + bool shouldAddReference, StringRef sectionName, + llvm::BumpPtrAllocator &alloc) { + std::vector<ImportTableEntryAtom *> ret; + for (COFFSharedLibraryAtom *atom : sharedAtoms) { + ImportTableEntryAtom *entry = nullptr; + if (atom->importName().empty()) { + // Import by ordinal + uint64_t hint = atom->hint(); + hint |= context.ctx.is64Bit() ? (uint64_t(1) << 63) : (uint64_t(1) << 31); + entry = new (alloc) ImportTableEntryAtom(context, hint, sectionName); + } else { + // Import by name + entry = new (alloc) ImportTableEntryAtom(context, 0, sectionName); + HintNameAtom *hintName = + new (alloc) HintNameAtom(context, atom->hint(), atom->importName()); + addDir32NBReloc(entry, hintName, context.ctx.getMachineType(), 0); + } + ret.push_back(entry); + if (shouldAddReference) + atom->setImportTableEntry(entry); + } + // Add the NULL entry. + ret.push_back(new (alloc) ImportTableEntryAtom(context, 0, sectionName)); + return ret; +} + +// Creates atoms for an import lookup table. The import lookup table is an +// array of pointers to hint/name atoms. The array needs to be terminated with +// the NULL entry. +void ImportDirectoryAtom::addRelocations( + IdataContext &context, StringRef loadName, + const std::vector<COFFSharedLibraryAtom *> &sharedAtoms) { + // Create parallel arrays. The contents of the two are initially the + // same. The PE/COFF loader overwrites the import address tables with the + // pointers to the referenced items after loading the executable into + // memory. 
+ std::vector<ImportTableEntryAtom *> importLookupTables = + createImportTableAtoms(context, sharedAtoms, false, ".idata.t", _alloc); + std::vector<ImportTableEntryAtom *> importAddressTables = + createImportTableAtoms(context, sharedAtoms, true, ".idata.a", _alloc); + + addDir32NBReloc(this, importLookupTables[0], context.ctx.getMachineType(), + offsetof(ImportDirectoryTableEntry, ImportLookupTableRVA)); + addDir32NBReloc(this, importAddressTables[0], context.ctx.getMachineType(), + offsetof(ImportDirectoryTableEntry, ImportAddressTableRVA)); + auto *atom = new (_alloc) + COFFStringAtom(context.dummyFile, context.dummyFile.getNextOrdinal(), + ".idata", loadName); + context.file.addAtom(*atom); + addDir32NBReloc(this, atom, context.ctx.getMachineType(), + offsetof(ImportDirectoryTableEntry, NameRVA)); +} + +// Create the contents for the delay-import table. +std::vector<uint8_t> DelayImportDirectoryAtom::createContent() { + std::vector<uint8_t> r(sizeof(delay_import_directory_table_entry), 0); + auto entry = reinterpret_cast<delay_import_directory_table_entry *>(&r[0]); + // link.exe seems to set 1 to Attributes field, so do we. + entry->Attributes = 1; + return r; +} + +// Find "___delayLoadHelper2@8" (or "__delayLoadHelper2" on x64). +// This is not efficient but should be OK for now. +static const Atom * +findDelayLoadHelper(MutableFile &file, const PECOFFLinkingContext &ctx) { + StringRef sym = ctx.getDelayLoadHelperName(); + for (const DefinedAtom *atom : file.defined()) + if (atom->name() == sym) + return atom; + std::string msg = (sym + " was not found").str(); + llvm_unreachable(msg.c_str()); +} + +// Create the data referred by the delay-import table. +void DelayImportDirectoryAtom::addRelocations( + IdataContext &context, StringRef loadName, + const std::vector<COFFSharedLibraryAtom *> &sharedAtoms) { + // "ModuleHandle" field. This points to an array of pointer-size data + // in ".data" section. Initially the array is initialized with zero. 
+ // The delay-load import helper will set DLL base address at runtime. + auto *hmodule = new (_alloc) DelayImportAddressAtom(context); + addDir32NBReloc(this, hmodule, context.ctx.getMachineType(), + offsetof(delay_import_directory_table_entry, ModuleHandle)); + + // "NameTable" field. The data structure of this field is the same + // as (non-delay) import table's Import Lookup Table. Contains + // imported function names. This is a parallel array of AddressTable + // field. + std::vector<ImportTableEntryAtom *> nameTable = + createImportTableAtoms(context, sharedAtoms, false, ".didat", _alloc); + addDir32NBReloc( + this, nameTable[0], context.ctx.getMachineType(), + offsetof(delay_import_directory_table_entry, DelayImportNameTable)); + + // "Name" field. This points to the NUL-terminated DLL name string. + auto *name = new (_alloc) + COFFStringAtom(context.dummyFile, context.dummyFile.getNextOrdinal(), + ".didat", loadName); + context.file.addAtom(*name); + addDir32NBReloc(this, name, context.ctx.getMachineType(), + offsetof(delay_import_directory_table_entry, Name)); + + // "AddressTable" field. This points to an array of pointers, which + // in turn pointing to delay-load functions. 
+ std::vector<DelayImportAddressAtom *> addrTable; + for (int i = 0, e = sharedAtoms.size() + 1; i < e; ++i) + addrTable.push_back(new (_alloc) DelayImportAddressAtom(context)); + for (int i = 0, e = sharedAtoms.size(); i < e; ++i) + sharedAtoms[i]->setImportTableEntry(addrTable[i]); + addDir32NBReloc( + this, addrTable[0], context.ctx.getMachineType(), + offsetof(delay_import_directory_table_entry, DelayImportAddressTable)); + + const Atom *delayLoadHelper = findDelayLoadHelper(context.file, context.ctx); + for (int i = 0, e = sharedAtoms.size(); i < e; ++i) { + const DefinedAtom *loader = new (_alloc) DelayLoaderAtom( + context, addrTable[i], this, delayLoadHelper); + if (context.ctx.is64Bit()) + addDir64Reloc(addrTable[i], loader, context.ctx.getMachineType(), 0); + else + addDir32Reloc(addrTable[i], loader, context.ctx.getMachineType(), 0); + } +} + +DelayLoaderAtom::DelayLoaderAtom(IdataContext &context, const Atom *impAtom, + const Atom *descAtom, const Atom *delayLoadHelperAtom) + : IdataAtom(context, createContent(context.ctx.getMachineType())) { + MachineTypes machine = context.ctx.getMachineType(); + switch (machine) { + case llvm::COFF::IMAGE_FILE_MACHINE_I386: + addDir32Reloc(this, impAtom, machine, 3); + addDir32Reloc(this, descAtom, machine, 8); + addRel32Reloc(this, delayLoadHelperAtom, machine, 13); + break; + case llvm::COFF::IMAGE_FILE_MACHINE_AMD64: + addRel32Reloc(this, impAtom, machine, 36); + addRel32Reloc(this, descAtom, machine, 43); + addRel32Reloc(this, delayLoadHelperAtom, machine, 48); + break; + default: + llvm::report_fatal_error("unsupported machine type"); + } +} + +// DelayLoaderAtom contains a wrapper function for __delayLoadHelper2. +// +// __delayLoadHelper2 takes two pointers: a pointer to the delay-load +// table descripter and a pointer to _imp_ symbol for the function +// to be resolved. 
+// +// __delayLoadHelper2 looks at the table descriptor to know the DLL +// name, calls dlopen()-like function to load it, resolves all +// imported symbols, and then writes the resolved addresses to the +// import address table. It returns a pointer to the resolved +// function. +// +// __delayLoadHelper2 is defined in delayimp.lib. +std::vector<uint8_t> +DelayLoaderAtom::createContent(MachineTypes machine) const { + static const uint8_t x86[] = { + 0x51, // push ecx + 0x52, // push edx + 0x68, 0, 0, 0, 0, // push offset ___imp__<FUNCNAME> + 0x68, 0, 0, 0, 0, // push offset ___DELAY_IMPORT_DESCRIPTOR_<DLLNAME>_dll + 0xE8, 0, 0, 0, 0, // call ___delayLoadHelper2@8 + 0x5A, // pop edx + 0x59, // pop ecx + 0xFF, 0xE0, // jmp eax + }; + static const uint8_t x64[] = { + 0x51, // push rcx + 0x52, // push rdx + 0x41, 0x50, // push r8 + 0x41, 0x51, // push r9 + 0x48, 0x83, 0xEC, 0x48, // sub rsp, 48h + 0x66, 0x0F, 0x7F, 0x04, 0x24, // movdqa xmmword ptr [rsp], xmm0 + 0x66, 0x0F, 0x7F, 0x4C, 0x24, 0x10, // movdqa xmmword ptr [rsp+10h], xmm1 + 0x66, 0x0F, 0x7F, 0x54, 0x24, 0x20, // movdqa xmmword ptr [rsp+20h], xmm2 + 0x66, 0x0F, 0x7F, 0x5C, 0x24, 0x30, // movdqa xmmword ptr [rsp+30h], xmm3 + 0x48, 0x8D, 0x15, 0, 0, 0, 0, // lea rdx, [__imp_<FUNCNAME>] + 0x48, 0x8D, 0x0D, 0, 0, 0, 0, // lea rcx, [___DELAY_IMPORT_...] 
+ 0xE8, 0, 0, 0, 0, // call __delayLoadHelper2 + 0x66, 0x0F, 0x6F, 0x04, 0x24, // movdqa xmm0, xmmword ptr [rsp] + 0x66, 0x0F, 0x6F, 0x4C, 0x24, 0x10, // movdqa xmm1, xmmword ptr [rsp+10h] + 0x66, 0x0F, 0x6F, 0x54, 0x24, 0x20, // movdqa xmm2, xmmword ptr [rsp+20h] + 0x66, 0x0F, 0x6F, 0x5C, 0x24, 0x30, // movdqa xmm3, xmmword ptr [rsp+30h] + 0x48, 0x83, 0xC4, 0x48, // add rsp, 48h + 0x41, 0x59, // pop r9 + 0x41, 0x58, // pop r8 + 0x5A, // pop rdx + 0x59, // pop rcx + 0xFF, 0xE0, // jmp rax + }; + switch (machine) { + case llvm::COFF::IMAGE_FILE_MACHINE_I386: + return std::vector<uint8_t>(x86, x86 + sizeof(x86)); + case llvm::COFF::IMAGE_FILE_MACHINE_AMD64: + return std::vector<uint8_t>(x64, x64 + sizeof(x64)); + default: + llvm::report_fatal_error("unsupported machine type"); + } +} + +} // namespace idata + +void IdataPass::perform(std::unique_ptr<MutableFile> &file) { + if (file->sharedLibrary().empty()) + return; + + idata::IdataContext context(*file, _dummyFile, _ctx); + std::map<StringRef, std::vector<COFFSharedLibraryAtom *>> sharedAtoms = + groupByLoadName(*file); + bool hasImports = false; + bool hasDelayImports = false; + + // Create the import table and terminate it with the null entry. + for (auto i : sharedAtoms) { + StringRef loadName = i.first; + if (_ctx.isDelayLoadDLL(loadName)) + continue; + hasImports = true; + std::vector<COFFSharedLibraryAtom *> &atoms = i.second; + new (_alloc) idata::ImportDirectoryAtom(context, loadName, atoms); + } + if (hasImports) + new (_alloc) idata::NullImportDirectoryAtom(context); + + // Create the delay import table and terminate it with the null entry. 
+ for (auto i : sharedAtoms) { + StringRef loadName = i.first; + if (!_ctx.isDelayLoadDLL(loadName)) + continue; + hasDelayImports = true; + std::vector<COFFSharedLibraryAtom *> &atoms = i.second; + new (_alloc) idata::DelayImportDirectoryAtom(context, loadName, atoms); + } + if (hasDelayImports) + new (_alloc) idata::DelayNullImportDirectoryAtom(context); + + replaceSharedLibraryAtoms(*file); +} + +std::map<StringRef, std::vector<COFFSharedLibraryAtom *> > +IdataPass::groupByLoadName(MutableFile &file) { + std::map<StringRef, COFFSharedLibraryAtom *> uniqueAtoms; + for (const SharedLibraryAtom *atom : file.sharedLibrary()) + uniqueAtoms[atom->name()] = + (COFFSharedLibraryAtom *)const_cast<SharedLibraryAtom *>(atom); + + std::map<StringRef, std::vector<COFFSharedLibraryAtom *> > ret; + for (auto i : uniqueAtoms) { + COFFSharedLibraryAtom *atom = i.second; + ret[atom->loadName()].push_back(atom); + } + return ret; +} + +/// Transforms a reference to a COFFSharedLibraryAtom to a real reference. +void IdataPass::replaceSharedLibraryAtoms(MutableFile &file) { + for (const DefinedAtom *atom : file.defined()) { + for (const Reference *ref : *atom) { + const Atom *target = ref->target(); + auto *sharedAtom = dyn_cast<SharedLibraryAtom>(target); + if (!sharedAtom) + continue; + const auto *coffSharedAtom = (const COFFSharedLibraryAtom *)sharedAtom; + const DefinedAtom *entry = coffSharedAtom->getImportTableEntry(); + const_cast<Reference *>(ref)->setTarget(entry); + } + } +} + +} // namespace pecoff +} // namespace lld diff --git a/lib/ReaderWriter/PECOFF/IdataPass.h b/lib/ReaderWriter/PECOFF/IdataPass.h new file mode 100644 index 000000000000..9db82160339a --- /dev/null +++ b/lib/ReaderWriter/PECOFF/IdataPass.h @@ -0,0 +1,218 @@ +//===- lib/ReaderWriter/PECOFF/IdataPass.h---------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +/// +/// \file \brief This linker pass creates atoms for the DLL import +/// information. The defined atoms constructed in this pass will go into .idata +/// section, unless .idata section is merged with other section such as .data. +/// +/// For the details of the .idata section format, see Microsoft PE/COFF +/// Specification section 5.4, The .idata Section. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_PE_COFF_IDATA_PASS_H +#define LLD_READER_WRITER_PE_COFF_IDATA_PASS_H + +#include "Atoms.h" +#include "lld/Core/File.h" +#include "lld/Core/Pass.h" +#include "lld/Core/Simple.h" +#include "lld/ReaderWriter/PECOFFLinkingContext.h" +#include "llvm/Support/COFF.h" +#include <algorithm> +#include <map> + +using llvm::COFF::ImportDirectoryTableEntry; + +namespace lld { +namespace pecoff { +namespace idata { + +class DLLNameAtom; +class HintNameAtom; +class ImportTableEntryAtom; + +// A state object of this pass. +struct IdataContext { + IdataContext(MutableFile &f, VirtualFile &g, const PECOFFLinkingContext &c) + : file(f), dummyFile(g), ctx(c) {} + MutableFile &file; + VirtualFile &dummyFile; + const PECOFFLinkingContext &ctx; +}; + +/// The root class of all idata atoms. +class IdataAtom : public COFFLinkerInternalAtom { +public: + SectionChoice sectionChoice() const override { return sectionCustomRequired; } + StringRef customSectionName() const override { return ".idata"; } + ContentType contentType() const override { return typeData; } + ContentPermissions permissions() const override { return permR__; } + +protected: + IdataAtom(IdataContext &context, std::vector<uint8_t> data); +}; + +/// A HintNameAtom represents a symbol that will be imported from a DLL at +/// runtime. It consists with an optional hint, which is a small integer, and a +/// symbol name. 
+/// +/// A hint is an index of the export pointer table in a DLL. If the import +/// library and DLL is in sync (i.e., ".lib" and ".dll" is for the same version +/// or the symbol ordinal is maintained by hand with ".exp" file), the PE/COFF +/// loader can find the symbol quickly. +class HintNameAtom : public IdataAtom { +public: + HintNameAtom(IdataContext &context, uint16_t hint, StringRef importName); + + StringRef getContentString() { return _importName; } + +private: + std::vector<uint8_t> assembleRawContent(uint16_t hint, StringRef importName); + StringRef _importName; +}; + +class ImportTableEntryAtom : public IdataAtom { +public: + ImportTableEntryAtom(IdataContext &ctx, uint64_t contents, + StringRef sectionName) + : IdataAtom(ctx, assembleRawContent(contents, ctx.ctx.is64Bit())), + _sectionName(sectionName) {} + + StringRef customSectionName() const override { + return _sectionName; + }; + +private: + std::vector<uint8_t> assembleRawContent(uint64_t contents, bool is64); + StringRef _sectionName; +}; + +/// An ImportDirectoryAtom includes information to load a DLL, including a DLL +/// name, symbols that will be resolved from the DLL, and the import address +/// table that are overwritten by the loader with the pointers to the referenced +/// items. The executable has one ImportDirectoryAtom per one imported DLL. +class ImportDirectoryAtom : public IdataAtom { +public: + ImportDirectoryAtom(IdataContext &context, StringRef loadName, + const std::vector<COFFSharedLibraryAtom *> &sharedAtoms) + : IdataAtom(context, std::vector<uint8_t>(20, 0)) { + addRelocations(context, loadName, sharedAtoms); + } + + StringRef customSectionName() const override { return ".idata.d"; } + +private: + void addRelocations(IdataContext &context, StringRef loadName, + const std::vector<COFFSharedLibraryAtom *> &sharedAtoms); + + mutable llvm::BumpPtrAllocator _alloc; +}; + +/// The last NULL entry in the import directory. 
+class NullImportDirectoryAtom : public IdataAtom { +public: + explicit NullImportDirectoryAtom(IdataContext &context) + : IdataAtom(context, std::vector<uint8_t>(20, 0)) {} + + StringRef customSectionName() const override { return ".idata.d"; } +}; + +/// The class for the the delay-load import table. +class DelayImportDirectoryAtom : public IdataAtom { +public: + DelayImportDirectoryAtom( + IdataContext &context, StringRef loadName, + const std::vector<COFFSharedLibraryAtom *> &sharedAtoms) + : IdataAtom(context, createContent()) { + addRelocations(context, loadName, sharedAtoms); + } + + StringRef customSectionName() const override { return ".didat.d"; } + +private: + std::vector<uint8_t> createContent(); + void addRelocations(IdataContext &context, StringRef loadName, + const std::vector<COFFSharedLibraryAtom *> &sharedAtoms); + + mutable llvm::BumpPtrAllocator _alloc; +}; + +/// Terminator of the delay-load import table. The content of this atom is all +/// zero. +class DelayNullImportDirectoryAtom : public IdataAtom { +public: + explicit DelayNullImportDirectoryAtom(IdataContext &context) + : IdataAtom(context, createContent()) {} + StringRef customSectionName() const override { return ".didat.d"; } + +private: + std::vector<uint8_t> createContent() const { + return std::vector<uint8_t>( + sizeof(llvm::object::delay_import_directory_table_entry), 0); + } +}; + +class DelayImportAddressAtom : public IdataAtom { +public: + explicit DelayImportAddressAtom(IdataContext &context) + : IdataAtom(context, createContent(context.ctx)), + _align(Alignment(context.ctx.is64Bit() ? 3 : 2)) {} + StringRef customSectionName() const override { return ".data"; } + ContentPermissions permissions() const override { return permRW_; } + Alignment alignment() const override { return _align; } + +private: + std::vector<uint8_t> createContent(const PECOFFLinkingContext &ctx) const { + return std::vector<uint8_t>(ctx.is64Bit() ? 
8 : 4, 0); + } + + Alignment _align; +}; + +// DelayLoaderAtom contains a wrapper function for __delayLoadHelper2. +class DelayLoaderAtom : public IdataAtom { +public: + DelayLoaderAtom(IdataContext &context, const Atom *impAtom, + const Atom *descAtom, const Atom *delayLoadHelperAtom); + StringRef customSectionName() const override { return ".text"; } + ContentPermissions permissions() const override { return permR_X; } + Alignment alignment() const override { return Alignment(0); } + +private: + std::vector<uint8_t> createContent(MachineTypes machine) const; +}; + +} // namespace idata + +class IdataPass : public lld::Pass { +public: + IdataPass(const PECOFFLinkingContext &ctx) : _dummyFile(ctx), _ctx(ctx) {} + + void perform(std::unique_ptr<MutableFile> &file) override; + +private: + std::map<StringRef, std::vector<COFFSharedLibraryAtom *>> + groupByLoadName(MutableFile &file); + + void replaceSharedLibraryAtoms(MutableFile &file); + + // A dummy file with which all the atoms created in the pass will be + // associated. Atoms need to be associated to an input file even if it's not + // read from a file, so we use this object. + VirtualFile _dummyFile; + + const PECOFFLinkingContext &_ctx; + llvm::BumpPtrAllocator _alloc; +}; + +} // namespace pecoff +} // namespace lld + +#endif diff --git a/lib/ReaderWriter/PECOFF/InferSubsystemPass.h b/lib/ReaderWriter/PECOFF/InferSubsystemPass.h new file mode 100644 index 000000000000..cbf863ee4784 --- /dev/null +++ b/lib/ReaderWriter/PECOFF/InferSubsystemPass.h @@ -0,0 +1,66 @@ +//===- lib/ReaderWriter/PECOFF/InferSubsystemPass.h ----------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_PE_COFF_INFER_SUBSYSTEM_PASS_H +#define LLD_READER_WRITER_PE_COFF_INFER_SUBSYSTEM_PASS_H + +#include "Atoms.h" +#include "lld/Core/Pass.h" +#include <vector> + +namespace lld { +namespace pecoff { + +// Infers subsystem from entry point function name. +class InferSubsystemPass : public lld::Pass { +public: + InferSubsystemPass(PECOFFLinkingContext &ctx) : _ctx(ctx) {} + + void perform(std::unique_ptr<MutableFile> &file) override { + if (_ctx.getSubsystem() != WindowsSubsystem::IMAGE_SUBSYSTEM_UNKNOWN) + return; + + if (_ctx.isDll()) { + _ctx.setSubsystem(WindowsSubsystem::IMAGE_SUBSYSTEM_WINDOWS_GUI); + return; + } + + // Scan the resolved symbols to infer the subsystem. + const std::string wWinMain = _ctx.decorateSymbol("wWinMainCRTStartup"); + const std::string wWinMainAt = _ctx.decorateSymbol("wWinMainCRTStartup@"); + const std::string winMain = _ctx.decorateSymbol("WinMainCRTStartup"); + const std::string winMainAt = _ctx.decorateSymbol("WinMainCRTStartup@"); + const std::string wmain = _ctx.decorateSymbol("wmainCRTStartup"); + const std::string wmainAt = _ctx.decorateSymbol("wmainCRTStartup@"); + const std::string main = _ctx.decorateSymbol("mainCRTStartup"); + const std::string mainAt = _ctx.decorateSymbol("mainCRTStartup@"); + + for (const DefinedAtom *atom : file->definedAtoms()) { + if (atom->name() == wWinMain || atom->name().startswith(wWinMainAt) || + atom->name() == winMain || atom->name().startswith(winMainAt)) { + _ctx.setSubsystem(WindowsSubsystem::IMAGE_SUBSYSTEM_WINDOWS_GUI); + return; + } + if (atom->name() == wmain || atom->name().startswith(wmainAt) || + atom->name() == main || atom->name().startswith(mainAt)) { + _ctx.setSubsystem(WindowsSubsystem::IMAGE_SUBSYSTEM_WINDOWS_CUI); + return; + } + } + llvm::report_fatal_error("Failed to infer subsystem"); + } + +private: + PECOFFLinkingContext &_ctx; +}; + +} // namespace pecoff +} // 
namespace lld + +#endif diff --git a/lib/ReaderWriter/PECOFF/LinkerGeneratedSymbolFile.cpp b/lib/ReaderWriter/PECOFF/LinkerGeneratedSymbolFile.cpp new file mode 100644 index 000000000000..a11410784b8c --- /dev/null +++ b/lib/ReaderWriter/PECOFF/LinkerGeneratedSymbolFile.cpp @@ -0,0 +1,48 @@ +//===- lib/ReaderWriter/PECOFF/LinkerGeneratedSymbolFile.cpp --------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "LinkerGeneratedSymbolFile.h" + +namespace lld { +namespace pecoff { + +// Find decorated symbol, namely /sym@[0-9]+/ or /\?sym@@.+/. +bool findDecoratedSymbol(PECOFFLinkingContext *ctx, + std::string sym, std::string &res) { + const std::set<std::string> &defined = ctx->definedSymbols(); + // Search for /sym@[0-9]+/ + { + std::string s = sym + '@'; + auto it = defined.lower_bound(s); + for (auto e = defined.end(); it != e; ++it) { + if (!StringRef(*it).startswith(s)) + break; + if (it->size() == s.size()) + continue; + StringRef suffix = StringRef(*it).substr(s.size()); + if (suffix.find_first_not_of("0123456789") != StringRef::npos) + continue; + res = *it; + return true; + } + } + // Search for /\?sym@@.+/ + { + std::string s = "?" 
+ ctx->undecorateSymbol(sym).str() + "@@"; + auto it = defined.lower_bound(s); + if (it != defined.end() && StringRef(*it).startswith(s)) { + res = *it; + return true; + } + } + return false; +} + +} // namespace pecoff +} // namespace lld diff --git a/lib/ReaderWriter/PECOFF/LinkerGeneratedSymbolFile.h b/lib/ReaderWriter/PECOFF/LinkerGeneratedSymbolFile.h new file mode 100644 index 000000000000..b9764d70bb3b --- /dev/null +++ b/lib/ReaderWriter/PECOFF/LinkerGeneratedSymbolFile.h @@ -0,0 +1,309 @@ +//===- lib/ReaderWriter/PECOFF/LinkerGeneratedSymbolFile.h ----------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "Atoms.h" +#include "lld/Core/Simple.h" +#include "lld/ReaderWriter/PECOFFLinkingContext.h" +#include "llvm/Support/Allocator.h" +#include <algorithm> +#include <mutex> + +using llvm::COFF::WindowsSubsystem; + +namespace lld { +namespace pecoff { + +bool findDecoratedSymbol(PECOFFLinkingContext *ctx, + std::string sym, std::string &res); + +namespace impl { + +/// The defined atom for dllexported symbols with __imp_ prefix. 
+class ImpPointerAtom : public COFFLinkerInternalAtom { +public: + ImpPointerAtom(const File &file, StringRef symbolName, uint64_t ordinal) + : COFFLinkerInternalAtom(file, /*oridnal*/ 0, std::vector<uint8_t>(4), + symbolName), + _ordinal(ordinal) {} + + uint64_t ordinal() const override { return _ordinal; } + Scope scope() const override { return scopeGlobal; } + ContentType contentType() const override { return typeData; } + Alignment alignment() const override { return Alignment(4); } + ContentPermissions permissions() const override { return permR__; } + +private: + uint64_t _ordinal; +}; + +class ImpSymbolFile : public SimpleFile { +public: + ImpSymbolFile(StringRef defsym, StringRef undefsym, uint64_t ordinal, + bool is64) + : SimpleFile(defsym), _undefined(*this, undefsym), + _defined(*this, defsym, ordinal) { + SimpleReference *ref; + if (is64) { + ref = new SimpleReference(Reference::KindNamespace::COFF, + Reference::KindArch::x86_64, + llvm::COFF::IMAGE_REL_AMD64_ADDR32, + 0, &_undefined, 0); + } else { + ref = new SimpleReference(Reference::KindNamespace::COFF, + Reference::KindArch::x86, + llvm::COFF::IMAGE_REL_I386_DIR32, + 0, &_undefined, 0); + } + _defined.addReference(std::unique_ptr<SimpleReference>(ref)); + addAtom(_defined); + addAtom(_undefined); + }; + +private: + SimpleUndefinedAtom _undefined; + ImpPointerAtom _defined; +}; + +// A file to make Resolver to resolve a symbol TO instead of a symbol FROM, +// using fallback mechanism for an undefined symbol. One can virtually rename an +// undefined symbol using this file. 
+class SymbolRenameFile : public SimpleFile { +public: + SymbolRenameFile(StringRef from, StringRef to) + : SimpleFile("<symbol-rename>"), _fromSym(from), _toSym(to), + _from(*this, _fromSym, &_to), _to(*this, _toSym) { + addAtom(_from); + }; + +private: + std::string _fromSym; + std::string _toSym; + COFFUndefinedAtom _from; + COFFUndefinedAtom _to; +}; + +} // namespace impl + +// A virtual file containing absolute symbol __ImageBase. __ImageBase (or +// ___ImageBase on x86) is a linker-generated symbol whose address is the same +// as the image base address. +class LinkerGeneratedSymbolFile : public SimpleFile { +public: + LinkerGeneratedSymbolFile(const PECOFFLinkingContext &ctx) + : SimpleFile("<linker-internal-file>"), + _imageBaseAtom(*this, ctx.decorateSymbol("__ImageBase"), + Atom::scopeGlobal, ctx.getBaseAddress()) { + addAtom(_imageBaseAtom); + }; + +private: + SimpleAbsoluteAtom _imageBaseAtom; +}; + +// A LocallyImporteSymbolFile is an archive file containing __imp_ +// symbols for local use. +// +// For each defined symbol, linker creates an implicit defined symbol +// by appending "__imp_" prefix to the original name. The content of +// the implicit symbol is a pointer to the original symbol +// content. This feature allows one to compile and link the following +// code without error, although _imp__hello is not defined in the +// code. (the leading "_" in this example is automatically appended, +// assuming it's x86.) +// +// void hello() { printf("Hello\n"); } +// extern void (*_imp__hello)(); +// int main() { +// _imp__hello(); +// return 0; +// } +// +// This odd feature is for the compatibility with MSVC link.exe. 
+class LocallyImportedSymbolFile : public SimpleArchiveLibraryFile { +public: + LocallyImportedSymbolFile(const PECOFFLinkingContext &ctx) + : SimpleArchiveLibraryFile("__imp_"), _is64(ctx.is64Bit()), + _ordinal(0) {} + + File *find(StringRef sym, bool dataSymbolOnly) override { + std::string prefix = "__imp_"; + if (!sym.startswith(prefix)) + return nullptr; + StringRef undef = sym.substr(prefix.size()); + return new (_alloc) impl::ImpSymbolFile(sym, undef, _ordinal++, _is64); + } + +private: + bool _is64; + uint64_t _ordinal; + llvm::BumpPtrAllocator _alloc; +}; + +// A ExportedSymbolRenameFile is a virtual archive file for dllexported symbols. +// +// One usually has to specify the exact symbol name to resolve it. That's true +// in most cases for PE/COFF, except the one described below. +// +// DLLExported symbols can be specified using a module definition file. In a +// file, one can write an EXPORT directive followed by symbol names. Such +// symbols may not be fully decorated. +// +// If a symbol FOO is specified to be dllexported by a module definition file, +// linker has to search not only for /FOO/ but also for /FOO@[0-9]+/ for stdcall +// and for /\?FOO@@.+/ for C++. This ambiguous matching semantics does not fit +// well with Resolver. +// +// We could probably modify Resolver to resolve ambiguous symbols, but I think +// we don't want to do that because it'd be rarely used, and only this Windows +// specific feature would use it. It's probably not a good idea to make the core +// linker to be able to deal with it. +// +// So, instead of tweaking Resolver, we chose to do some hack here. An +// ExportedSymbolRenameFile maintains a set containing all possibly defined +// symbol names. That set would be a union of (1) all the defined symbols that +// are already parsed and read and (2) all the defined symbols in archive files +// that are not yet be parsed. 
+// +// If Resolver asks this file to return an atom for a dllexported symbol, find() +// looks up the set, doing ambiguous matching. If there's a symbol with @ +// prefix, it returns an atom to rename the dllexported symbol, hoping that +// Resolver will find the new symbol with atsign from an archive file at the +// next visit. +class ExportedSymbolRenameFile : public SimpleArchiveLibraryFile { +public: + ExportedSymbolRenameFile(const PECOFFLinkingContext &ctx) + : SimpleArchiveLibraryFile("<export>"), + _ctx(const_cast<PECOFFLinkingContext *>(&ctx)) { + for (PECOFFLinkingContext::ExportDesc &desc : _ctx->getDllExports()) + _exportedSyms.insert(desc.name); + } + + File *find(StringRef sym, bool dataSymbolOnly) override { + typedef PECOFFLinkingContext::ExportDesc ExportDesc; + if (_exportedSyms.count(sym) == 0) + return nullptr; + std::string replace; + if (!findDecoratedSymbol(_ctx, sym.str(), replace)) + return nullptr; + + for (ExportDesc &exp : _ctx->getDllExports()) + if (exp.name == sym) + exp.mangledName = replace; + if (_ctx->deadStrip()) + _ctx->addDeadStripRoot(_ctx->allocate(replace)); + return new (_alloc) impl::SymbolRenameFile(sym, replace); + } + +private: + std::set<std::string> _exportedSyms; + llvm::BumpPtrAllocator _alloc; + PECOFFLinkingContext *_ctx; +}; + +// Windows has not only one but many entry point functions. The +// appropriate one is automatically selected based on the subsystem +// setting and the user-supplied entry point function. 
+// +// http://msdn.microsoft.com/en-us/library/f9t8842e.aspx +class EntryPointFile : public SimpleFile { +public: + EntryPointFile(const PECOFFLinkingContext &ctx) + : SimpleFile("<entry>"), _ctx(const_cast<PECOFFLinkingContext *>(&ctx)), + _firstTime(true) {} + + const atom_collection<UndefinedAtom> &undefined() const override { + return const_cast<EntryPointFile *>(this)->getUndefinedAtoms(); + } + +private: + const atom_collection<UndefinedAtom> &getUndefinedAtoms() { + std::lock_guard<std::mutex> lock(_mutex); + if (!_firstTime) + return _undefinedAtoms; + _firstTime = false; + + if (_ctx->hasEntry()) { + StringRef entrySym = _ctx->allocate(getEntry()); + _undefinedAtoms._atoms.push_back( + new (_alloc) SimpleUndefinedAtom(*this, entrySym)); + _ctx->setHasEntry(true); + _ctx->setEntrySymbolName(entrySym); + if (_ctx->deadStrip()) + _ctx->addDeadStripRoot(entrySym); + } + return _undefinedAtoms; + } + + // Returns the entry point function name. + std::string getEntry() const { + StringRef opt = _ctx->getEntrySymbolName(); + if (!opt.empty()) { + std::string mangled; + if (findDecoratedSymbol(_ctx, opt, mangled)) + return mangled; + return _ctx->decorateSymbol(opt); + } + return _ctx->decorateSymbol(getDefaultEntry()); + } + + std::string getDefaultEntry() const { + const std::string wWinMainCRTStartup = "wWinMainCRTStartup"; + const std::string WinMainCRTStartup = "WinMainCRTStartup"; + const std::string wmainCRTStartup = "wmainCRTStartup"; + const std::string mainCRTStartup = "mainCRTStartup"; + + if (_ctx->isDll()) { + if (_ctx->getMachineType() == llvm::COFF::IMAGE_FILE_MACHINE_I386) + return "_DllMainCRTStartup@12"; + return "_DllMainCRTStartup"; + } + + // Returns true if a given name exists in an input object file. 
+ auto defined = [&](StringRef name) -> bool { + StringRef sym = _ctx->decorateSymbol(name); + if (_ctx->definedSymbols().count(sym)) + return true; + std::string ignore; + return findDecoratedSymbol(_ctx, sym, ignore); + }; + + switch (_ctx->getSubsystem()) { + case WindowsSubsystem::IMAGE_SUBSYSTEM_UNKNOWN: { + if (defined("wWinMain")) + return wWinMainCRTStartup; + if (defined("WinMain")) + return WinMainCRTStartup; + if (defined("wmain")) + return wmainCRTStartup; + if (!defined("main")) + llvm::errs() << "Cannot infer subsystem; assuming /subsystem:console\n"; + return mainCRTStartup; + } + case WindowsSubsystem::IMAGE_SUBSYSTEM_WINDOWS_GUI: + if (defined("WinMain")) + return WinMainCRTStartup; + return wWinMainCRTStartup; + case WindowsSubsystem::IMAGE_SUBSYSTEM_WINDOWS_CUI: + if (defined("wmain")) + return wmainCRTStartup; + return mainCRTStartup; + default: + return mainCRTStartup; + } + } + + PECOFFLinkingContext *_ctx; + atom_collection_vector<UndefinedAtom> _undefinedAtoms; + std::mutex _mutex; + llvm::BumpPtrAllocator _alloc; + bool _firstTime; +}; + +} // end namespace pecoff +} // end namespace lld diff --git a/lib/ReaderWriter/PECOFF/LoadConfigPass.cpp b/lib/ReaderWriter/PECOFF/LoadConfigPass.cpp new file mode 100644 index 000000000000..be2f5627f4ea --- /dev/null +++ b/lib/ReaderWriter/PECOFF/LoadConfigPass.cpp @@ -0,0 +1,75 @@ +//===- lib/ReaderWriter/PECOFF/LoadConfigPass.cpp -------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// A Load Configuration is a data structure for x86 containing an address of the +// SEH handler table. The Data Directory in the file header points to a load +// configuration. Technically that indirection is not needed but exists for +// historical reasons. 
+// +// If the file being handled has .sxdata section containing SEH handler table, +// this pass will create a Load Configuration atom. +// +//===----------------------------------------------------------------------===// + +#include "Pass.h" +#include "LoadConfigPass.h" +#include "lld/Core/File.h" +#include "lld/Core/Pass.h" +#include "lld/Core/Simple.h" +#include "llvm/Object/COFF.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Path.h" +#include <climits> +#include <ctime> +#include <utility> + +using llvm::object::coff_load_configuration32; + +namespace lld { +namespace pecoff { +namespace loadcfg { + +LoadConfigAtom::LoadConfigAtom(VirtualFile &file, const DefinedAtom *sxdata, + int count) + : COFFLinkerInternalAtom( + file, file.getNextOrdinal(), + std::vector<uint8_t>(sizeof(coff_load_configuration32))) { + addDir32Reloc( + this, sxdata, llvm::COFF::IMAGE_FILE_MACHINE_I386, + offsetof(llvm::object::coff_load_configuration32, SEHandlerTable)); + auto *data = getContents<llvm::object::coff_load_configuration32>(); + data->SEHandlerCount = count; +} + +} // namespace loadcfg + +void LoadConfigPass::perform(std::unique_ptr<MutableFile> &file) { + if (_ctx.noSEH()) + return; + + // Find the first atom in .sxdata section. 
+ const DefinedAtom *sxdata = nullptr; + int sectionSize = 0; + for (const DefinedAtom *atom : file->defined()) { + if (atom->customSectionName() == ".sxdata") { + if (!sxdata) + sxdata = atom; + sectionSize += sxdata->size(); + } + } + if (!sxdata) + return; + + auto *loadcfg = new (_alloc) + loadcfg::LoadConfigAtom(_file, sxdata, sectionSize / sizeof(uint32_t)); + file->addAtom(*loadcfg); +} + +} // namespace pecoff +} // namespace lld diff --git a/lib/ReaderWriter/PECOFF/LoadConfigPass.h b/lib/ReaderWriter/PECOFF/LoadConfigPass.h new file mode 100644 index 000000000000..9f4a25f2b10e --- /dev/null +++ b/lib/ReaderWriter/PECOFF/LoadConfigPass.h @@ -0,0 +1,63 @@ +//===- lib/ReaderWriter/PECOFF/LoadConfigPass.h ---------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file \brief This linker pass creates an atom for Load Configuration +/// structure. +/// +/// For the details of the Load Configuration structure, see Microsoft PE/COFF +/// Specification section 5.8. The Load Configuration Structure (Image Only). 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_PE_COFF_LOAD_CONFIG_PASS_H +#define LLD_READER_WRITER_PE_COFF_LOAD_CONFIG_PASS_H + +#include "Atoms.h" +#include "lld/Core/File.h" +#include "lld/Core/Pass.h" +#include "lld/Core/Simple.h" +#include "lld/ReaderWriter/PECOFFLinkingContext.h" +#include <map> + +namespace lld { +namespace pecoff { +namespace loadcfg { + +class LoadConfigAtom : public COFFLinkerInternalAtom { +public: + LoadConfigAtom(VirtualFile &file, const DefinedAtom *sxdata, int count); + + SectionChoice sectionChoice() const override { return sectionCustomRequired; } + StringRef customSectionName() const override { return ".loadcfg"; } + ContentType contentType() const override { return typeData; } + ContentPermissions permissions() const override { return permR__; } + + template <typename T> T *getContents() const { + return (T *)const_cast<uint8_t *>(rawContent().data()); + } +}; + +} // namespace loadcfg + +class LoadConfigPass : public lld::Pass { +public: + LoadConfigPass(PECOFFLinkingContext &ctx) : _ctx(ctx), _file(ctx) {} + + void perform(std::unique_ptr<MutableFile> &file) override; + +private: + PECOFFLinkingContext &_ctx; + VirtualFile _file; + mutable llvm::BumpPtrAllocator _alloc; +}; + +} // namespace pecoff +} // namespace lld + +#endif diff --git a/lib/ReaderWriter/PECOFF/Makefile b/lib/ReaderWriter/PECOFF/Makefile new file mode 100644 index 000000000000..3ad16969bba7 --- /dev/null +++ b/lib/ReaderWriter/PECOFF/Makefile @@ -0,0 +1,14 @@ +##===- lld/lib/ReaderWriter/PECOFF/Makefile --------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LLD_LEVEL := ../../.. 
+LIBRARYNAME := lldPECOFF +USEDLIBS = lldCore.a + +include $(LLD_LEVEL)/Makefile diff --git a/lib/ReaderWriter/PECOFF/OrderPass.h b/lib/ReaderWriter/PECOFF/OrderPass.h new file mode 100644 index 000000000000..73133ff73638 --- /dev/null +++ b/lib/ReaderWriter/PECOFF/OrderPass.h @@ -0,0 +1,67 @@ +//===- lib/ReaderWriter/PECOFF/OrderPass.h -------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file \brief This pass sorts atoms by section name, so that they will appear +/// in the correct order in the output. +/// +/// In COFF, sections will be merged into one section by the linker if their +/// names are the same after discarding the "$" character and all characters +/// that follow it. The characters following the "$" character +/// determine the merge order. Assume there's an object file containing four +/// data sections in the following order. +/// +/// - .data$2 +/// - .data$3 +/// - .data$1 +/// - .data +/// +/// In this case, the resulting binary should have ".data" section with the +/// contents of ".data", ".data$1", ".data$2" and ".data$3" in that order. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_PE_COFF_ORDER_PASS_H +#define LLD_READER_WRITER_PE_COFF_ORDER_PASS_H + +#include "Atoms.h" +#include "lld/Core/Parallel.h" +#include "lld/Core/Pass.h" +#include <algorithm> + +namespace lld { +namespace pecoff { + +static bool compare(const DefinedAtom *lhs, const DefinedAtom *rhs) { + bool lhsCustom = (lhs->sectionChoice() == DefinedAtom::sectionCustomRequired); + bool rhsCustom = (rhs->sectionChoice() == DefinedAtom::sectionCustomRequired); + if (lhsCustom && rhsCustom) { + int cmp = lhs->customSectionName().compare(rhs->customSectionName()); + if (cmp != 0) + return cmp < 0; + return DefinedAtom::compareByPosition(lhs, rhs); + } + if (lhsCustom && !rhsCustom) + return true; + if (!lhsCustom && rhsCustom) + return false; + return DefinedAtom::compareByPosition(lhs, rhs); +} + +class OrderPass : public lld::Pass { +public: + void perform(std::unique_ptr<MutableFile> &file) override { + MutableFile::DefinedAtomRange defined = file->definedAtoms(); + parallel_sort(defined.begin(), defined.end(), compare); + } +}; + +} // namespace pecoff +} // namespace lld + +#endif diff --git a/lib/ReaderWriter/PECOFF/PDBPass.h b/lib/ReaderWriter/PECOFF/PDBPass.h new file mode 100644 index 000000000000..0efa054db823 --- /dev/null +++ b/lib/ReaderWriter/PECOFF/PDBPass.h @@ -0,0 +1,43 @@ +//===- lib/ReaderWriter/PECOFF/PDBPass.h ----------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_PE_COFF_PDB_PASS_H +#define LLD_READER_WRITER_PE_COFF_PDB_PASS_H + +#include "lld/Core/Pass.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Process.h" + +namespace lld { +namespace pecoff { + +class PDBPass : public lld::Pass { +public: + PDBPass(PECOFFLinkingContext &ctx) : _ctx(ctx) {} + + void perform(std::unique_ptr<MutableFile> &file) override { + if (_ctx.getDebug()) + touch(_ctx.getPDBFilePath()); + } + +private: + void touch(StringRef path) { + int fd; + if (llvm::sys::fs::openFileForWrite(path, fd, llvm::sys::fs::F_Append)) + llvm::report_fatal_error("failed to create a PDB file"); + llvm::sys::Process::SafelyCloseFileDescriptor(fd); + } + + PECOFFLinkingContext &_ctx; +}; + +} // namespace pecoff +} // namespace lld + +#endif diff --git a/lib/ReaderWriter/PECOFF/PECOFFLinkingContext.cpp b/lib/ReaderWriter/PECOFF/PECOFFLinkingContext.cpp new file mode 100644 index 000000000000..6a657e33541d --- /dev/null +++ b/lib/ReaderWriter/PECOFF/PECOFFLinkingContext.cpp @@ -0,0 +1,352 @@ +//===- lib/ReaderWriter/PECOFF/PECOFFLinkingContext.cpp -------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "Atoms.h" +#include "EdataPass.h" +#include "IdataPass.h" +#include "InferSubsystemPass.h" +#include "LinkerGeneratedSymbolFile.h" +#include "LoadConfigPass.h" +#include "OrderPass.h" +#include "PDBPass.h" +#include "lld/Core/PassManager.h" +#include "lld/Core/Reader.h" +#include "lld/Core/Simple.h" +#include "lld/Core/Writer.h" +#include "lld/ReaderWriter/PECOFFLinkingContext.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Path.h" +#include <bitset> +#include <climits> +#include <set> + +namespace lld { + +bool PECOFFLinkingContext::validateImpl(raw_ostream &diagnostics) { + if (_stackReserve < _stackCommit) { + diagnostics << "Invalid stack size: reserve size must be equal to or " + << "greater than commit size, but got " << _stackCommit + << " and " << _stackReserve << ".\n"; + return false; + } + + if (_heapReserve < _heapCommit) { + diagnostics << "Invalid heap size: reserve size must be equal to or " + << "greater than commit size, but got " << _heapCommit + << " and " << _heapReserve << ".\n"; + return false; + } + + // It's an error if the base address is not multiple of 64K. + if (getBaseAddress() & 0xffff) { + diagnostics << "Base address have to be multiple of 64K, but got " + << getBaseAddress() << "\n"; + return false; + } + + // Specifing /noentry without /dll is an error. + if (!hasEntry() && !isDll()) { + diagnostics << "/noentry must be specified with /dll\n"; + return false; + } + + // Check for duplicate export ordinals. + std::set<int> exports; + for (const PECOFFLinkingContext::ExportDesc &desc : getDllExports()) { + if (desc.ordinal == -1) + continue; + if (exports.count(desc.ordinal) == 1) { + diagnostics << "Duplicate export ordinals: " << desc.ordinal << "\n"; + return false; + } + exports.insert(desc.ordinal); + } + + // Check for /align. 
+ std::bitset<64> alignment(_sectionDefaultAlignment); + if (alignment.count() != 1) { + diagnostics << "Section alignment must be a power of 2, but got " + << _sectionDefaultAlignment << "\n"; + return false; + } + + _writer = createWriterPECOFF(*this); + return true; +} + +const std::set<std::string> &PECOFFLinkingContext::definedSymbols() { + std::lock_guard<std::recursive_mutex> lock(_mutex); + for (std::unique_ptr<Node> &node : getNodes()) { + if (_seen.count(node.get()) > 0) + continue; + FileNode *fnode = dyn_cast<FileNode>(node.get()); + if (!fnode) + continue; + File *file = fnode->getFile(); + if (file->parse()) + continue; + if (auto *archive = dyn_cast<ArchiveLibraryFile>(file)) { + for (const std::string &sym : archive->getDefinedSymbols()) + _definedSyms.insert(sym); + continue; + } + for (const DefinedAtom *atom : file->defined()) + if (!atom->name().empty()) + _definedSyms.insert(atom->name()); + } + return _definedSyms; +} + +std::unique_ptr<File> PECOFFLinkingContext::createEntrySymbolFile() const { + return LinkingContext::createEntrySymbolFile("<command line option /entry>"); +} + +std::unique_ptr<File> PECOFFLinkingContext::createUndefinedSymbolFile() const { + return LinkingContext::createUndefinedSymbolFile( + "<command line option /include>"); +} + +static int getGroupStartPos(std::vector<std::unique_ptr<Node>> &nodes) { + for (int i = 0, e = nodes.size(); i < e; ++i) + if (GroupEnd *group = dyn_cast<GroupEnd>(nodes[i].get())) + return i - group->getSize(); + llvm::report_fatal_error("internal error"); +} + +void PECOFFLinkingContext::addLibraryFile(std::unique_ptr<FileNode> file) { + GroupEnd *currentGroupEnd; + int pos = -1; + std::vector<std::unique_ptr<Node>> &elements = getNodes(); + for (int i = 0, e = elements.size(); i < e; ++i) { + if ((currentGroupEnd = dyn_cast<GroupEnd>(elements[i].get()))) { + pos = i; + break; + } + } + assert(pos >= 0); + elements.insert(elements.begin() + pos, std::move(file)); + elements[pos + 1] = 
llvm::make_unique<GroupEnd>( + currentGroupEnd->getSize() + 1); +} + +bool PECOFFLinkingContext::createImplicitFiles( + std::vector<std::unique_ptr<File>> &) { + std::vector<std::unique_ptr<Node>> &members = getNodes(); + + // Create a file for the entry point function. + std::unique_ptr<FileNode> entry(new FileNode( + llvm::make_unique<pecoff::EntryPointFile>(*this))); + members.insert(members.begin() + getGroupStartPos(members), std::move(entry)); + + // Create a file for __ImageBase. + std::unique_ptr<FileNode> fileNode(new FileNode( + llvm::make_unique<pecoff::LinkerGeneratedSymbolFile>(*this))); + members.push_back(std::move(fileNode)); + + // Create a file for _imp_ symbols. + std::unique_ptr<FileNode> impFileNode(new FileNode( + llvm::make_unique<pecoff::LocallyImportedSymbolFile>(*this))); + members.push_back(std::move(impFileNode)); + + // Create a file for dllexported symbols. + std::unique_ptr<FileNode> exportNode(new FileNode( + llvm::make_unique<pecoff::ExportedSymbolRenameFile>(*this))); + addLibraryFile(std::move(exportNode)); + + return true; +} + +/// Returns the section name in the resulting executable. +/// +/// Sections in object files are usually output to the executable with the same +/// name, but you can rename by command line option. /merge:from=to makes the +/// linker to combine "from" section contents to "to" section in the +/// executable. We have a mapping for the renaming. This method looks up the +/// table and returns a new section name if renamed. +StringRef +PECOFFLinkingContext::getOutputSectionName(StringRef sectionName) const { + auto it = _renamedSections.find(sectionName); + if (it == _renamedSections.end()) + return sectionName; + return getOutputSectionName(it->second); +} + +/// Adds a mapping to the section renaming table. This method will be used for +/// /merge command line option. 
+bool PECOFFLinkingContext::addSectionRenaming(raw_ostream &diagnostics, + StringRef from, StringRef to) { + auto it = _renamedSections.find(from); + if (it != _renamedSections.end()) { + if (it->second == to) + // There's already the same mapping. + return true; + diagnostics << "Section \"" << from << "\" is already mapped to \"" + << it->second << ", so it cannot be mapped to \"" << to << "\"."; + return true; + } + + // Add a mapping, and check if there's no cycle in the renaming mapping. The + // cycle detection algorithm we use here is naive, but that's OK because the + // number of mapping is usually less than 10. + _renamedSections[from] = to; + for (auto elem : _renamedSections) { + StringRef sectionName = elem.first; + std::set<StringRef> visited; + visited.insert(sectionName); + for (;;) { + auto pos = _renamedSections.find(sectionName); + if (pos == _renamedSections.end()) + break; + if (visited.count(pos->second)) { + diagnostics << "/merge:" << from << "=" << to << " makes a cycle"; + return false; + } + sectionName = pos->second; + visited.insert(sectionName); + } + } + return true; +} + +/// Try to find the input library file from the search paths and append it to +/// the input file list. Returns true if the library file is found. +StringRef PECOFFLinkingContext::searchLibraryFile(StringRef filename) const { + // Current directory always takes precedence over the search paths. + if (llvm::sys::path::is_absolute(filename) || llvm::sys::fs::exists(filename)) + return filename; + // Iterate over the search paths. + for (StringRef dir : _inputSearchPaths) { + SmallString<128> path = dir; + llvm::sys::path::append(path, filename); + if (llvm::sys::fs::exists(path.str())) + return allocate(path.str()); + } + return filename; +} + +/// Returns the decorated name of the given symbol name. On 32-bit x86, it +/// adds "_" at the beginning of the string. On other architectures, the +/// return value is the same as the argument. 
+StringRef PECOFFLinkingContext::decorateSymbol(StringRef name) const { + if (_machineType != llvm::COFF::IMAGE_FILE_MACHINE_I386) + return name; + std::string str = "_"; + str.append(name); + return allocate(str); +} + +StringRef PECOFFLinkingContext::undecorateSymbol(StringRef name) const { + if (_machineType != llvm::COFF::IMAGE_FILE_MACHINE_I386) + return name; + if (!name.startswith("_")) + return name; + return name.substr(1); +} + +uint64_t PECOFFLinkingContext::getBaseAddress() const { + if (_baseAddress == invalidBaseAddress) + return is64Bit() ? pe32PlusDefaultBaseAddress : pe32DefaultBaseAddress; + return _baseAddress; +} + +Writer &PECOFFLinkingContext::writer() const { return *_writer; } + +void PECOFFLinkingContext::setSectionSetMask(StringRef sectionName, + uint32_t newFlags) { + _sectionSetMask[sectionName] |= newFlags; + _sectionClearMask[sectionName] &= ~newFlags; + const uint32_t rwx = (llvm::COFF::IMAGE_SCN_MEM_READ | + llvm::COFF::IMAGE_SCN_MEM_WRITE | + llvm::COFF::IMAGE_SCN_MEM_EXECUTE); + if (newFlags & rwx) + _sectionClearMask[sectionName] |= ~_sectionSetMask[sectionName] & rwx; + assert((_sectionSetMask[sectionName] & _sectionClearMask[sectionName]) == 0); +} + +void PECOFFLinkingContext::setSectionClearMask(StringRef sectionName, + uint32_t newFlags) { + _sectionClearMask[sectionName] |= newFlags; + _sectionSetMask[sectionName] &= ~newFlags; + assert((_sectionSetMask[sectionName] & _sectionClearMask[sectionName]) == 0); +} + +uint32_t PECOFFLinkingContext::getSectionAttributes(StringRef sectionName, + uint32_t flags) const { + auto si = _sectionSetMask.find(sectionName); + uint32_t setMask = (si == _sectionSetMask.end()) ? 0 : si->second; + auto ci = _sectionClearMask.find(sectionName); + uint32_t clearMask = (ci == _sectionClearMask.end()) ? 0 : ci->second; + return (flags | setMask) & ~clearMask; +} + +// Returns true if two export descriptors are the same. 
+static bool sameExportDesc(const PECOFFLinkingContext::ExportDesc &a, + const PECOFFLinkingContext::ExportDesc &b) { + return a.ordinal == b.ordinal && a.ordinal == b.ordinal && + a.noname == b.noname && a.isData == b.isData; +} + +void PECOFFLinkingContext::addDllExport(ExportDesc &desc) { + addInitialUndefinedSymbol(allocate(desc.name)); + + // MSVC link.exe silently drops characters after the first atsign. + // For example, /export:foo@4=bar is equivalent to /export:foo=bar. + // We do the same thing for compatibility. + if (!desc.externalName.empty()) { + StringRef s(desc.externalName); + size_t pos = s.find('@'); + if (pos != s.npos) + desc.externalName = s.substr(0, pos); + } + + // Scan the vector to look for existing entry. It's not very fast, + // but because the number of exported symbol is usually not that + // much, it should be okay. + for (ExportDesc &e : _dllExports) { + if (e.name != desc.name) + continue; + if (!sameExportDesc(e, desc)) + llvm::errs() << "Export symbol '" << desc.name + << "' specified more than once.\n"; + return; + } + _dllExports.push_back(desc); +} + +static std::string replaceExtension(StringRef path, StringRef ext) { + SmallString<128> ss = path; + llvm::sys::path::replace_extension(ss, ext); + return ss.str(); +} + +std::string PECOFFLinkingContext::getOutputImportLibraryPath() const { + if (!_implib.empty()) + return _implib; + return replaceExtension(outputPath(), ".lib"); +} + +std::string PECOFFLinkingContext::getPDBFilePath() const { + assert(_debug); + if (!_pdbFilePath.empty()) + return _pdbFilePath; + return replaceExtension(outputPath(), ".pdb"); +} + +void PECOFFLinkingContext::addPasses(PassManager &pm) { + pm.add(llvm::make_unique<pecoff::PDBPass>(*this)); + pm.add(llvm::make_unique<pecoff::EdataPass>(*this)); + pm.add(llvm::make_unique<pecoff::IdataPass>(*this)); + pm.add(llvm::make_unique<pecoff::OrderPass>()); + pm.add(llvm::make_unique<pecoff::LoadConfigPass>(*this)); + 
pm.add(llvm::make_unique<pecoff::InferSubsystemPass>(*this)); +} + +} // end namespace lld diff --git a/lib/ReaderWriter/PECOFF/Pass.cpp b/lib/ReaderWriter/PECOFF/Pass.cpp new file mode 100644 index 000000000000..ed731984e378 --- /dev/null +++ b/lib/ReaderWriter/PECOFF/Pass.cpp @@ -0,0 +1,95 @@ +//===- lib/ReaderWriter/PECOFF/Pass.cpp -----------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "Atoms.h" +#include "Pass.h" +#include "lld/Core/File.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/COFF.h" + +namespace lld { +namespace pecoff { + +static void addReloc(COFFBaseDefinedAtom *atom, const Atom *target, + size_t offsetInAtom, Reference::KindArch arch, + Reference::KindValue relType) { + atom->addReference(llvm::make_unique<SimpleReference>( + Reference::KindNamespace::COFF, arch, relType, offsetInAtom, target, 0)); +} + +void addDir64Reloc(COFFBaseDefinedAtom *atom, const Atom *target, + llvm::COFF::MachineTypes machine, size_t offsetInAtom) { + switch (machine) { + case llvm::COFF::IMAGE_FILE_MACHINE_I386: + addReloc(atom, target, offsetInAtom, Reference::KindArch::x86, + llvm::COFF::IMAGE_REL_I386_DIR32); + return; + case llvm::COFF::IMAGE_FILE_MACHINE_AMD64: + addReloc(atom, target, offsetInAtom, Reference::KindArch::x86_64, + llvm::COFF::IMAGE_REL_AMD64_ADDR64); + return; + default: + llvm_unreachable("unsupported machine type"); + } +} + +void addDir32Reloc(COFFBaseDefinedAtom *atom, const Atom *target, + llvm::COFF::MachineTypes machine, size_t offsetInAtom) { + switch (machine) { + case llvm::COFF::IMAGE_FILE_MACHINE_I386: + addReloc(atom, target, offsetInAtom, Reference::KindArch::x86, + llvm::COFF::IMAGE_REL_I386_DIR32); + return; + case llvm::COFF::IMAGE_FILE_MACHINE_AMD64: + addReloc(atom, target, offsetInAtom, 
Reference::KindArch::x86_64, + llvm::COFF::IMAGE_REL_AMD64_ADDR32); + return; + default: + llvm_unreachable("unsupported machine type"); + } +} + +void addDir32NBReloc(COFFBaseDefinedAtom *atom, const Atom *target, + llvm::COFF::MachineTypes machine, size_t offsetInAtom) { + switch (machine) { + case llvm::COFF::IMAGE_FILE_MACHINE_I386: + addReloc(atom, target, offsetInAtom, Reference::KindArch::x86, + llvm::COFF::IMAGE_REL_I386_DIR32NB); + return; + case llvm::COFF::IMAGE_FILE_MACHINE_AMD64: + addReloc(atom, target, offsetInAtom, Reference::KindArch::x86_64, + llvm::COFF::IMAGE_REL_AMD64_ADDR32NB); + return; + case llvm::COFF::IMAGE_FILE_MACHINE_ARMNT: + addReloc(atom, target, offsetInAtom, Reference::KindArch::ARM, + llvm::COFF::IMAGE_REL_ARM_ADDR32NB); + return; + default: + llvm_unreachable("unsupported machine type"); + } +} + +void addRel32Reloc(COFFBaseDefinedAtom *atom, const Atom *target, + llvm::COFF::MachineTypes machine, size_t offsetInAtom) { + switch (machine) { + case llvm::COFF::IMAGE_FILE_MACHINE_I386: + addReloc(atom, target, offsetInAtom, Reference::KindArch::x86, + llvm::COFF::IMAGE_REL_I386_REL32); + return; + case llvm::COFF::IMAGE_FILE_MACHINE_AMD64: + addReloc(atom, target, offsetInAtom, Reference::KindArch::x86_64, + llvm::COFF::IMAGE_REL_AMD64_REL32); + return; + default: + llvm_unreachable("unsupported machine type"); + } +} + +} // end namespace pecoff +} // end namespace lld diff --git a/lib/ReaderWriter/PECOFF/Pass.h b/lib/ReaderWriter/PECOFF/Pass.h new file mode 100644 index 000000000000..22466f77859e --- /dev/null +++ b/lib/ReaderWriter/PECOFF/Pass.h @@ -0,0 +1,34 @@ +//===- lib/ReaderWriter/PECOFF/Pass.h -------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_PE_COFF_PASS_H +#define LLD_READER_WRITER_PE_COFF_PASS_H + +#include "Atoms.h" +#include "llvm/Support/COFF.h" + +namespace lld { +namespace pecoff { + +/// Adds to \p atom a reference to \p target at \p offsetInAtom. The kind is +/// IMAGE_REL_AMD64_ADDR64 on AMD64 and IMAGE_REL_I386_DIR32 on I386; other +/// machine types are not supported. +void addDir64Reloc(COFFBaseDefinedAtom *atom, const Atom *target, + llvm::COFF::MachineTypes machine, size_t offsetInAtom); + +/// Same as addDir64Reloc, but the kind is IMAGE_REL_I386_DIR32 on I386 and +/// IMAGE_REL_AMD64_ADDR32 on AMD64. +void addDir32Reloc(COFFBaseDefinedAtom *atom, const Atom *target, + llvm::COFF::MachineTypes machine, size_t offsetInAtom); + +/// Same as addDir64Reloc, but the kind is the DIR32NB/ADDR32NB relocation of +/// the machine. I386, AMD64 and ARMNT are supported. +void addDir32NBReloc(COFFBaseDefinedAtom *atom, const Atom *target, + llvm::COFF::MachineTypes machine, size_t offsetInAtom); + +/// Same as addDir64Reloc, but the kind is IMAGE_REL_I386_REL32 on I386 and +/// IMAGE_REL_AMD64_REL32 on AMD64. +void addRel32Reloc(COFFBaseDefinedAtom *atom, const Atom *target, + llvm::COFF::MachineTypes machine, size_t offsetInAtom); + +} // namespace pecoff +} // namespace lld + +#endif diff --git a/lib/ReaderWriter/PECOFF/ReaderCOFF.cpp b/lib/ReaderWriter/PECOFF/ReaderCOFF.cpp new file mode 100644 index 000000000000..f060bd8dc0bc --- /dev/null +++ b/lib/ReaderWriter/PECOFF/ReaderCOFF.cpp @@ -0,0 +1,1140 @@ +//===- lib/ReaderWriter/PECOFF/ReaderCOFF.cpp -----------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "Atoms.h" +#include "lld/Core/Alias.h" +#include "lld/Core/File.h" +#include "lld/Core/Reader.h" +#include "lld/Driver/Driver.h" +#include "lld/ReaderWriter/PECOFFLinkingContext.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Object/COFF.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FileOutputBuffer.h" +#include "llvm/Support/FileUtilities.h" +#include "llvm/Support/Memory.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/Program.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <map> +#include <mutex> +#include <set> +#include <system_error> +#include <vector> + +#define DEBUG_TYPE "ReaderCOFF" + +using lld::pecoff::COFFBSSAtom; +using lld::pecoff::COFFDefinedAtom; +using lld::pecoff::COFFDefinedFileAtom; +using lld::pecoff::COFFUndefinedAtom; +using llvm::object::coff_aux_section_definition; +using llvm::object::coff_aux_weak_external; +using llvm::object::coff_relocation; +using llvm::object::coff_section; +using llvm::object::coff_symbol; +using llvm::support::ulittle32_t; + +using namespace lld; + +namespace { + +class BumpPtrStringSaver : public llvm::cl::StringSaver { +public: + const char *SaveString(const char *str) override { + size_t len = strlen(str); + std::lock_guard<std::mutex> lock(_allocMutex); + char *copy = _alloc.Allocate<char>(len + 1); + memcpy(copy, str, len + 1); + return copy; + } + +private: + llvm::BumpPtrAllocator _alloc; + std::mutex _allocMutex; +}; + +class FileCOFF : public File { +private: + typedef std::vector<llvm::object::COFFSymbolRef> SymbolVectorT; + typedef std::map<const coff_section *, 
SymbolVectorT> SectionToSymbolsT; + +public: + FileCOFF(std::unique_ptr<MemoryBuffer> mb, PECOFFLinkingContext &ctx) + : File(mb->getBufferIdentifier(), kindObject), _mb(std::move(mb)), + _compatibleWithSEH(false), _ordinal(1), + _machineType(llvm::COFF::MT_Invalid), _ctx(ctx) {} + + std::error_code doParse() override; + bool isCompatibleWithSEH() const { return _compatibleWithSEH; } + llvm::COFF::MachineTypes getMachineType() { return _machineType; } + + const atom_collection<DefinedAtom> &defined() const override { + return _definedAtoms; + } + + const atom_collection<UndefinedAtom> &undefined() const override { + return _undefinedAtoms; + } + + const atom_collection<SharedLibraryAtom> &sharedLibrary() const override { + return _sharedLibraryAtoms; + } + + const atom_collection<AbsoluteAtom> &absolute() const override { + return _absoluteAtoms; + } + + void beforeLink() override; + + void addUndefinedSymbol(StringRef sym) { + _undefinedAtoms._atoms.push_back(new (_alloc) COFFUndefinedAtom(*this, sym)); + } + + AliasAtom *createAlias(StringRef name, const DefinedAtom *target, int cnt); + void createAlternateNameAtoms(); + std::error_code parseDirectiveSection(StringRef directives); + + mutable llvm::BumpPtrAllocator _alloc; + +private: + std::error_code readSymbolTable(SymbolVectorT &result); + + void createAbsoluteAtoms(const SymbolVectorT &symbols, + std::vector<const AbsoluteAtom *> &result); + + std::error_code + createUndefinedAtoms(const SymbolVectorT &symbols, + std::vector<const UndefinedAtom *> &result); + + std::error_code + createDefinedSymbols(const SymbolVectorT &symbols, + std::vector<const DefinedAtom *> &result); + + std::error_code cacheSectionAttributes(); + std::error_code maybeCreateSXDataAtoms(); + + std::error_code + AtomizeDefinedSymbolsInSection(const coff_section *section, + SymbolVectorT &symbols, + std::vector<COFFDefinedFileAtom *> &atoms); + + std::error_code + AtomizeDefinedSymbols(SectionToSymbolsT &definedSymbols, + 
std::vector<const DefinedAtom *> &definedAtoms); + + std::error_code findAtomAt(const coff_section *section, + uint32_t targetAddress, + COFFDefinedFileAtom *&result, + uint32_t &offsetInAtom); + + std::error_code getAtomBySymbolIndex(uint32_t index, Atom *&ret); + + std::error_code + addRelocationReference(const coff_relocation *rel, + const coff_section *section); + + std::error_code getSectionContents(StringRef sectionName, + ArrayRef<uint8_t> &result); + std::error_code getReferenceArch(Reference::KindArch &result); + std::error_code addRelocationReferenceToAtoms(); + std::error_code findSection(StringRef name, const coff_section *&result); + StringRef ArrayRefToString(ArrayRef<uint8_t> array); + uint64_t getNextOrdinal(); + + std::unique_ptr<const llvm::object::COFFObjectFile> _obj; + std::unique_ptr<MemoryBuffer> _mb; + atom_collection_vector<DefinedAtom> _definedAtoms; + atom_collection_vector<UndefinedAtom> _undefinedAtoms; + atom_collection_vector<SharedLibraryAtom> _sharedLibraryAtoms; + atom_collection_vector<AbsoluteAtom> _absoluteAtoms; + + // The target type of the object. + Reference::KindArch _referenceArch; + + // True if the object has "@feat.00" symbol. + bool _compatibleWithSEH; + + // A map from symbol to its name. All symbols should be in this map except + // unnamed ones. + std::map<llvm::object::COFFSymbolRef, StringRef> _symbolName; + + // A map from symbol to its resultant atom. + std::map<llvm::object::COFFSymbolRef, Atom *> _symbolAtom; + + // A map from symbol to its aux symbol. + std::map<llvm::object::COFFSymbolRef, llvm::object::COFFSymbolRef> _auxSymbol; + + // A map from section to its atoms. + std::map<const coff_section *, std::vector<COFFDefinedFileAtom *>> + _sectionAtoms; + + // A set of COMDAT sections. + std::set<const coff_section *> _comdatSections; + + // A map to get whether the section allows its contents to be merged or not. 
+ std::map<const coff_section *, DefinedAtom::Merge> _merge; + + // COMDAT associative sections + std::multimap<const coff_section *, const coff_section *> _association; + + // A sorted map to find an atom from a section and an offset within + // the section. + std::map<const coff_section *, std::multimap<uint32_t, COFFDefinedAtom *>> + _definedAtomLocations; + + uint64_t _ordinal; + llvm::COFF::MachineTypes _machineType; + PECOFFLinkingContext &_ctx; + mutable BumpPtrStringSaver _stringSaver; +}; + +// Converts the COFF symbol attribute to the LLD's atom attribute. +Atom::Scope getScope(llvm::object::COFFSymbolRef symbol) { + switch (symbol.getStorageClass()) { + case llvm::COFF::IMAGE_SYM_CLASS_EXTERNAL: + return Atom::scopeGlobal; + case llvm::COFF::IMAGE_SYM_CLASS_STATIC: + case llvm::COFF::IMAGE_SYM_CLASS_LABEL: + return Atom::scopeTranslationUnit; + } + llvm_unreachable("Unknown scope"); +} + +DefinedAtom::ContentType getContentType(const coff_section *section) { + if (section->Characteristics & llvm::COFF::IMAGE_SCN_CNT_CODE) + return DefinedAtom::typeCode; + if (section->Characteristics & llvm::COFF::IMAGE_SCN_CNT_INITIALIZED_DATA) + return DefinedAtom::typeData; + if (section->Characteristics & llvm::COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA) + return DefinedAtom::typeZeroFill; + return DefinedAtom::typeUnknown; +} + +DefinedAtom::ContentPermissions getPermissions(const coff_section *section) { + if (section->Characteristics & llvm::COFF::IMAGE_SCN_MEM_READ && + section->Characteristics & llvm::COFF::IMAGE_SCN_MEM_WRITE) + return DefinedAtom::permRW_; + if (section->Characteristics & llvm::COFF::IMAGE_SCN_MEM_READ && + section->Characteristics & llvm::COFF::IMAGE_SCN_MEM_EXECUTE) + return DefinedAtom::permR_X; + if (section->Characteristics & llvm::COFF::IMAGE_SCN_MEM_READ) + return DefinedAtom::permR__; + return DefinedAtom::perm___; +} + +/// Returns the alignment of the section. 
The contents of the section must be +/// aligned by this value in the resulting executable/DLL. +DefinedAtom::Alignment getAlignment(const coff_section *section) { + if (section->Characteristics & llvm::COFF::IMAGE_SCN_TYPE_NO_PAD) + return DefinedAtom::Alignment(0); + + // Bit [20:24] contains section alignment information. We need to decrease + // the value stored by 1 in order to get the real exponent (e.g, ALIGN_1BYTE + // is 0x00100000, but the exponent should be 0) + uint32_t characteristics = (section->Characteristics >> 20) & 0xf; + + // If all bits are off, we treat it as if ALIGN_1BYTE was on. The PE/COFF spec + // does not say anything about this case, but CVTRES.EXE does not set any bit + // in characteristics[20:24], and its output is intended to be copied to .rsrc + // section with no padding, so I think doing this is the right thing. + if (characteristics == 0) + return DefinedAtom::Alignment(0); + + uint32_t powerOf2 = characteristics - 1; + return DefinedAtom::Alignment(powerOf2); +} + +DefinedAtom::Merge getMerge(const coff_aux_section_definition *auxsym) { + switch (auxsym->Selection) { + case llvm::COFF::IMAGE_COMDAT_SELECT_NODUPLICATES: + return DefinedAtom::mergeNo; + case llvm::COFF::IMAGE_COMDAT_SELECT_ANY: + return DefinedAtom::mergeAsWeakAndAddressUsed; + case llvm::COFF::IMAGE_COMDAT_SELECT_EXACT_MATCH: + // TODO: This mapping is wrong. Fix it. + return DefinedAtom::mergeByContent; + case llvm::COFF::IMAGE_COMDAT_SELECT_SAME_SIZE: + return DefinedAtom::mergeSameNameAndSize; + case llvm::COFF::IMAGE_COMDAT_SELECT_LARGEST: + return DefinedAtom::mergeByLargestSection; + case llvm::COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE: + case llvm::COFF::IMAGE_COMDAT_SELECT_NEWEST: + // FIXME: These attributes has more complicated semantics than the regular + // weak symbol. These are mapped to mergeAsWeakAndAddressUsed for now + // because the core linker does not support them yet. We eventually have + // to implement them for full COFF support. 
+ return DefinedAtom::mergeAsWeakAndAddressUsed; + default: + llvm_unreachable("Unknown merge type"); + } +} + +StringRef getMachineName(llvm::COFF::MachineTypes Type) { + switch (Type) { + default: llvm_unreachable("unsupported machine type"); + case llvm::COFF::IMAGE_FILE_MACHINE_ARMNT: + return "ARM"; + case llvm::COFF::IMAGE_FILE_MACHINE_I386: + return "X86"; + case llvm::COFF::IMAGE_FILE_MACHINE_AMD64: + return "X64"; + } +} + +std::error_code FileCOFF::doParse() { + auto binaryOrErr = llvm::object::createBinary(_mb->getMemBufferRef()); + if (std::error_code ec = binaryOrErr.getError()) + return ec; + std::unique_ptr<llvm::object::Binary> bin = std::move(binaryOrErr.get()); + + _obj.reset(dyn_cast<const llvm::object::COFFObjectFile>(bin.get())); + if (!_obj) + return make_error_code(llvm::object::object_error::invalid_file_type); + bin.release(); + + _machineType = static_cast<llvm::COFF::MachineTypes>(_obj->getMachine()); + + if (getMachineType() != llvm::COFF::IMAGE_FILE_MACHINE_UNKNOWN && + getMachineType() != _ctx.getMachineType()) { + llvm::errs() << "module machine type '" + << getMachineName(getMachineType()) + << "' conflicts with target machine type '" + << getMachineName(_ctx.getMachineType()) << "'\n"; + return NativeReaderError::conflicting_target_machine; + } + + if (std::error_code ec = getReferenceArch(_referenceArch)) + return ec; + + // Read the symbol table and atomize them if possible. Defined atoms + // cannot be atomized in one pass, so they will be not be atomized but + // added to symbolToAtom. 
+ SymbolVectorT symbols; + if (std::error_code ec = readSymbolTable(symbols)) + return ec; + + createAbsoluteAtoms(symbols, _absoluteAtoms._atoms); + if (std::error_code ec = + createUndefinedAtoms(symbols, _undefinedAtoms._atoms)) + return ec; + if (std::error_code ec = createDefinedSymbols(symbols, _definedAtoms._atoms)) + return ec; + if (std::error_code ec = addRelocationReferenceToAtoms()) + return ec; + if (std::error_code ec = maybeCreateSXDataAtoms()) + return ec; + + // Check for /SAFESEH. + if (_ctx.requireSEH() && !isCompatibleWithSEH()) { + llvm::errs() << "/SAFESEH is specified, but " + << _mb->getBufferIdentifier() + << " is not compatible with SEH.\n"; + return llvm::object::object_error::parse_failed; + } + return std::error_code(); +} + +void FileCOFF::beforeLink() { + // Acquire the mutex to mutate _ctx. + std::lock_guard<std::recursive_mutex> lock(_ctx.getMutex()); + std::set<StringRef> undefSyms; + + // Interpret .drectve section if the section has contents. + ArrayRef<uint8_t> directives; + if (getSectionContents(".drectve", directives)) + return; + if (!directives.empty()) { + std::set<StringRef> orig; + for (StringRef sym : _ctx.initialUndefinedSymbols()) + orig.insert(sym); + if (parseDirectiveSection(ArrayRefToString(directives))) + return; + for (StringRef sym : _ctx.initialUndefinedSymbols()) + if (orig.count(sym) == 0) + undefSyms.insert(sym); + } + + // Add /INCLUDE'ed symbols to the file as if they existed in the + // file as undefined symbols. + for (StringRef sym : undefSyms) { + addUndefinedSymbol(sym); + _ctx.addDeadStripRoot(sym); + } + + // One can define alias symbols using /alternatename:<sym>=<sym> option. + // The mapping for /alternatename is in the context object. This helper + // function iterate over defined atoms and create alias atoms if needed. + createAlternateNameAtoms(); + + // In order to emit SEH table, all input files need to be compatible with + // SEH. Disable SEH if the file being read is not compatible. 
+ if (!isCompatibleWithSEH()) + _ctx.setSafeSEH(false); +} + +/// Iterate over the symbol table to retrieve all symbols. +std::error_code +FileCOFF::readSymbolTable(SymbolVectorT &result) { + for (uint32_t i = 0, e = _obj->getNumberOfSymbols(); i != e; ++i) { + // Retrieve the symbol. + ErrorOr<llvm::object::COFFSymbolRef> sym = _obj->getSymbol(i); + StringRef name; + if (std::error_code ec = sym.getError()) + return ec; + if (sym->getSectionNumber() == llvm::COFF::IMAGE_SYM_DEBUG) + goto next; + result.push_back(*sym); + + if (std::error_code ec = _obj->getSymbolName(*sym, name)) + return ec; + + // Existence of the symbol @feat.00 indicates that object file is compatible + // with Safe Exception Handling. + if (name == "@feat.00") { + _compatibleWithSEH = true; + goto next; + } + + // Cache the name. + _symbolName[*sym] = name; + + // Symbol may be followed by auxiliary symbol table records. The aux + // record can be in any format, but the size is always the same as the + // regular symbol. The aux record supplies additional information for the + // standard symbol. We do not interpret the aux record here, but just + // store it to _auxSymbol. + if (sym->getNumberOfAuxSymbols() > 0) { + ErrorOr<llvm::object::COFFSymbolRef> aux = _obj->getSymbol(i + 1); + if (std::error_code ec = aux.getError()) + return ec; + _auxSymbol[*sym] = *aux; + } + next: + i += sym->getNumberOfAuxSymbols(); + } + return std::error_code(); +} + +/// Create atoms for the absolute symbols. +void FileCOFF::createAbsoluteAtoms(const SymbolVectorT &symbols, + std::vector<const AbsoluteAtom *> &result) { + for (llvm::object::COFFSymbolRef sym : symbols) { + if (sym.getSectionNumber() != llvm::COFF::IMAGE_SYM_ABSOLUTE) + continue; + auto *atom = new (_alloc) SimpleAbsoluteAtom(*this, _symbolName[sym], + getScope(sym), sym.getValue()); + result.push_back(atom); + _symbolAtom[sym] = atom; + } +} + +/// Create atoms for the undefined symbols. 
This code is bit complicated +/// because it supports "weak externals" mechanism of COFF. If an undefined +/// symbol (sym1) has auxiliary data, the data contains a symbol table index +/// at which the "second symbol" (sym2) for sym1 exists. If sym1 is resolved, +/// it's linked normally. If not, sym1 is resolved as if it has sym2's +/// name. This relationship between sym1 and sym2 is represented using +/// fallback mechanism of undefined symbol. +std::error_code +FileCOFF::createUndefinedAtoms(const SymbolVectorT &symbols, + std::vector<const UndefinedAtom *> &result) { + std::map<llvm::object::COFFSymbolRef, llvm::object::COFFSymbolRef> + weakExternal; + std::set<llvm::object::COFFSymbolRef> fallback; + for (llvm::object::COFFSymbolRef sym : symbols) { + if (sym.getSectionNumber() != llvm::COFF::IMAGE_SYM_UNDEFINED) + continue; + // Create a mapping from sym1 to sym2, if the undefined symbol has + // auxiliary data. + auto iter = _auxSymbol.find(sym); + if (iter == _auxSymbol.end()) + continue; + const coff_aux_weak_external *aux = + reinterpret_cast<const coff_aux_weak_external *>( + iter->second.getRawPtr()); + ErrorOr<llvm::object::COFFSymbolRef> sym2 = _obj->getSymbol(aux->TagIndex); + if (std::error_code ec = sym2.getError()) + return ec; + weakExternal[sym] = *sym2; + fallback.insert(*sym2); + } + + // Create atoms for the undefined symbols. + for (llvm::object::COFFSymbolRef sym : symbols) { + if (sym.getSectionNumber() != llvm::COFF::IMAGE_SYM_UNDEFINED) + continue; + if (fallback.count(sym) > 0) + continue; + + // If the symbol has sym2, create an undefiend atom for sym2, so that we + // can pass it as a fallback atom. + UndefinedAtom *fallback = nullptr; + auto iter = weakExternal.find(sym); + if (iter != weakExternal.end()) { + llvm::object::COFFSymbolRef sym2 = iter->second; + fallback = new (_alloc) COFFUndefinedAtom(*this, _symbolName[sym2]); + _symbolAtom[sym2] = fallback; + } + + // Create an atom for the symbol. 
+ auto *atom = + new (_alloc) COFFUndefinedAtom(*this, _symbolName[sym], fallback); + result.push_back(atom); + _symbolAtom[sym] = atom; + } + return std::error_code(); +} + +/// Create atoms for the defined symbols. This pass is a bit complicated than +/// the other two, because in order to create the atom for the defined symbol +/// we need to know the adjacent symbols. +std::error_code +FileCOFF::createDefinedSymbols(const SymbolVectorT &symbols, + std::vector<const DefinedAtom *> &result) { + // A defined atom can be merged if its section attribute allows its contents + // to be merged. In COFF, it's not very easy to get the section attribute + // for the symbol, so scan all sections in advance and cache the attributes + // for later use. + if (std::error_code ec = cacheSectionAttributes()) + return ec; + + // Filter non-defined atoms, and group defined atoms by its section. + SectionToSymbolsT definedSymbols; + for (llvm::object::COFFSymbolRef sym : symbols) { + // A symbol with section number 0 and non-zero value represents a common + // symbol. The MS COFF spec did not give a definition of what the common + // symbol is. We should probably follow ELF's definition shown below. + // + // - If one object file has a common symbol and another has a definition, + // the common symbol is treated as an undefined reference. + // - If there is no definition for a common symbol, the program linker + // acts as though it saw a definition initialized to zero of the + // appropriate size. + // - Two object files may have common symbols of + // different sizes, in which case the program linker will use the + // largest size. + // + // FIXME: We are currently treating the common symbol as a normal + // mergeable atom. Implement the above semantcis. 
+ if (sym.getSectionNumber() == llvm::COFF::IMAGE_SYM_UNDEFINED && + sym.getValue() > 0) { + StringRef name = _symbolName[sym]; + uint32_t size = sym.getValue(); + auto *atom = new (_alloc) + COFFBSSAtom(*this, name, getScope(sym), DefinedAtom::permRW_, + DefinedAtom::mergeAsWeakAndAddressUsed, size, getNextOrdinal()); + + // Common symbols should be aligned on natural boundaries with the maximum + // of 32 byte. It's not documented anywhere, but it's what MSVC link.exe + // seems to be doing. + uint64_t alignment = std::min((uint64_t)32, llvm::NextPowerOf2(size)); + atom->setAlignment( + DefinedAtom::Alignment(llvm::countTrailingZeros(alignment))); + result.push_back(atom); + continue; + } + + // Skip if it's not for defined atom. + if (sym.getSectionNumber() == llvm::COFF::IMAGE_SYM_DEBUG || + sym.getSectionNumber() == llvm::COFF::IMAGE_SYM_ABSOLUTE || + sym.getSectionNumber() == llvm::COFF::IMAGE_SYM_UNDEFINED) + continue; + + const coff_section *sec; + if (std::error_code ec = _obj->getSection(sym.getSectionNumber(), sec)) + return ec; + assert(sec && "SectionIndex > 0, Sec must be non-null!"); + + uint8_t sc = sym.getStorageClass(); + if (sc != llvm::COFF::IMAGE_SYM_CLASS_EXTERNAL && + sc != llvm::COFF::IMAGE_SYM_CLASS_STATIC && + sc != llvm::COFF::IMAGE_SYM_CLASS_FUNCTION && + sc != llvm::COFF::IMAGE_SYM_CLASS_LABEL) { + llvm::errs() << "Unable to create atom for: " << _symbolName[sym] << " (" + << static_cast<int>(sc) << ")\n"; + return llvm::object::object_error::parse_failed; + } + + definedSymbols[sec].push_back(sym); + } + + // Atomize the defined symbols. + if (std::error_code ec = AtomizeDefinedSymbols(definedSymbols, result)) + return ec; + + return std::error_code(); +} + +// Cache the COMDAT attributes, which indicate whether the symbols in the +// section can be merged or not. 
+std::error_code FileCOFF::cacheSectionAttributes() { + // The COMDAT section attribute is not an attribute of coff_section, but is + // stored in the auxiliary symbol for the first symbol referring a COMDAT + // section. It feels to me that it's unnecessarily complicated, but this is + // how COFF works. + for (auto i : _auxSymbol) { + // Read a section from the file + llvm::object::COFFSymbolRef sym = i.first; + if (sym.getSectionNumber() == llvm::COFF::IMAGE_SYM_ABSOLUTE || + sym.getSectionNumber() == llvm::COFF::IMAGE_SYM_UNDEFINED) + continue; + + const coff_section *sec; + if (std::error_code ec = _obj->getSection(sym.getSectionNumber(), sec)) + return ec; + const coff_aux_section_definition *aux = + reinterpret_cast<const coff_aux_section_definition *>( + i.second.getRawPtr()); + + if (sec->Characteristics & llvm::COFF::IMAGE_SCN_LNK_COMDAT) { + // Read aux symbol data. + _comdatSections.insert(sec); + _merge[sec] = getMerge(aux); + } + + // Handle associative sections. + if (aux->Selection == llvm::COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE) { + const coff_section *parent; + if (std::error_code ec = + _obj->getSection(aux->getNumber(sym.isBigObj()), parent)) + return ec; + _association.insert(std::make_pair(parent, sec)); + } + } + + // The sections that does not have auxiliary symbol are regular sections, in + // which symbols are not allowed to be merged. + for (const auto §ion : _obj->sections()) { + const coff_section *sec = _obj->getCOFFSection(section); + if (!_merge.count(sec)) + _merge[sec] = DefinedAtom::mergeNo; + } + return std::error_code(); +} + +/// Atomize \p symbols and append the results to \p atoms. The symbols are +/// assumed to have been defined in the \p section. +std::error_code FileCOFF::AtomizeDefinedSymbolsInSection( + const coff_section *section, SymbolVectorT &symbols, + std::vector<COFFDefinedFileAtom *> &atoms) { + // Sort symbols by position. 
+ std::stable_sort( + symbols.begin(), symbols.end(), + [](llvm::object::COFFSymbolRef a, llvm::object::COFFSymbolRef b) + -> bool { return a.getValue() < b.getValue(); }); + + StringRef sectionName; + if (std::error_code ec = _obj->getSectionName(section, sectionName)) + return ec; + + // BSS section does not have contents. If this is the BSS section, create + // COFFBSSAtom instead of COFFDefinedAtom. + if (section->Characteristics & llvm::COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA) { + for (auto si = symbols.begin(), se = symbols.end(); si != se; ++si) { + llvm::object::COFFSymbolRef sym = *si; + uint32_t size = (si + 1 == se) ? section->SizeOfRawData - sym.getValue() + : si[1].getValue() - sym.getValue(); + auto *atom = new (_alloc) COFFBSSAtom( + *this, _symbolName[sym], getScope(sym), getPermissions(section), + DefinedAtom::mergeAsWeakAndAddressUsed, size, getNextOrdinal()); + atoms.push_back(atom); + _symbolAtom[sym] = atom; + } + return std::error_code(); + } + + ArrayRef<uint8_t> secData; + if (std::error_code ec = _obj->getSectionContents(section, secData)) + return ec; + + // A section with IMAGE_SCN_LNK_{INFO,REMOVE} attribute will never become + // a part of the output image. That's what the COFF spec says. + if (section->Characteristics & llvm::COFF::IMAGE_SCN_LNK_INFO || + section->Characteristics & llvm::COFF::IMAGE_SCN_LNK_REMOVE) + return std::error_code(); + + // Supporting debug info needs more work than just linking and combining + // .debug sections. We don't support it yet. Let's discard .debug sections at + // the very beginning of the process so that we don't spend time on linking + // blobs that nobody would understand. 
+ if ((section->Characteristics & llvm::COFF::IMAGE_SCN_MEM_DISCARDABLE) && + (sectionName == ".debug" || sectionName.startswith(".debug$"))) { + return std::error_code(); + } + + DefinedAtom::ContentType type = getContentType(section); + DefinedAtom::ContentPermissions perms = getPermissions(section); + uint64_t sectionSize = section->SizeOfRawData; + bool isComdat = (_comdatSections.count(section) == 1); + + // Create an atom for the entire section. + if (symbols.empty()) { + ArrayRef<uint8_t> data(secData.data(), secData.size()); + auto *atom = new (_alloc) COFFDefinedAtom( + *this, "", sectionName, sectionSize, Atom::scopeTranslationUnit, + type, isComdat, perms, _merge[section], data, getNextOrdinal()); + atoms.push_back(atom); + _definedAtomLocations[section].insert(std::make_pair(0, atom)); + return std::error_code(); + } + + // Create an unnamed atom if the first atom isn't at the start of the + // section. + if (symbols[0].getValue() != 0) { + uint64_t size = symbols[0].getValue(); + ArrayRef<uint8_t> data(secData.data(), size); + auto *atom = new (_alloc) COFFDefinedAtom( + *this, "", sectionName, sectionSize, Atom::scopeTranslationUnit, + type, isComdat, perms, _merge[section], data, getNextOrdinal()); + atoms.push_back(atom); + _definedAtomLocations[section].insert(std::make_pair(0, atom)); + } + + for (auto si = symbols.begin(), se = symbols.end(); si != se; ++si) { + const uint8_t *start = secData.data() + si->getValue(); + // if this is the last symbol, take up the remaining data. + const uint8_t *end = (si + 1 == se) ? 
secData.data() + secData.size() + : secData.data() + (si + 1)->getValue(); + ArrayRef<uint8_t> data(start, end); + auto *atom = new (_alloc) COFFDefinedAtom( + *this, _symbolName[*si], sectionName, sectionSize, getScope(*si), + type, isComdat, perms, _merge[section], data, getNextOrdinal()); + atoms.push_back(atom); + _symbolAtom[*si] = atom; + _definedAtomLocations[section].insert(std::make_pair(si->getValue(), atom)); + } + return std::error_code(); +} + +std::error_code FileCOFF::AtomizeDefinedSymbols( + SectionToSymbolsT &definedSymbols, + std::vector<const DefinedAtom *> &definedAtoms) { + // For each section, make atoms for all the symbols defined in the + // section, and append the atoms to the result objects. + for (auto &i : definedSymbols) { + const coff_section *section = i.first; + SymbolVectorT &symbols = i.second; + std::vector<COFFDefinedFileAtom *> atoms; + if (std::error_code ec = + AtomizeDefinedSymbolsInSection(section, symbols, atoms)) + return ec; + + // Set alignment to the first atom so that the section contents + // will be aligned as specified by the object section header. + if (atoms.size() > 0) + atoms[0]->setAlignment(getAlignment(section)); + + // Connect atoms with layout-after edges. It prevents atoms + // from being GC'ed if there is a reference to one of the atoms + // in the same layout-after chain. In such case we want to emit + // all the atoms appeared in the same chain, because the "live" + // atom may reference other atoms in the same chain. + if (atoms.size() >= 2) + for (auto it = atoms.begin(), e = atoms.end(); it + 1 != e; ++it) + addLayoutEdge(*it, *(it + 1), lld::Reference::kindLayoutAfter); + + for (COFFDefinedFileAtom *atom : atoms) { + _sectionAtoms[section].push_back(atom); + definedAtoms.push_back(atom); + } + } + + // A COMDAT section with SELECT_ASSOCIATIVE attribute refer to other + // section. If the referred section is linked to a binary, the + // referring section needs to be linked too. 
A typical use case of + // this attribute is a static initializer; a parent is a comdat BSS + // section, and a child is a static initializer code for the data. + // + // We add referring section contents to the referred section's + // associate list, so that Resolver takes care of them. + for (auto i : _association) { + const coff_section *parent = i.first; + const coff_section *child = i.second; + if (_sectionAtoms.count(child)) { + COFFDefinedFileAtom *p = _sectionAtoms[parent][0]; + p->addAssociate(_sectionAtoms[child][0]); + } + } + + return std::error_code(); +} + +/// Find the atom that is at \p targetAddress in \p section. +std::error_code FileCOFF::findAtomAt(const coff_section *section, + uint32_t targetAddress, + COFFDefinedFileAtom *&result, + uint32_t &offsetInAtom) { + auto loc = _definedAtomLocations.find(section); + if (loc == _definedAtomLocations.end()) + return llvm::object::object_error::parse_failed; + std::multimap<uint32_t, COFFDefinedAtom *> &map = loc->second; + + auto it = map.upper_bound(targetAddress); + if (it == map.begin()) + return llvm::object::object_error::parse_failed; + --it; + uint32_t atomAddress = it->first; + result = it->second; + offsetInAtom = targetAddress - atomAddress; + return std::error_code(); +} + +/// Find the atom for the symbol that was at the \p index in the symbol +/// table. +std::error_code FileCOFF::getAtomBySymbolIndex(uint32_t index, Atom *&ret) { + ErrorOr<llvm::object::COFFSymbolRef> symbol = _obj->getSymbol(index); + if (std::error_code ec = symbol.getError()) + return ec; + ret = _symbolAtom[*symbol]; + assert(ret); + return std::error_code(); +} + +/// Add relocation information to an atom based on \p rel. \p rel is an +/// relocation entry for the \p section, and \p atoms are all the atoms +/// defined in the \p section. +std::error_code FileCOFF::addRelocationReference( + const coff_relocation *rel, const coff_section *section) { + // The address of the item which relocation is applied. 
Section's + // VirtualAddress needs to be added for historical reasons, but the value + // is usually just zero, so adding it is usually no-op. + uint32_t itemAddress = rel->VirtualAddress + section->VirtualAddress; + + Atom *targetAtom = nullptr; + if (std::error_code ec = + getAtomBySymbolIndex(rel->SymbolTableIndex, targetAtom)) + return ec; + + COFFDefinedFileAtom *atom; + uint32_t offsetInAtom; + if (std::error_code ec = findAtomAt(section, itemAddress, atom, offsetInAtom)) + return ec; + atom->addReference(llvm::make_unique<SimpleReference>( + Reference::KindNamespace::COFF, _referenceArch, rel->Type, offsetInAtom, + targetAtom, 0)); + return std::error_code(); +} + +// Read section contents. +std::error_code FileCOFF::getSectionContents(StringRef sectionName, + ArrayRef<uint8_t> &result) { + const coff_section *section = nullptr; + if (std::error_code ec = findSection(sectionName, section)) + return ec; + if (!section) + return std::error_code(); + if (std::error_code ec = _obj->getSectionContents(section, result)) + return ec; + return std::error_code(); +} + +AliasAtom * +FileCOFF::createAlias(StringRef name, const DefinedAtom *target, int cnt) { + AliasAtom *alias = new (_alloc) AliasAtom(*this, name); + alias->addReference(Reference::KindNamespace::all, Reference::KindArch::all, + Reference::kindLayoutAfter, 0, target, 0); + alias->setMerge(DefinedAtom::mergeAsWeak); + if (target->contentType() == DefinedAtom::typeCode) + alias->setDeadStrip(DefinedAtom::deadStripNever); + alias->setOrdinal(target->ordinal() - cnt); + return alias; +} + +void FileCOFF::createAlternateNameAtoms() { + std::vector<AliasAtom *> aliases; + for (const DefinedAtom *atom : defined()) { + int cnt = 1; + for (StringRef alias : _ctx.getAlternateNames(atom->name())) + aliases.push_back(createAlias(alias, atom, cnt++)); + } + for (AliasAtom *alias : aliases) + _definedAtoms._atoms.push_back(alias); +} + +// Interpret the contents of .drectve section. 
If exists, the section contains +// a string containing command line options. The linker is expected to +// interpret the options as if they were given via the command line. +// +// The section mainly contains /defaultlib (-l in Unix), but can contain any +// options as long as they are valid. +std::error_code +FileCOFF::parseDirectiveSection(StringRef directives) { + DEBUG(llvm::dbgs() << ".drectve: " << directives << "\n"); + + // Split the string into tokens, as the shell would do for argv. + SmallVector<const char *, 16> tokens; + tokens.push_back("link"); // argv[0] is the command name. Will be ignored. + llvm::cl::TokenizeWindowsCommandLine(directives, _stringSaver, tokens); + tokens.push_back(nullptr); + + // Calls the command line parser to interpret the token string as if they + // were given via the command line. + int argc = tokens.size() - 1; + const char **argv = &tokens[0]; + std::string errorMessage; + llvm::raw_string_ostream stream(errorMessage); + PECOFFLinkingContext::ParseDirectives parseDirectives = + _ctx.getParseDirectives(); + bool parseFailed = !parseDirectives(argc, argv, _ctx, stream); + stream.flush(); + // Print error message if error. + if (parseFailed) { + return make_dynamic_error_code( + Twine("Failed to parse '") + directives + "'\n" + + "Reason: " + errorMessage); + } + if (!errorMessage.empty()) { + llvm::errs() << "lld warning: " << errorMessage << "\n"; + } + return std::error_code(); +} + +/// Returns the target machine type of the current object file. 
+std::error_code FileCOFF::getReferenceArch(Reference::KindArch &result) { + switch (_obj->getMachine()) { + case llvm::COFF::IMAGE_FILE_MACHINE_I386: + result = Reference::KindArch::x86; + return std::error_code(); + case llvm::COFF::IMAGE_FILE_MACHINE_AMD64: + result = Reference::KindArch::x86_64; + return std::error_code(); + case llvm::COFF::IMAGE_FILE_MACHINE_ARMNT: + result = Reference::KindArch::ARM; + return std::error_code(); + case llvm::COFF::IMAGE_FILE_MACHINE_UNKNOWN: + result = Reference::KindArch::all; + return std::error_code(); + } + llvm::errs() << "Unsupported machine type: 0x" + << llvm::utohexstr(_obj->getMachine()) << '\n'; + return llvm::object::object_error::parse_failed; +} + +/// Add relocation information to atoms. +std::error_code FileCOFF::addRelocationReferenceToAtoms() { + // Relocation entries are defined for each section. + for (const auto &sec : _obj->sections()) { + const coff_section *section = _obj->getCOFFSection(sec); + + // Skip if there's no atom for the section. Currently we do not create any + // atoms for some sections, such as "debug$S", and such sections need to + // be skipped here too. + if (_sectionAtoms.find(section) == _sectionAtoms.end()) + continue; + + for (const auto &reloc : sec.relocations()) { + const coff_relocation *rel = _obj->getCOFFRelocation(reloc); + if (auto ec = addRelocationReference(rel, section)) + return ec; + } + } + return std::error_code(); +} + +// Read .sxdata section if exists. .sxdata is a x86-only section that contains a +// vector of symbol offsets. The symbols pointed by this section are SEH handler +// functions contained in the same object file. The linker needs to construct a +// SEH table and emit it to executable. +// +// On x86, exception handler addresses are in stack, so they are vulnerable to +// stack overflow attack. 
In order to protect against it, Windows runtime uses +// the SEH table to check if a SEH handler address in stack is a real address of +// a handler created by compiler. +// +// What we want to emit from the linker is a vector of SEH handler VAs, but here +// we have a vector of offsets to the symbol table. So we convert the latter to +// the former. +std::error_code FileCOFF::maybeCreateSXDataAtoms() { + ArrayRef<uint8_t> sxdata; + if (std::error_code ec = getSectionContents(".sxdata", sxdata)) + return ec; + if (sxdata.empty()) + return std::error_code(); + + auto *atom = new (_alloc) COFFDefinedAtom( + *this, "", ".sxdata", 0, Atom::scopeTranslationUnit, + DefinedAtom::typeData, false /*isComdat*/, DefinedAtom::permR__, + DefinedAtom::mergeNo, sxdata, getNextOrdinal()); + + const ulittle32_t *symbolIndex = + reinterpret_cast<const ulittle32_t *>(sxdata.data()); + int numSymbols = sxdata.size() / sizeof(uint32_t); + + for (int i = 0; i < numSymbols; ++i) { + Atom *handlerFunc; + if (std::error_code ec = getAtomBySymbolIndex(symbolIndex[i], handlerFunc)) + return ec; + int offsetInAtom = i * sizeof(uint32_t); + + uint16_t rtype; + switch (_obj->getMachine()) { + case llvm::COFF::IMAGE_FILE_MACHINE_AMD64: + rtype = llvm::COFF::IMAGE_REL_AMD64_ADDR32; + break; + case llvm::COFF::IMAGE_FILE_MACHINE_I386: + rtype = llvm::COFF::IMAGE_REL_I386_DIR32; + break; + default: + llvm_unreachable("unsupported machine type"); + } + + atom->addReference(llvm::make_unique<SimpleReference>( + Reference::KindNamespace::COFF, _referenceArch, rtype, offsetInAtom, + handlerFunc, 0)); + } + + _definedAtoms._atoms.push_back(atom); + return std::error_code(); +} + +/// Find a section by name. 
+std::error_code FileCOFF::findSection(StringRef name, + const coff_section *&result) { + for (const auto &sec : _obj->sections()) { + const coff_section *section = _obj->getCOFFSection(sec); + StringRef sectionName; + if (auto ec = _obj->getSectionName(section, sectionName)) + return ec; + if (sectionName == name) { + result = section; + return std::error_code(); + } + } + // Section was not found, but it's not an error. This method returns + // an error only when there's a read error. + return std::error_code(); +} + +// Convert ArrayRef<uint8_t> to std::string. The array contains a string which +// may not be terminated by NUL. +StringRef FileCOFF::ArrayRefToString(ArrayRef<uint8_t> array) { + // .drectve sections are encoded in either ASCII or UTF-8 with BOM. + // The PE/COFF spec allows ANSI (Windows-1252 encoding), but seems + // it's no longer in use. + // Skip a UTF-8 byte marker if exists. + if (array.size() >= 3 && array[0] == 0xEF && array[1] == 0xBB && + array[2] == 0xBF) { + array = array.slice(3); + } + if (array.empty()) + return ""; + StringRef s(reinterpret_cast<const char *>(array.data()), array.size()); + s = s.substr(0, s.find_first_of('\0')); + std::string *contents = new (_alloc) std::string(s.data(), s.size()); + return StringRef(*contents).trim(); +} + +// getNextOrdinal returns a monotonically increasaing uint64_t number +// starting from 1. There's a large gap between two numbers returned +// from this function, so that you can put other atoms between them. 
+uint64_t FileCOFF::getNextOrdinal() { + return _ordinal++ << 32; +} + +class COFFObjectReader : public Reader { +public: + COFFObjectReader(PECOFFLinkingContext &ctx) : _ctx(ctx) {} + + bool canParse(file_magic magic, StringRef ext, + const MemoryBuffer &) const override { + return magic == llvm::sys::fs::file_magic::coff_object; + } + + std::error_code + loadFile(std::unique_ptr<MemoryBuffer> mb, const Registry &, + std::vector<std::unique_ptr<File>> &result) const override { + // Parse the memory buffer as PECOFF file. + auto *file = new FileCOFF(std::move(mb), _ctx); + result.push_back(std::unique_ptr<File>(file)); + return std::error_code(); + } + +private: + PECOFFLinkingContext &_ctx; +}; + +using namespace llvm::COFF; + +const Registry::KindStrings kindStringsI386[] = { + LLD_KIND_STRING_ENTRY(IMAGE_REL_I386_ABSOLUTE), + LLD_KIND_STRING_ENTRY(IMAGE_REL_I386_DIR16), + LLD_KIND_STRING_ENTRY(IMAGE_REL_I386_REL16), + LLD_KIND_STRING_ENTRY(IMAGE_REL_I386_DIR32), + LLD_KIND_STRING_ENTRY(IMAGE_REL_I386_DIR32NB), + LLD_KIND_STRING_ENTRY(IMAGE_REL_I386_SEG12), + LLD_KIND_STRING_ENTRY(IMAGE_REL_I386_SECTION), + LLD_KIND_STRING_ENTRY(IMAGE_REL_I386_SECREL), + LLD_KIND_STRING_ENTRY(IMAGE_REL_I386_TOKEN), + LLD_KIND_STRING_ENTRY(IMAGE_REL_I386_SECREL7), + LLD_KIND_STRING_ENTRY(IMAGE_REL_I386_REL32), + LLD_KIND_STRING_END}; + +const Registry::KindStrings kindStringsAMD64[] = { + LLD_KIND_STRING_ENTRY(IMAGE_REL_AMD64_ABSOLUTE), + LLD_KIND_STRING_ENTRY(IMAGE_REL_AMD64_ADDR64), + LLD_KIND_STRING_ENTRY(IMAGE_REL_AMD64_ADDR32), + LLD_KIND_STRING_ENTRY(IMAGE_REL_AMD64_ADDR32NB), + LLD_KIND_STRING_ENTRY(IMAGE_REL_AMD64_REL32), + LLD_KIND_STRING_ENTRY(IMAGE_REL_AMD64_REL32_1), + LLD_KIND_STRING_ENTRY(IMAGE_REL_AMD64_REL32_2), + LLD_KIND_STRING_ENTRY(IMAGE_REL_AMD64_REL32_3), + LLD_KIND_STRING_ENTRY(IMAGE_REL_AMD64_REL32_4), + LLD_KIND_STRING_ENTRY(IMAGE_REL_AMD64_REL32_5), + LLD_KIND_STRING_ENTRY(IMAGE_REL_AMD64_SECTION), + LLD_KIND_STRING_ENTRY(IMAGE_REL_AMD64_SECREL), + 
LLD_KIND_STRING_ENTRY(IMAGE_REL_AMD64_SECREL7), + LLD_KIND_STRING_ENTRY(IMAGE_REL_AMD64_TOKEN), + LLD_KIND_STRING_ENTRY(IMAGE_REL_AMD64_SREL32), + LLD_KIND_STRING_ENTRY(IMAGE_REL_AMD64_PAIR), + LLD_KIND_STRING_ENTRY(IMAGE_REL_AMD64_SSPAN32), + LLD_KIND_STRING_END}; + +const Registry::KindStrings kindStringsARMNT[] = { + LLD_KIND_STRING_ENTRY(IMAGE_REL_ARM_ABSOLUTE), + LLD_KIND_STRING_ENTRY(IMAGE_REL_ARM_ADDR32), + LLD_KIND_STRING_ENTRY(IMAGE_REL_ARM_ADDR32NB), + LLD_KIND_STRING_ENTRY(IMAGE_REL_ARM_BRANCH24), + LLD_KIND_STRING_ENTRY(IMAGE_REL_ARM_BRANCH11), + LLD_KIND_STRING_ENTRY(IMAGE_REL_ARM_TOKEN), + LLD_KIND_STRING_ENTRY(IMAGE_REL_ARM_BLX24), + LLD_KIND_STRING_ENTRY(IMAGE_REL_ARM_BLX11), + LLD_KIND_STRING_ENTRY(IMAGE_REL_ARM_SECTION), + LLD_KIND_STRING_ENTRY(IMAGE_REL_ARM_SECREL), + LLD_KIND_STRING_ENTRY(IMAGE_REL_ARM_MOV32A), + LLD_KIND_STRING_ENTRY(IMAGE_REL_ARM_MOV32T), + LLD_KIND_STRING_ENTRY(IMAGE_REL_ARM_BRANCH20T), + LLD_KIND_STRING_ENTRY(IMAGE_REL_ARM_BRANCH24T), + LLD_KIND_STRING_ENTRY(IMAGE_REL_ARM_BLX23T), +}; + +} // end namespace anonymous + +namespace lld { + +void Registry::addSupportCOFFObjects(PECOFFLinkingContext &ctx) { + add(std::unique_ptr<Reader>(new COFFObjectReader(ctx))); + addKindTable(Reference::KindNamespace::COFF, Reference::KindArch::x86, + kindStringsI386); + addKindTable(Reference::KindNamespace::COFF, Reference::KindArch::x86_64, + kindStringsAMD64); + addKindTable(Reference::KindNamespace::COFF, Reference::KindArch::ARM, + kindStringsARMNT); +} + +} diff --git a/lib/ReaderWriter/PECOFF/ReaderImportHeader.cpp b/lib/ReaderWriter/PECOFF/ReaderImportHeader.cpp new file mode 100644 index 000000000000..8c9641376a0d --- /dev/null +++ b/lib/ReaderWriter/PECOFF/ReaderImportHeader.cpp @@ -0,0 +1,389 @@ +//===- lib/ReaderWriter/PECOFF/ReaderImportHeader.cpp ---------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +/// +/// \file \brief This file provides a way to read an import library member in a +/// .lib file. +/// +/// Archive Files in Windows +/// ======================== +/// +/// In Windows, archive files with .lib file extension serve two different +/// purposes. +/// +/// - For static linking: An archive file in this use case contains multiple +/// regular .obj files and is used for static linking. This is the same +/// usage as .a file in Unix. +/// +/// - For dynamic linking: An archive file in this use case contains pseudo +/// .obj files to describe exported symbols of a DLL. Each pseudo .obj file +/// in an archive has a name of an exported symbol and a DLL filename from +/// which the symbol can be imported. When you link a DLL on Windows, you +/// pass the name of the .lib file for the DLL instead of the DLL filename +/// itself. That is the Windows way of linking against a shared library. +/// +/// This file contains a function to handle the pseudo object file. +/// +/// Windows Loader and Import Address Table +/// ======================================= +/// +/// Windows supports a GOT-like mechanism for DLLs. The executable using DLLs +/// contains a list of DLL names and list of symbols that need to be resolved by +/// the loader. Windows loader maps the executable and all the DLLs to memory, +/// resolves the symbols referencing items in DLLs, and updates the import +/// address table (IAT) in memory. The IAT is an array of pointers to all of the +/// data or functions in DLL referenced by the executable. You cannot access +/// items in DLLs directly. They have to be accessed through an extra level of +/// indirection. +/// +/// So, if you want to access an item in DLL, you have to go through a +/// pointer. How do you actually do that? You need a symbol for a pointer in the +/// IAT. 
For each symbol defined in a DLL, a symbol with "__imp_" prefix is +/// exported from the DLL for an IAT entry. For example, if you have a global +/// variable "foo" in a DLL, a pointer to the variable is available as +/// "_imp__foo". The IAT is an array of _imp__ symbols. +/// +/// Is this OK? That's not that complicated. Because items in a DLL are not +/// directly accessible, you need to access through a pointer, and the pointer +/// is available as a symbol with _imp__ prefix. +/// +/// Note 1: Although you can write code with _imp__ prefix, today's compiler and +/// linker let you write code as if there's no extra level of indirection. +/// That's why you haven't seen lots of _imp__ in your code. A variable or a +/// function declared with "dllimport" attribute is treated as an item in a DLL, +/// and the compiler automatically mangles its name and inserts the extra level +/// of indirection when accessing the item. Here are some examples: +/// +/// __declspec(dllimport) int var_in_dll; +/// var_in_dll = 3; // is equivalent to *_imp__var_in_dll = 3; +/// +/// __declspec(dllimport) int fn_in_dll(void); +/// fn_in_dll(); // is equivalent to (*_imp__fn_in_dll)(); +/// +/// It's just the compiler rewrites code for you so that you don't need to +/// handle the indirection yourself. +/// +/// Note 2: __declspec(dllimport) is mandatory for data but optional for +/// function. For a function, the linker creates a jump table with the original +/// symbol name, so that the function is accessible without _imp__ prefix. The +/// same function in a DLL can be called through two different symbols if it's +/// not dllimport'ed. +/// +/// (*_imp__fn)() +/// fn() +/// +/// The above functions do the same thing. fn's content is a JMP instruction to +/// branch to the address pointed by _imp__fn. The latter may be a little bit +/// slower than the former because it will execute the extra JMP instruction, +/// but that's usually negligible. 
+/// +/// If a function is dllimport'ed, which is usually done in a header file, +/// mangled name will be used at compile time so the jump table will not be +/// used. +/// +/// Because there's no way to hide the indirection for data access at link time, +/// data has to be accessed through dllimport'ed symbols or explicit _imp__ +/// prefix. +/// +/// Idata Sections in the Pseudo Object File +/// ======================================== +/// +/// The object file created by cl.exe has several sections whose name starts +/// with ".idata$" followed by a number. The contents of the sections seem to be +/// fragments of a complete ".idata" section. These sections have relocations for +/// the data referenced from the idata section. Generally, the linker discards +/// "$" and all characters that follow from the section name and merges their +/// contents to one section. So, it looks like if everything would work fine, +/// the idata section would naturally be constructed without having any special +/// code for doing that. +/// +/// However, the LLD linker cannot do that. An idata section constructed in that +/// way was never in a valid format. We don't know the reason yet. Our +/// assumption on the idata fragment could simply be wrong, or the LLD linker is +/// not powerful enough to do the job. Meanwhile, we construct the idata section +/// ourselves. All the "idata$" sections in the pseudo object file are currently +/// ignored. +/// +/// Creating Atoms for the Import Address Table +/// =========================================== +/// +/// The function in this file reads a pseudo object file and creates at most two +/// atoms. One is a shared library atom for _imp__ symbol. The other is a +/// defined atom for the JMP instruction if the symbol is for a function. 
+/// +//===----------------------------------------------------------------------===// + +#include "Atoms.h" +#include "lld/Core/Error.h" +#include "lld/Core/File.h" +#include "lld/Core/SharedLibraryAtom.h" +#include "lld/ReaderWriter/PECOFFLinkingContext.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Object/COFF.h" +#include "llvm/Support/COFF.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Memory.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/raw_ostream.h" +#include <cstring> +#include <map> +#include <system_error> +#include <vector> + +using namespace lld; +using namespace lld::pecoff; +using namespace llvm; +using namespace llvm::support::endian; + +#define DEBUG_TYPE "ReaderImportHeader" + +namespace lld { + +namespace { + +// This code is valid both in x86 and x64. +const uint8_t FuncAtomContentX86[] = { + 0xff, 0x25, 0x00, 0x00, 0x00, 0x00, // JMP *0x0 + 0xcc, 0xcc // INT 3; INT 3 +}; + +const uint8_t FuncAtomContentARMNT[] = { + 0x40, 0xf2, 0x00, 0x0c, // mov.w ip, #0 + 0xc0, 0xf2, 0x00, 0x0c, // mov.t ip, #0 + 0xdc, 0xf8, 0x00, 0xf0, // ldr.w pc, [ip] +}; + +static void setJumpInstTarget(COFFLinkerInternalAtom *src, const Atom *dst, + int off, MachineTypes machine) { + SimpleReference *ref; + + switch (machine) { + default: llvm::report_fatal_error("unsupported machine type"); + case llvm::COFF::IMAGE_FILE_MACHINE_I386: + ref = new SimpleReference(Reference::KindNamespace::COFF, + Reference::KindArch::x86, + llvm::COFF::IMAGE_REL_I386_DIR32, + off, dst, 0); + break; + case llvm::COFF::IMAGE_FILE_MACHINE_AMD64: + ref = new SimpleReference(Reference::KindNamespace::COFF, + Reference::KindArch::x86_64, + llvm::COFF::IMAGE_REL_AMD64_REL32, + off, dst, 0); + break; + case llvm::COFF::IMAGE_FILE_MACHINE_ARMNT: + ref = new SimpleReference(Reference::KindNamespace::COFF, + 
Reference::KindArch::ARM, + llvm::COFF::IMAGE_REL_ARM_MOV32T, + off, dst, 0); + break; + } + src->addReference(std::unique_ptr<SimpleReference>(ref)); +} + +/// The defined atom for jump table. +class FuncAtom : public COFFLinkerInternalAtom { +public: + FuncAtom(const File &file, StringRef symbolName, + const COFFSharedLibraryAtom *impAtom, MachineTypes machine) + : COFFLinkerInternalAtom(file, /*oridnal*/ 0, createContent(machine), + symbolName) { + size_t Offset; + + switch (machine) { + default: llvm::report_fatal_error("unsupported machine type"); + case llvm::COFF::IMAGE_FILE_MACHINE_I386: + case llvm::COFF::IMAGE_FILE_MACHINE_AMD64: + Offset = 2; + break; + case llvm::COFF::IMAGE_FILE_MACHINE_ARMNT: + Offset = 0; + break; + } + + setJumpInstTarget(this, impAtom, Offset, machine); + } + + uint64_t ordinal() const override { return 0; } + Scope scope() const override { return scopeGlobal; } + ContentType contentType() const override { return typeCode; } + Alignment alignment() const override { return Alignment(1); } + ContentPermissions permissions() const override { return permR_X; } + +private: + std::vector<uint8_t> createContent(MachineTypes machine) const { + const uint8_t *Data; + size_t Size; + + switch (machine) { + default: llvm::report_fatal_error("unsupported machine type"); + case llvm::COFF::IMAGE_FILE_MACHINE_I386: + case llvm::COFF::IMAGE_FILE_MACHINE_AMD64: + Data = FuncAtomContentX86; + Size = sizeof(FuncAtomContentX86); + break; + case llvm::COFF::IMAGE_FILE_MACHINE_ARMNT: + Data = FuncAtomContentARMNT; + Size = sizeof(FuncAtomContentARMNT); + break; + } + + return std::vector<uint8_t>(Data, Data + Size); + } +}; + +class FileImportLibrary : public File { +public: + FileImportLibrary(std::unique_ptr<MemoryBuffer> mb, MachineTypes machine) + : File(mb->getBufferIdentifier(), kindSharedLibrary), + _mb(std::move(mb)), _machine(machine) {} + + std::error_code doParse() override { + const char *buf = _mb->getBufferStart(); + const char *end = 
_mb->getBufferEnd(); + + // The size of the string that follows the header. + uint32_t dataSize + = read32le(buf + offsetof(COFF::ImportHeader, SizeOfData)); + + // Check if the total size is valid. + if (std::size_t(end - buf) != sizeof(COFF::ImportHeader) + dataSize) + return make_error_code(NativeReaderError::unknown_file_format); + + uint16_t hint = read16le(buf + offsetof(COFF::ImportHeader, OrdinalHint)); + StringRef symbolName(buf + sizeof(COFF::ImportHeader)); + StringRef dllName(buf + sizeof(COFF::ImportHeader) + symbolName.size() + 1); + + // TypeInfo is a bitfield. The least significant 2 bits are import + // type, followed by 3 bit import name type. + uint16_t typeInfo = read16le(buf + offsetof(COFF::ImportHeader, TypeInfo)); + int type = typeInfo & 0x3; + int nameType = (typeInfo >> 2) & 0x7; + + // Symbol name used by the linker may be different from the symbol name used + // by the loader. The latter may lack symbol decorations, or may not even + // have name if it's imported by ordinal. 
+ StringRef importName = symbolNameToImportName(symbolName, nameType); + + const COFFSharedLibraryAtom *dataAtom = + addSharedLibraryAtom(hint, symbolName, importName, dllName); + if (type == llvm::COFF::IMPORT_CODE) + addFuncAtom(symbolName, dllName, dataAtom); + + return std::error_code(); + } + + const atom_collection<DefinedAtom> &defined() const override { + return _definedAtoms; + } + + const atom_collection<UndefinedAtom> &undefined() const override { + return _noUndefinedAtoms; + } + + const atom_collection<SharedLibraryAtom> &sharedLibrary() const override { + return _sharedLibraryAtoms; + } + + const atom_collection<AbsoluteAtom> &absolute() const override { + return _noAbsoluteAtoms; + } + +private: + const COFFSharedLibraryAtom *addSharedLibraryAtom(uint16_t hint, + StringRef symbolName, + StringRef importName, + StringRef dllName) { + auto *atom = new (_alloc) + COFFSharedLibraryAtom(*this, hint, symbolName, importName, dllName); + _sharedLibraryAtoms._atoms.push_back(atom); + return atom; + } + + void addFuncAtom(StringRef symbolName, StringRef dllName, + const COFFSharedLibraryAtom *impAtom) { + auto *atom = new (_alloc) FuncAtom(*this, symbolName, impAtom, _machine); + _definedAtoms._atoms.push_back(atom); + } + + atom_collection_vector<DefinedAtom> _definedAtoms; + atom_collection_vector<SharedLibraryAtom> _sharedLibraryAtoms; + mutable llvm::BumpPtrAllocator _alloc; + + // Does the same thing as StringRef::ltrim() but removes at most one + // character. + StringRef ltrim1(StringRef str, const char *chars) const { + if (!str.empty() && strchr(chars, str[0])) + return str.substr(1); + return str; + } + + // Convert the given symbol name to the import symbol name exported by the + // DLL. + StringRef symbolNameToImportName(StringRef symbolName, int nameType) const { + StringRef ret; + switch (nameType) { + case llvm::COFF::IMPORT_ORDINAL: + // The import is by ordinal. No symbol name will be used to identify the + // item in the DLL. 
Only its ordinal will be used. + return ""; + case llvm::COFF::IMPORT_NAME: + // The import name in this case is identical to the symbol name. + return symbolName; + case llvm::COFF::IMPORT_NAME_NOPREFIX: + // The import name is the symbol name without leading ?, @ or _. + ret = ltrim1(symbolName, "?@_"); + break; + case llvm::COFF::IMPORT_NAME_UNDECORATE: + // Similar to NOPREFIX, but we also need to truncate at the first @. + ret = ltrim1(symbolName, "?@_"); + ret = ret.substr(0, ret.find('@')); + break; + } + std::string *str = new (_alloc) std::string(ret); + return *str; + } + + std::unique_ptr<MemoryBuffer> _mb; + MachineTypes _machine; +}; + +class COFFImportLibraryReader : public Reader { +public: + COFFImportLibraryReader(PECOFFLinkingContext &ctx) : _ctx(ctx) {} + + bool canParse(file_magic magic, StringRef, + const MemoryBuffer &mb) const override { + if (mb.getBufferSize() < sizeof(COFF::ImportHeader)) + return false; + return (magic == llvm::sys::fs::file_magic::coff_import_library); + } + + std::error_code + loadFile(std::unique_ptr<MemoryBuffer> mb, const class Registry &, + std::vector<std::unique_ptr<File> > &result) const override { + auto *file = new FileImportLibrary(std::move(mb), _ctx.getMachineType()); + result.push_back(std::unique_ptr<File>(file)); + return std::error_code(); + } + +private: + PECOFFLinkingContext &_ctx; +}; + +} // end anonymous namespace + +void Registry::addSupportCOFFImportLibraries(PECOFFLinkingContext &ctx) { + add(llvm::make_unique<COFFImportLibraryReader>(ctx)); +} + +} // end namespace lld diff --git a/lib/ReaderWriter/PECOFF/WriterImportLibrary.cpp b/lib/ReaderWriter/PECOFF/WriterImportLibrary.cpp new file mode 100644 index 000000000000..fd3360f018b6 --- /dev/null +++ b/lib/ReaderWriter/PECOFF/WriterImportLibrary.cpp @@ -0,0 +1,118 @@ +//===- lib/ReaderWriter/PECOFF/WriterImportLibrary.cpp --------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open 
Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// This file is responsible for creating the Import Library file. +/// +//===----------------------------------------------------------------------===// + +#include "lld/ReaderWriter/PECOFFLinkingContext.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/FileUtilities.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/Program.h" +#include "llvm/Support/raw_ostream.h" + +namespace lld { +namespace pecoff { + +/// Creates a .def file containing the list of exported symbols. +static std::string +createModuleDefinitionFile(const PECOFFLinkingContext &ctx) { + std::string ret; + llvm::raw_string_ostream os(ret); + os << "LIBRARY \"" << llvm::sys::path::filename(ctx.outputPath()) << "\"\n" + << "EXPORTS\n"; + + for (const PECOFFLinkingContext::ExportDesc &desc : ctx.getDllExports()) { + // Symbol names in a module-definition file will be mangled by lib.exe, + // so we need to demangle them before writing to a .def file. 
+ os << " "; + if (!desc.externalName.empty()) { + os << desc.externalName; + } else if (!desc.mangledName.empty()) { + os << ctx.undecorateSymbol(desc.mangledName); + } else { + os << ctx.undecorateSymbol(desc.name); + } + + if (!desc.isPrivate) + os << " @" << desc.ordinal; + if (desc.noname) + os << " NONAME"; + if (desc.isData) + os << " DATA"; + if (desc.isPrivate) + os << " PRIVATE"; + os << "\n"; + } + os.flush(); + return ret; +} + +static std::string writeToTempFile(StringRef contents) { + SmallString<128> path; + int fd; + if (llvm::sys::fs::createTemporaryFile("tmp", "def", fd, path)) { + llvm::errs() << "Failed to create temporary file\n"; + return ""; + } + llvm::raw_fd_ostream os(fd, /*shouldClose*/ true); + os << contents; + return path.str(); +} + +static void writeTo(StringRef path, StringRef contents) { + int fd; + if (llvm::sys::fs::openFileForWrite(path, fd, llvm::sys::fs::F_Text)) { + llvm::errs() << "Failed to open " << path << "\n"; + return; + } + llvm::raw_fd_ostream os(fd, /*shouldClose*/ true); + os << contents; +} + +/// Creates a .def file and runs lib.exe on it to create an import library. +void writeImportLibrary(const PECOFFLinkingContext &ctx) { + std::string fileContents = createModuleDefinitionFile(ctx); + + std::string program = "lib.exe"; + ErrorOr<std::string> programPathOrErr = llvm::sys::findProgramByName(program); + if (!programPathOrErr) { + llvm::errs() << "Unable to find " << program << " in PATH\n"; + } else { + const std::string &programPath = *programPathOrErr; + + std::string defPath = writeToTempFile(fileContents); + llvm::FileRemover tmpFile(defPath); + + std::string defArg = "/def:"; + defArg.append(defPath); + std::string outputArg = "/out:"; + outputArg.append(ctx.getOutputImportLibraryPath()); + + std::vector<const char *> args; + args.push_back(programPath.c_str()); + args.push_back("/nologo"); + args.push_back(ctx.is64Bit() ? 
"/machine:x64" : "/machine:x86"); + args.push_back(defArg.c_str()); + args.push_back(outputArg.c_str()); + args.push_back(nullptr); + + if (llvm::sys::ExecuteAndWait(programPath.c_str(), &args[0]) != 0) + llvm::errs() << program << " failed\n"; + } + + // If /lldmoduledeffile:<filename> is given, make a copy of the + // temporary module definition file. This feature is for unit tests. + if (!ctx.getModuleDefinitionFile().empty()) + writeTo(ctx.getModuleDefinitionFile(), fileContents); +} + +} // end namespace pecoff +} // end namespace lld diff --git a/lib/ReaderWriter/PECOFF/WriterImportLibrary.h b/lib/ReaderWriter/PECOFF/WriterImportLibrary.h new file mode 100644 index 000000000000..a51b9a3648c5 --- /dev/null +++ b/lib/ReaderWriter/PECOFF/WriterImportLibrary.h @@ -0,0 +1,23 @@ +//===- lib/ReaderWriter/PECOFF/WriterImportLibrary.h ----------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_PE_COFF_WRITER_IMPORT_LIBRARY_H +#define LLD_READER_WRITER_PE_COFF_WRITER_IMPORT_LIBRARY_H + +namespace lld { +class PECOFFLinkingContext; + +namespace pecoff { + +void writeImportLibrary(const PECOFFLinkingContext &ctx); + +} // end namespace pecoff +} // end namespace lld + +#endif diff --git a/lib/ReaderWriter/PECOFF/WriterPECOFF.cpp b/lib/ReaderWriter/PECOFF/WriterPECOFF.cpp new file mode 100644 index 000000000000..d34e2d3d63fd --- /dev/null +++ b/lib/ReaderWriter/PECOFF/WriterPECOFF.cpp @@ -0,0 +1,1417 @@ +//===- lib/ReaderWriter/PECOFF/WriterPECOFF.cpp ---------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +/// +/// \file +/// +/// PE/COFF file consists of DOS Header, PE Header, COFF Header and Section +/// Tables followed by raw section data. +/// +/// This writer is responsible for writing Core Linker results to an Windows +/// executable file. +/// +/// This writer currently supports 32 bit PE/COFF for x86 processor only. +/// +//===----------------------------------------------------------------------===// + +#include "Atoms.h" +#include "WriterImportLibrary.h" +#include "lld/Core/DefinedAtom.h" +#include "lld/Core/File.h" +#include "lld/Core/Writer.h" +#include "lld/ReaderWriter/AtomLayout.h" +#include "lld/ReaderWriter/PECOFFLinkingContext.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/Object/COFF.h" +#include "llvm/Support/COFF.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/FileOutputBuffer.h" +#include "llvm/Support/Format.h" +#include <algorithm> +#include <cstdlib> +#include <map> +#include <time.h> +#include <vector> + +#define DEBUG_TYPE "WriterPECOFF" + +using namespace llvm::support::endian; + +using llvm::COFF::DataDirectoryIndex; +using llvm::object::coff_runtime_function_x64; +using llvm::support::ulittle16_t; +using llvm::support::ulittle32_t; +using llvm::support::ulittle64_t; + +namespace lld { +namespace pecoff { + +// Disk sector size. Some data needs to be aligned at disk sector boundary in +// file. +static const int SECTOR_SIZE = 512; + +namespace { +class SectionChunk; + +/// A Chunk is an abstract contiguous range in an output file. 
+class Chunk { +public: + enum Kind { + kindHeader, + kindSection, + kindStringTable, + kindAtomChunk + }; + + explicit Chunk(Kind kind) : _kind(kind), _size(0) {} + virtual ~Chunk() {} + virtual void write(uint8_t *buffer) = 0; + virtual uint64_t size() const { return _size; } + virtual uint64_t onDiskSize() const { return size(); } + virtual uint64_t align() const { return 1; } + + uint64_t fileOffset() const { return _fileOffset; } + void setFileOffset(uint64_t fileOffset) { _fileOffset = fileOffset; } + Kind getKind() const { return _kind; } + +protected: + Kind _kind; + uint64_t _size; + uint64_t _fileOffset; +}; + +/// A HeaderChunk is an abstract class to represent a file header for +/// PE/COFF. The data in the header chunk is metadata about program and will +/// be consumed by the windows loader. HeaderChunks are not mapped to memory +/// when executed. +class HeaderChunk : public Chunk { +public: + HeaderChunk() : Chunk(kindHeader) {} + + static bool classof(const Chunk *c) { return c->getKind() == kindHeader; } +}; + +/// A DOSStubChunk represents the DOS compatible header at the beginning +/// of PE/COFF files. +class DOSStubChunk : public HeaderChunk { +public: + explicit DOSStubChunk(const PECOFFLinkingContext &ctx) + : HeaderChunk(), _context(ctx) { + // Minimum size of DOS stub is 64 bytes. The next block (PE header) needs to + // be aligned on 8 byte boundary. + size_t size = std::max(_context.getDosStub().size(), (size_t)64); + _size = llvm::RoundUpToAlignment(size, 8); + } + + void write(uint8_t *buffer) override { + ArrayRef<uint8_t> array = _context.getDosStub(); + std::memcpy(buffer, array.data(), array.size()); + auto *header = reinterpret_cast<llvm::object::dos_header *>(buffer); + header->AddressOfRelocationTable = sizeof(llvm::object::dos_header); + header->AddressOfNewExeHeader = _size; + } + +private: + const PECOFFLinkingContext &_context; +}; + +/// A PEHeaderChunk represents PE header including COFF header. 
+template <class PEHeader> +class PEHeaderChunk : public HeaderChunk { +public: + explicit PEHeaderChunk(const PECOFFLinkingContext &ctx); + + void write(uint8_t *buffer) override; + + void setSizeOfHeaders(uint64_t size) { + // Must be multiple of FileAlignment. + _peHeader.SizeOfHeaders = llvm::RoundUpToAlignment(size, SECTOR_SIZE); + } + + void setSizeOfCode(uint64_t size) { _peHeader.SizeOfCode = size; } + void setBaseOfCode(uint32_t rva) { _peHeader.BaseOfCode = rva; } + void setBaseOfData(uint32_t rva); + void setSizeOfImage(uint32_t size) { _peHeader.SizeOfImage = size; } + + void setSizeOfInitializedData(uint64_t size) { + _peHeader.SizeOfInitializedData = size; + } + + void setSizeOfUninitializedData(uint64_t size) { + _peHeader.SizeOfUninitializedData = size; + } + + void setNumberOfSections(uint32_t num) { _coffHeader.NumberOfSections = num; } + void setNumberOfSymbols(uint32_t num) { _coffHeader.NumberOfSymbols = num; } + + void setAddressOfEntryPoint(uint32_t address) { + _peHeader.AddressOfEntryPoint = address; + } + + void setPointerToSymbolTable(uint32_t rva) { + _coffHeader.PointerToSymbolTable = rva; + } + +private: + llvm::object::coff_file_header _coffHeader; + PEHeader _peHeader; +}; + +/// A SectionHeaderTableChunk represents Section Table Header of PE/COFF +/// format, which is a list of section headers. 
class SectionHeaderTableChunk : public HeaderChunk {
public:
  SectionHeaderTableChunk() : HeaderChunk() {}
  void addSection(SectionChunk *chunk);
  uint64_t size() const override;
  void write(uint8_t *buffer) override;

private:
  static llvm::object::coff_section createSectionHeader(SectionChunk *chunk);

  std::vector<SectionChunk *> _sections;
};

/// A StringTableChunk holds one dummy symbol table entry followed by the
/// string table that stores section names.
class StringTableChunk : public Chunk {
public:
  StringTableChunk() : Chunk(kindStringTable) {}

  static bool classof(const Chunk *c) {
    return c->getKind() == kindStringTable;
  }

  /// Appends \p sectionName (NUL-terminated) to the table and returns its
  /// offset, counted from the position that follows the dummy symbol.
  uint32_t addSectionName(StringRef sectionName) {
    if (_stringTable.empty()) {
      // The string table immediately follows the symbol table.
      // We don't really need a symbol table, but some tools (e.g. dumpbin)
      // don't like zero-length symbol table.
      // Make room for the empty symbol slot, which occupies 18 bytes.
      // We also need to reserve 4 bytes for the string table header.
      int size = sizeof(llvm::object::coff_symbol16) + 4;
      _stringTable.insert(_stringTable.begin(), size, 0);
      // Set the name of the dummy symbol to the first string table entry.
      // It's better than letting dumpbin print out garbage as a symbol name.
      char *off = _stringTable.data() + 4;
      write32le(off, 4);
    }
    uint32_t offset = _stringTable.size();
    _stringTable.insert(_stringTable.end(), sectionName.begin(),
                        sectionName.end());
    _stringTable.push_back('\0');
    return offset - sizeof(llvm::object::coff_symbol16);
  }

  uint64_t size() const override { return _stringTable.size(); }

  /// Stores the size field into the 4 bytes that follow the dummy symbol and
  /// copies the whole table to \p buffer. Does nothing if no name was added.
  void write(uint8_t *buffer) override {
    if (_stringTable.empty())
      return;
    char *off = _stringTable.data() + sizeof(llvm::object::coff_symbol16);
    // NOTE(review): the value written here also counts the dummy symbol's
    // bytes, while addSectionName() returns offsets that exclude them --
    // confirm which size consumers of the string table expect.
    write32le(off, _stringTable.size());
    std::memcpy(buffer, _stringTable.data(), _stringTable.size());
  }

private:
  std::vector<char> _stringTable;
};

/// A SectionChunk is the common base of the chunks that become sections of
/// the output file. It carries the section name, characteristics, virtual
/// address and string table offset.
class SectionChunk : public Chunk {
public:
  // Sections of uninitialized data occupy no space in the file; all other
  // sections are padded to a multiple of SECTOR_SIZE.
  uint64_t onDiskSize() const override {
    if (_characteristics & llvm::COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA)
      return 0;
    return llvm::RoundUpToAlignment(size(), SECTOR_SIZE);
  }

  uint64_t align() const override { return SECTOR_SIZE; }
  uint32_t getCharacteristics() const { return _characteristics; }
  StringRef getSectionName() const { return _sectionName; }
  virtual uint64_t memAlign() const { return _memAlign; }

  static bool classof(const Chunk *c) {
    Kind kind = c->getKind();
    return kind == kindSection || kind == kindAtomChunk;
  }

  uint64_t getVirtualAddress() { return _virtualAddress; }
  virtual void setVirtualAddress(uint32_t rva) { _virtualAddress = rva; }

  uint32_t getStringTableOffset() const { return _stringTableOffset; }
  void setStringTableOffset(uint32_t offset) { _stringTableOffset = offset; }

protected:
  // The in-memory alignment defaults to the page size of the linking context.
  SectionChunk(Kind kind, StringRef sectionName, uint32_t characteristics,
               const PECOFFLinkingContext &ctx)
      : Chunk(kind), _sectionName(sectionName),
        _characteristics(characteristics), _virtualAddress(0),
        _stringTableOffset(0), _memAlign(ctx.getPageSize()) {}

private:
  StringRef _sectionName;
  const uint32_t _characteristics;
  uint64_t _virtualAddress;
  uint32_t _stringTableOffset;
  uint64_t _memAlign;
};

+struct BaseReloc { + BaseReloc(uint64_t a, llvm::COFF::BaseRelocationType t) : addr(a), type(t) {} + uint64_t addr; + llvm::COFF::BaseRelocationType type; +}; + +/// An AtomChunk represents a section containing atoms. +class AtomChunk : public SectionChunk { +public: + AtomChunk(const PECOFFLinkingContext &ctx, StringRef name, + const std::vector<const DefinedAtom *> &atoms); + + void write(uint8_t *buffer) override; + + uint64_t memAlign() const override; + void appendAtom(const DefinedAtom *atom); + void buildAtomRvaMap(std::map<const Atom *, uint64_t> &atomRva) const; + + void applyRelocationsARM(uint8_t *buffer, + std::map<const Atom *, uint64_t> &atomRva, + std::vector<uint64_t> §ionRva, + uint64_t imageBaseAddress); + void applyRelocationsX86(uint8_t *buffer, + std::map<const Atom *, uint64_t> &atomRva, + std::vector<uint64_t> §ionRva, + uint64_t imageBaseAddress); + void applyRelocationsX64(uint8_t *buffer, + std::map<const Atom *, uint64_t> &atomRva, + std::vector<uint64_t> §ionRva, + uint64_t imageBaseAddress); + + void printAtomAddresses(uint64_t baseAddr) const; + void addBaseRelocations(std::vector<BaseReloc> &relocSites) const; + + void setVirtualAddress(uint32_t rva) override; + uint64_t getAtomVirtualAddress(StringRef name) const; + + static bool classof(const Chunk *c) { return c->getKind() == kindAtomChunk; } + +protected: + std::vector<AtomLayout *> _atomLayouts; + uint64_t _virtualAddress; + +private: + uint32_t + computeCharacteristics(const PECOFFLinkingContext &ctx, StringRef name, + const std::vector<const DefinedAtom *> &atoms) const { + return ctx.getSectionAttributes(name, + getDefaultCharacteristics(name, atoms)); + } + + uint32_t getDefaultCharacteristics( + StringRef name, const std::vector<const DefinedAtom *> &atoms) const; + + mutable llvm::BumpPtrAllocator _alloc; + llvm::COFF::MachineTypes _machineType; + const PECOFFLinkingContext &_ctx; +}; + +/// A DataDirectoryChunk represents data directory entries that follows the PE +/// 
header in the output file. An entry consists of an 8 byte field that +/// indicates a relative virtual address (the starting address of the entry data +/// in memory) and 8 byte entry data size. +class DataDirectoryChunk : public HeaderChunk { +public: + DataDirectoryChunk() + : HeaderChunk(), _data(std::vector<llvm::object::data_directory>(16)) {} + + uint64_t size() const override { + return sizeof(llvm::object::data_directory) * _data.size(); + } + + void setField(DataDirectoryIndex index, uint32_t addr, uint32_t size); + void write(uint8_t *buffer) override; + +private: + std::vector<llvm::object::data_directory> _data; +}; + +/// A BaseRelocChunk represents ".reloc" section. +/// +/// .reloc section contains a list of addresses. If the PE/COFF loader decides +/// to load the binary at a memory address different from its preferred base +/// address, which is specified by ImageBase field in the COFF header, the +/// loader needs to relocate the binary, so that all the addresses in the binary +/// point to new locations. The loader will do that by fixing up the addresses +/// specified by .reloc section. +/// +/// The executable is almost always loaded at the preferred base address because +/// it's loaded into an empty address space. The DLL is however an subject of +/// load-time relocation because it may conflict with other DLLs or the +/// executable. +class BaseRelocChunk : public SectionChunk { + typedef std::vector<std::unique_ptr<Chunk> > ChunkVectorT; + +public: + BaseRelocChunk(ChunkVectorT &chunks, const PECOFFLinkingContext &ctx) + : SectionChunk(kindSection, ".reloc", characteristics, ctx), + _ctx(ctx), _contents(createContents(chunks)) {} + + void write(uint8_t *buffer) override { + std::memcpy(buffer, &_contents[0], _contents.size()); + } + + uint64_t size() const override { return _contents.size(); } + +private: + // When loaded into memory, reloc section should be readable and writable. 
+ static const uint32_t characteristics = + llvm::COFF::IMAGE_SCN_MEM_READ | + llvm::COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + llvm::COFF::IMAGE_SCN_MEM_DISCARDABLE; + + std::vector<uint8_t> createContents(ChunkVectorT &chunks) const; + + // Returns a list of RVAs that needs to be relocated if the binary is loaded + // at an address different from its preferred one. + std::vector<BaseReloc> listRelocSites(ChunkVectorT &chunks) const; + + // Create the content of a relocation block. + std::vector<uint8_t> + createBaseRelocBlock(uint64_t pageAddr, const BaseReloc *begin, + const BaseReloc *end) const; + + const PECOFFLinkingContext &_ctx; + std::vector<uint8_t> _contents; +}; + +template <class PEHeader> +PEHeaderChunk<PEHeader>::PEHeaderChunk(const PECOFFLinkingContext &ctx) + : HeaderChunk() { + // Set the size of the chunk and initialize the header with null bytes. + _size = sizeof(llvm::COFF::PEMagic) + sizeof(_coffHeader) + sizeof(_peHeader); + std::memset(&_coffHeader, 0, sizeof(_coffHeader)); + std::memset(&_peHeader, 0, sizeof(_peHeader)); + + _coffHeader.Machine = ctx.getMachineType(); + _coffHeader.TimeDateStamp = time(nullptr); + + // Attributes of the executable. + uint16_t characteristics = llvm::COFF::IMAGE_FILE_EXECUTABLE_IMAGE; + if (!ctx.is64Bit()) + characteristics |= llvm::COFF::IMAGE_FILE_32BIT_MACHINE; + if (ctx.isDll()) + characteristics |= llvm::COFF::IMAGE_FILE_DLL; + if (ctx.getLargeAddressAware() || ctx.is64Bit()) + characteristics |= llvm::COFF::IMAGE_FILE_LARGE_ADDRESS_AWARE; + if (ctx.getSwapRunFromCD()) + characteristics |= llvm::COFF::IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP; + if (ctx.getSwapRunFromNet()) + characteristics |= llvm::COFF::IMAGE_FILE_NET_RUN_FROM_SWAP; + if (!ctx.getBaseRelocationEnabled()) + characteristics |= llvm::COFF::IMAGE_FILE_RELOCS_STRIPPED; + + _coffHeader.Characteristics = characteristics; + + _peHeader.Magic = ctx.is64Bit() ? 
llvm::COFF::PE32Header::PE32_PLUS + : llvm::COFF::PE32Header::PE32; + + // The address of the executable when loaded into memory. The default for + // DLLs is 0x10000000. The default for executables is 0x400000. + _peHeader.ImageBase = ctx.getBaseAddress(); + + // Sections should be page-aligned when loaded into memory, which is 4KB on + // x86. + _peHeader.SectionAlignment = ctx.getSectionDefaultAlignment(); + + // Sections in an executable file on disk should be sector-aligned (512 byte). + _peHeader.FileAlignment = SECTOR_SIZE; + + // The version number of the resultant executable/DLL. The number is purely + // informative, and neither the linker nor the loader won't use it. User can + // set the value using /version command line option. Default is 0.0. + PECOFFLinkingContext::Version imageVersion = ctx.getImageVersion(); + _peHeader.MajorImageVersion = imageVersion.majorVersion; + _peHeader.MinorImageVersion = imageVersion.minorVersion; + + // The required Windows version number. This is the internal version and + // shouldn't be confused with product name. Windows 7 is version 6.1 and + // Windows 8 is 6.2, for example. + PECOFFLinkingContext::Version minOSVersion = ctx.getMinOSVersion(); + _peHeader.MajorOperatingSystemVersion = minOSVersion.majorVersion; + _peHeader.MinorOperatingSystemVersion = minOSVersion.minorVersion; + _peHeader.MajorSubsystemVersion = minOSVersion.majorVersion; + _peHeader.MinorSubsystemVersion = minOSVersion.minorVersion; + + _peHeader.Subsystem = ctx.getSubsystem(); + + // Despite its name, DLL characteristics field has meaning both for + // executables and DLLs. We are not very sure if the following bits must + // be set, but regular binaries seem to have these bits, so we follow + // them. 
+ uint16_t dllCharacteristics = 0; + if (ctx.noSEH()) + dllCharacteristics |= llvm::COFF::IMAGE_DLL_CHARACTERISTICS_NO_SEH; + if (ctx.isTerminalServerAware()) + dllCharacteristics |= + llvm::COFF::IMAGE_DLL_CHARACTERISTICS_TERMINAL_SERVER_AWARE; + if (ctx.isNxCompat()) + dllCharacteristics |= llvm::COFF::IMAGE_DLL_CHARACTERISTICS_NX_COMPAT; + if (ctx.getDynamicBaseEnabled()) + dllCharacteristics |= llvm::COFF::IMAGE_DLL_CHARACTERISTICS_DYNAMIC_BASE; + if (!ctx.getAllowBind()) + dllCharacteristics |= llvm::COFF::IMAGE_DLL_CHARACTERISTICS_NO_BIND; + if (!ctx.getAllowIsolation()) + dllCharacteristics |= llvm::COFF::IMAGE_DLL_CHARACTERISTICS_NO_ISOLATION; + if (ctx.getHighEntropyVA() && ctx.is64Bit()) + dllCharacteristics |= llvm::COFF::IMAGE_DLL_CHARACTERISTICS_HIGH_ENTROPY_VA; + _peHeader.DLLCharacteristics = dllCharacteristics; + + _peHeader.SizeOfStackReserve = ctx.getStackReserve(); + _peHeader.SizeOfStackCommit = ctx.getStackCommit(); + _peHeader.SizeOfHeapReserve = ctx.getHeapReserve(); + _peHeader.SizeOfHeapCommit = ctx.getHeapCommit(); + + // The number of data directory entries. We always have 16 entries. + _peHeader.NumberOfRvaAndSize = 16; + + // The size of PE header including optional data directory. + _coffHeader.SizeOfOptionalHeader = sizeof(PEHeader) + + _peHeader.NumberOfRvaAndSize * sizeof(llvm::object::data_directory); +} + +template <> +void PEHeaderChunk<llvm::object::pe32_header>::setBaseOfData(uint32_t rva) { + _peHeader.BaseOfData = rva; +} + +template <> +void PEHeaderChunk<llvm::object::pe32plus_header>::setBaseOfData(uint32_t rva) { + // BaseOfData field does not exist in PE32+ header. 
+} + +template <class PEHeader> +void PEHeaderChunk<PEHeader>::write(uint8_t *buffer) { + std::memcpy(buffer, llvm::COFF::PEMagic, sizeof(llvm::COFF::PEMagic)); + buffer += sizeof(llvm::COFF::PEMagic); + std::memcpy(buffer, &_coffHeader, sizeof(_coffHeader)); + buffer += sizeof(_coffHeader); + std::memcpy(buffer, &_peHeader, sizeof(_peHeader)); +} + +AtomChunk::AtomChunk(const PECOFFLinkingContext &ctx, StringRef sectionName, + const std::vector<const DefinedAtom *> &atoms) + : SectionChunk(kindAtomChunk, sectionName, + computeCharacteristics(ctx, sectionName, atoms), ctx), + _virtualAddress(0), _machineType(ctx.getMachineType()), _ctx(ctx) { + for (auto *a : atoms) + appendAtom(a); +} + +void AtomChunk::write(uint8_t *buffer) { + if (_atomLayouts.empty()) + return; + if (getCharacteristics() & llvm::COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA) + return; + if (getCharacteristics() & llvm::COFF::IMAGE_SCN_CNT_CODE) { + // Fill the section with INT 3 (0xCC) rather than NUL, so that the + // disassembler will not interpret a garbage between atoms as the beginning + // of multi-byte machine code. This does not change the behavior of + // resulting binary but help debugging. + uint8_t *start = buffer + _atomLayouts.front()->_fileOffset; + uint8_t *end = buffer + _atomLayouts.back()->_fileOffset; + memset(start, 0xCC, end - start); + } + + for (const auto *layout : _atomLayouts) { + const DefinedAtom *atom = cast<DefinedAtom>(layout->_atom); + ArrayRef<uint8_t> rawContent = atom->rawContent(); + std::memcpy(buffer + layout->_fileOffset, rawContent.data(), + rawContent.size()); + } +} + +// Add all atoms to the given map. This data will be used to do relocation. 
+void +AtomChunk::buildAtomRvaMap(std::map<const Atom *, uint64_t> &atomRva) const { + for (const auto *layout : _atomLayouts) + atomRva[layout->_atom] = layout->_virtualAddr; +} + +static int getSectionIndex(uint64_t targetAddr, + const std::vector<uint64_t> §ionRva) { + int i = 1; + for (uint64_t rva : sectionRva) { + if (targetAddr < rva) + return i; + ++i; + } + return i; +} + +static uint32_t getSectionStartAddr(uint64_t targetAddr, + const std::vector<uint64_t> §ionRva) { + // Scan the list of section start addresses to find the section start address + // for the given RVA. + for (int i = 0, e = sectionRva.size(); i < e; ++i) + if (i == e - 1 || (sectionRva[i] <= targetAddr && targetAddr < sectionRva[i + 1])) + return sectionRva[i]; + llvm_unreachable("Section missing"); +} + +static void applyThumbMoveImmediate(ulittle16_t *mov, uint16_t imm) { + // MOVW(T3): |11110|i|10|0|1|0|0|imm4|0|imm3|Rd|imm8| + // imm32 = zext imm4:i:imm3:imm8 + // MOVT(T1): |11110|i|10|1|1|0|0|imm4|0|imm3|Rd|imm8| + // imm16 = imm4:i:imm3:imm8 + mov[0] = + mov[0] | (((imm & 0x0800) >> 11) << 10) | (((imm & 0xf000) >> 12) << 0); + mov[1] = + mov[1] | (((imm & 0x0700) >> 8) << 12) | (((imm & 0x00ff) >> 0) << 0); +} + +static void applyThumbBranchImmediate(ulittle16_t *bl, int32_t imm) { + // BL(T1): |11110|S|imm10|11|J1|1|J2|imm11| + // imm32 = sext S:I1:I2:imm10:imm11:'0' + // B.W(T4): |11110|S|imm10|10|J1|1|J2|imm11| + // imm32 = sext S:I1:I2:imm10:imm11:'0' + // + // I1 = ~(J1 ^ S), I2 = ~(J2 ^ S) + + assert((~abs(imm) & (-1 << 24)) && "bl/b.w out of range"); + + uint32_t S = (imm < 0 ? 
1 : 0); + uint32_t J1 = ((~imm & 0x00800000) >> 23) ^ S; + uint32_t J2 = ((~imm & 0x00400000) >> 22) ^ S; + + bl[0] = bl[0] | (((imm & 0x003ff000) >> 12) << 0) | (S << 10); + bl[1] = bl[1] | (((imm & 0x00000ffe) >> 1) << 0) | (J2 << 11) | (J1 << 13); +} + +void AtomChunk::applyRelocationsARM(uint8_t *Buffer, + std::map<const Atom *, uint64_t> &AtomRVA, + std::vector<uint64_t> &SectionRVA, + uint64_t ImageBase) { + Buffer = Buffer + _fileOffset; + parallel_for_each(_atomLayouts.begin(), _atomLayouts.end(), + [&](const AtomLayout *layout) { + const DefinedAtom *Atom = cast<DefinedAtom>(layout->_atom); + for (const Reference *R : *Atom) { + if (R->kindNamespace() != Reference::KindNamespace::COFF) + continue; + + bool AssumeTHUMBCode = false; + if (auto Target = dyn_cast<DefinedAtom>(R->target())) + AssumeTHUMBCode = Target->permissions() == DefinedAtom::permR_X || + Target->permissions() == DefinedAtom::permRWX; + + const auto AtomOffset = R->offsetInAtom(); + const auto FileOffset = layout->_fileOffset; + const auto TargetAddr = AtomRVA[R->target()] | (AssumeTHUMBCode ? 
1 : 0); + auto RelocSite16 = + reinterpret_cast<ulittle16_t *>(Buffer + FileOffset + AtomOffset); + auto RelocSite32 = + reinterpret_cast<ulittle32_t *>(Buffer + FileOffset + AtomOffset); + + switch (R->kindValue()) { + default: llvm_unreachable("unsupported relocation type"); + case llvm::COFF::IMAGE_REL_ARM_ADDR32: + *RelocSite32 = *RelocSite32 + TargetAddr + ImageBase; + break; + case llvm::COFF::IMAGE_REL_ARM_ADDR32NB: + *RelocSite32 = *RelocSite32 + TargetAddr; + break; + case llvm::COFF::IMAGE_REL_ARM_MOV32T: + applyThumbMoveImmediate(&RelocSite16[0], (TargetAddr + ImageBase) >> 0); + applyThumbMoveImmediate(&RelocSite16[2], (TargetAddr + ImageBase) >> 16); + break; + case llvm::COFF::IMAGE_REL_ARM_BRANCH24T: + // NOTE: the thumb bit will implicitly be truncated properly + applyThumbBranchImmediate(RelocSite16, + TargetAddr - AtomRVA[Atom] - AtomOffset - 4); + break; + case llvm::COFF::IMAGE_REL_ARM_BLX23T: + // NOTE: the thumb bit will implicitly be truncated properly + applyThumbBranchImmediate(RelocSite16, + TargetAddr - AtomRVA[Atom] - AtomOffset - 4); + break; + } + } + }); +} + +void AtomChunk::applyRelocationsX86(uint8_t *buffer, + std::map<const Atom *, uint64_t> &atomRva, + std::vector<uint64_t> §ionRva, + uint64_t imageBaseAddress) { + buffer += _fileOffset; + parallel_for_each(_atomLayouts.begin(), _atomLayouts.end(), + [&](const AtomLayout *layout) { + const DefinedAtom *atom = cast<DefinedAtom>(layout->_atom); + for (const Reference *ref : *atom) { + // Skip if this reference is not for COFF relocation. + if (ref->kindNamespace() != Reference::KindNamespace::COFF) + continue; + auto relocSite32 = reinterpret_cast<ulittle32_t *>( + buffer + layout->_fileOffset + ref->offsetInAtom()); + auto relocSite16 = reinterpret_cast<ulittle16_t *>(relocSite32); + const Atom *target = ref->target(); + uint64_t targetAddr = atomRva[target]; + // Also account for whatever offset is already stored at the relocation + // site. 
+ switch (ref->kindValue()) { + case llvm::COFF::IMAGE_REL_I386_ABSOLUTE: + // This relocation is no-op. + break; + case llvm::COFF::IMAGE_REL_I386_DIR32: + // Set target's 32-bit VA. + if (auto *abs = dyn_cast<AbsoluteAtom>(target)) + *relocSite32 += abs->value(); + else + *relocSite32 += targetAddr + imageBaseAddress; + break; + case llvm::COFF::IMAGE_REL_I386_DIR32NB: + // Set target's 32-bit RVA. + *relocSite32 += targetAddr; + break; + case llvm::COFF::IMAGE_REL_I386_REL32: { + // Set 32-bit relative address of the target. This relocation is + // usually used for relative branch or call instruction. + uint32_t disp = atomRva[atom] + ref->offsetInAtom() + 4; + *relocSite32 += targetAddr - disp; + break; + } + case llvm::COFF::IMAGE_REL_I386_SECTION: + // The 16-bit section index that contains the target symbol. + *relocSite16 += getSectionIndex(targetAddr, sectionRva); + break; + case llvm::COFF::IMAGE_REL_I386_SECREL: + // The 32-bit relative address from the beginning of the section that + // contains the target symbol. 
+ *relocSite32 += + targetAddr - getSectionStartAddr(targetAddr, sectionRva); + break; + default: + llvm::report_fatal_error("Unsupported relocation kind"); + } + } + }); +} + +void AtomChunk::applyRelocationsX64(uint8_t *buffer, + std::map<const Atom *, uint64_t> &atomRva, + std::vector<uint64_t> §ionRva, + uint64_t imageBase) { + buffer += _fileOffset; + parallel_for_each(_atomLayouts.begin(), _atomLayouts.end(), + [&](const AtomLayout *layout) { + const DefinedAtom *atom = cast<DefinedAtom>(layout->_atom); + for (const Reference *ref : *atom) { + if (ref->kindNamespace() != Reference::KindNamespace::COFF) + continue; + + uint8_t *loc = buffer + layout->_fileOffset + ref->offsetInAtom(); + auto relocSite16 = reinterpret_cast<ulittle16_t *>(loc); + auto relocSite32 = reinterpret_cast<ulittle32_t *>(loc); + auto relocSite64 = reinterpret_cast<ulittle64_t *>(loc); + uint64_t targetAddr = atomRva[ref->target()]; + + switch (ref->kindValue()) { + case llvm::COFF::IMAGE_REL_AMD64_ADDR64: + *relocSite64 += targetAddr + imageBase; + break; + case llvm::COFF::IMAGE_REL_AMD64_ADDR32: + *relocSite32 += targetAddr + imageBase; + break; + case llvm::COFF::IMAGE_REL_AMD64_ADDR32NB: + *relocSite32 += targetAddr; + break; + case llvm::COFF::IMAGE_REL_AMD64_REL32: + *relocSite32 += targetAddr - atomRva[atom] - ref->offsetInAtom() - 4; + break; + case llvm::COFF::IMAGE_REL_AMD64_REL32_1: + *relocSite32 += targetAddr - atomRva[atom] - ref->offsetInAtom() - 5; + break; + case llvm::COFF::IMAGE_REL_AMD64_REL32_2: + *relocSite32 += targetAddr - atomRva[atom] - ref->offsetInAtom() - 6; + break; + case llvm::COFF::IMAGE_REL_AMD64_REL32_3: + *relocSite32 += targetAddr - atomRva[atom] - ref->offsetInAtom() - 7; + break; + case llvm::COFF::IMAGE_REL_AMD64_REL32_4: + *relocSite32 += targetAddr - atomRva[atom] - ref->offsetInAtom() - 8; + break; + case llvm::COFF::IMAGE_REL_AMD64_REL32_5: + *relocSite32 += targetAddr - atomRva[atom] - ref->offsetInAtom() - 9; + break; + case 
llvm::COFF::IMAGE_REL_AMD64_SECTION: + *relocSite16 += getSectionIndex(targetAddr, sectionRva) - 1; + break; + case llvm::COFF::IMAGE_REL_AMD64_SECREL: + *relocSite32 += + targetAddr - getSectionStartAddr(targetAddr, sectionRva); + break; + default: + llvm::errs() << "Kind: " << (int)ref->kindValue() << "\n"; + llvm::report_fatal_error("Unsupported relocation kind"); + } + } + }); +} + +/// Print atom VAs. Used only for debugging. +void AtomChunk::printAtomAddresses(uint64_t baseAddr) const { + for (const auto *layout : _atomLayouts) { + const DefinedAtom *atom = cast<DefinedAtom>(layout->_atom); + uint64_t addr = layout->_virtualAddr; + llvm::dbgs() << llvm::format("0x%08llx: ", addr + baseAddr) + << (atom->name().empty() ? "(anonymous)" : atom->name()) + << "\n"; + } +} + +/// List all virtual addresses (and not relative virtual addresses) that need +/// to be fixed up if image base is relocated. The only relocation type that +/// needs to be fixed is DIR32 on i386. REL32 is not (and should not be) +/// fixed up because it's PC-relative. +void AtomChunk::addBaseRelocations(std::vector<BaseReloc> &relocSites) const { + for (const auto *layout : _atomLayouts) { + const DefinedAtom *atom = cast<DefinedAtom>(layout->_atom); + for (const Reference *ref : *atom) { + if (ref->kindNamespace() != Reference::KindNamespace::COFF) + continue; + + // An absolute symbol points to a fixed location in memory. Their + // address should not be fixed at load time. One exception is ImageBase + // because that's relative to run-time image base address. 
+ if (auto *abs = dyn_cast<AbsoluteAtom>(ref->target())) + if (!abs->name().equals("__ImageBase") && + !abs->name().equals("___ImageBase")) + continue; + + uint64_t address = layout->_virtualAddr + ref->offsetInAtom(); + switch (_machineType) { + default: llvm_unreachable("unsupported machine type"); + case llvm::COFF::IMAGE_FILE_MACHINE_I386: + if (ref->kindValue() == llvm::COFF::IMAGE_REL_I386_DIR32) + relocSites.push_back( + BaseReloc(address, llvm::COFF::IMAGE_REL_BASED_HIGHLOW)); + break; + case llvm::COFF::IMAGE_FILE_MACHINE_AMD64: + if (ref->kindValue() == llvm::COFF::IMAGE_REL_AMD64_ADDR64) + relocSites.push_back( + BaseReloc(address, llvm::COFF::IMAGE_REL_BASED_DIR64)); + break; + case llvm::COFF::IMAGE_FILE_MACHINE_ARMNT: + if (ref->kindValue() == llvm::COFF::IMAGE_REL_ARM_ADDR32) + relocSites.push_back( + BaseReloc(address, llvm::COFF::IMAGE_REL_BASED_HIGHLOW)); + else if (ref->kindValue() == llvm::COFF::IMAGE_REL_ARM_MOV32T) + relocSites.push_back( + BaseReloc(address, llvm::COFF::IMAGE_REL_BASED_ARM_MOV32T)); + break; + } + } + } +} + +void AtomChunk::setVirtualAddress(uint32_t rva) { + SectionChunk::setVirtualAddress(rva); + for (AtomLayout *layout : _atomLayouts) + layout->_virtualAddr += rva; +} + +uint64_t AtomChunk::getAtomVirtualAddress(StringRef name) const { + for (auto atomLayout : _atomLayouts) + if (atomLayout->_atom->name() == name) + return atomLayout->_virtualAddr; + return 0; +} + +void DataDirectoryChunk::setField(DataDirectoryIndex index, uint32_t addr, + uint32_t size) { + llvm::object::data_directory &dir = _data[index]; + dir.RelativeVirtualAddress = addr; + dir.Size = size; +} + +void DataDirectoryChunk::write(uint8_t *buffer) { + std::memcpy(buffer, &_data[0], size()); +} + +uint64_t AtomChunk::memAlign() const { + // ReaderCOFF propagated the section alignment to the first atom in + // the section. We restore that here. 
+ if (_atomLayouts.empty()) + return _ctx.getPageSize(); + int align = _ctx.getPageSize(); + for (auto atomLayout : _atomLayouts) { + auto *atom = cast<const DefinedAtom>(atomLayout->_atom); + align = std::max(align, 1 << atom->alignment().powerOf2); + } + return align; +} + +void AtomChunk::appendAtom(const DefinedAtom *atom) { + // Atom may have to be at a proper alignment boundary. If so, move the + // pointer to make a room after the last atom before adding new one. + _size = llvm::RoundUpToAlignment(_size, 1 << atom->alignment().powerOf2); + + // Create an AtomLayout and move the current pointer. + auto *layout = new (_alloc) AtomLayout(atom, _size, _size); + _atomLayouts.push_back(layout); + _size += atom->size(); +} + +uint32_t AtomChunk::getDefaultCharacteristics( + StringRef name, const std::vector<const DefinedAtom *> &atoms) const { + const uint32_t code = llvm::COFF::IMAGE_SCN_CNT_CODE; + const uint32_t execute = llvm::COFF::IMAGE_SCN_MEM_EXECUTE; + const uint32_t read = llvm::COFF::IMAGE_SCN_MEM_READ; + const uint32_t write = llvm::COFF::IMAGE_SCN_MEM_WRITE; + const uint32_t data = llvm::COFF::IMAGE_SCN_CNT_INITIALIZED_DATA; + const uint32_t bss = llvm::COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA; + if (name == ".text") + return code | execute | read; + if (name == ".data") + return data | read | write; + if (name == ".rdata") + return data | read; + if (name == ".bss") + return bss | read | write; + assert(atoms.size() > 0); + switch (atoms[0]->permissions()) { + case DefinedAtom::permR__: + return data | read; + case DefinedAtom::permRW_: + return data | read | write; + case DefinedAtom::permR_X: + return code | execute | read; + case DefinedAtom::permRWX: + return code | execute | read | write; + default: + llvm_unreachable("Unsupported permission"); + } +} + +void SectionHeaderTableChunk::addSection(SectionChunk *chunk) { + _sections.push_back(chunk); +} + +uint64_t SectionHeaderTableChunk::size() const { + return _sections.size() * 
sizeof(llvm::object::coff_section); +} + +void SectionHeaderTableChunk::write(uint8_t *buffer) { + uint64_t offset = 0; + for (SectionChunk *chunk : _sections) { + llvm::object::coff_section header = createSectionHeader(chunk); + std::memcpy(buffer + offset, &header, sizeof(header)); + offset += sizeof(header); + } +} + +llvm::object::coff_section +SectionHeaderTableChunk::createSectionHeader(SectionChunk *chunk) { + llvm::object::coff_section header; + + // We have extended the COFF specification by allowing section names to be + // greater than eight characters. We achieve this by adding the section names + // to the string table. Binutils' linker, ld, performs the same trick. + StringRef sectionName = chunk->getSectionName(); + std::memset(header.Name, 0, llvm::COFF::NameSize); + if (uint32_t stringTableOffset = chunk->getStringTableOffset()) + sprintf(header.Name, "/%u", stringTableOffset); + else + std::strncpy(header.Name, sectionName.data(), sectionName.size()); + + uint32_t characteristics = chunk->getCharacteristics(); + header.VirtualSize = chunk->size(); + header.VirtualAddress = chunk->getVirtualAddress(); + header.SizeOfRawData = chunk->onDiskSize(); + header.PointerToRelocations = 0; + header.PointerToLinenumbers = 0; + header.NumberOfRelocations = 0; + header.NumberOfLinenumbers = 0; + header.Characteristics = characteristics; + + if (characteristics & llvm::COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA) { + header.PointerToRawData = 0; + } else { + header.PointerToRawData = chunk->fileOffset(); + } + return header; +} + +/// Creates .reloc section content from the other sections. The content of +/// .reloc is basically a list of relocation sites. The relocation sites are +/// divided into blocks. Each block represents the base relocation for a 4K +/// page. +/// +/// By dividing 32 bit RVAs into blocks, COFF saves disk and memory space for +/// the base relocation. 
A block consists of a 32 bit page RVA and 16 bit +/// relocation entries which represent offsets in the page. That is a more +/// compact representation than a simple vector of 32 bit RVAs. +std::vector<uint8_t> +BaseRelocChunk::createContents(ChunkVectorT &chunks) const { + std::vector<uint8_t> contents; + std::vector<BaseReloc> relocSites = listRelocSites(chunks); + + uint64_t mask = _ctx.getPageSize() - 1; + parallel_sort(relocSites.begin(), relocSites.end(), + [=](const BaseReloc &a, const BaseReloc &b) { + return (a.addr & ~mask) < (b.addr & ~mask); + }); + + // Base relocations for the same memory page are grouped together + // and passed to createBaseRelocBlock. + for (auto it = relocSites.begin(), e = relocSites.end(); it != e;) { + auto beginIt = it; + uint64_t pageAddr = (beginIt->addr & ~mask); + for (++it; it != e; ++it) + if ((it->addr & ~mask) != pageAddr) + break; + const BaseReloc *begin = &*beginIt; + const BaseReloc *end = begin + (it - beginIt); + std::vector<uint8_t> block = createBaseRelocBlock(pageAddr, begin, end); + contents.insert(contents.end(), block.begin(), block.end()); + } + return contents; +} + +// Returns a list of RVAs that needs to be relocated if the binary is loaded +// at an address different from its preferred one. +std::vector<BaseReloc> +BaseRelocChunk::listRelocSites(ChunkVectorT &chunks) const { + std::vector<BaseReloc> ret; + for (auto &cp : chunks) + if (AtomChunk *chunk = dyn_cast<AtomChunk>(&*cp)) + chunk->addBaseRelocations(ret); + return ret; +} + +// Create the content of a relocation block. +std::vector<uint8_t> +BaseRelocChunk::createBaseRelocBlock(uint64_t pageAddr, + const BaseReloc *begin, + const BaseReloc *end) const { + // Relocation blocks should be padded with IMAGE_REL_I386_ABSOLUTE to be + // aligned to a DWORD size boundary. 
+ uint32_t size = llvm::RoundUpToAlignment( + sizeof(ulittle32_t) * 2 + sizeof(ulittle16_t) * (end - begin), + sizeof(ulittle32_t)); + std::vector<uint8_t> contents(size); + uint8_t *ptr = &contents[0]; + + // The first four bytes is the page RVA. + write32le(ptr, pageAddr); + ptr += sizeof(ulittle32_t); + + // The second four bytes is the size of the block, including the the page + // RVA and this size field. + write32le(ptr, size); + ptr += sizeof(ulittle32_t); + + uint64_t mask = _ctx.getPageSize() - 1; + for (const BaseReloc *i = begin; i < end; ++i) { + write16le(ptr, (i->type << 12) | (i->addr & mask)); + ptr += sizeof(ulittle16_t); + } + return contents; +} + +} // end anonymous namespace + +class PECOFFWriter : public Writer { +public: + explicit PECOFFWriter(const PECOFFLinkingContext &context) + : _ctx(context), _numSections(0), _imageSizeInMemory(_ctx.getPageSize()), + _imageSizeOnDisk(0) {} + + template <class PEHeader> void build(const File &linkedFile); + std::error_code writeFile(const File &linkedFile, StringRef path) override; + +private: + void applyAllRelocations(uint8_t *bufferStart); + void printAllAtomAddresses() const; + void reorderSEHTableEntries(uint8_t *bufferStart); + void reorderSEHTableEntriesX86(uint8_t *bufferStart); + void reorderSEHTableEntriesX64(uint8_t *bufferStart); + + void addChunk(Chunk *chunk); + void addSectionChunk(std::unique_ptr<SectionChunk> chunk, + SectionHeaderTableChunk *table, + StringTableChunk *stringTable); + void setImageSizeOnDisk(); + uint64_t + calcSectionSize(llvm::COFF::SectionCharacteristics sectionType) const; + + uint64_t calcSizeOfInitializedData() const { + return calcSectionSize(llvm::COFF::IMAGE_SCN_CNT_INITIALIZED_DATA); + } + + uint64_t calcSizeOfUninitializedData() const { + return calcSectionSize(llvm::COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA); + } + + uint64_t calcSizeOfCode() const { + return calcSectionSize(llvm::COFF::IMAGE_SCN_CNT_CODE); + } + + std::vector<std::unique_ptr<Chunk> > _chunks; 
+ const PECOFFLinkingContext &_ctx; + uint32_t _numSections; + + // The size of the image in memory. This is initialized with + // _ctx.getPageSize(), as the first page starting at ImageBase is usually left + // unmapped. IIUC there's no technical reason to do so, but we'll follow that + // convention so that we don't produce odd-looking binary. + uint32_t _imageSizeInMemory; + + // The size of the image on disk. This is basically the sum of all chunks in + // the output file with paddings between them. + uint32_t _imageSizeOnDisk; + + // The map from atom to its relative virtual address. + std::map<const Atom *, uint64_t> _atomRva; +}; + +StringRef customSectionName(const DefinedAtom *atom) { + assert(atom->sectionChoice() == DefinedAtom::sectionCustomRequired); + StringRef s = atom->customSectionName(); + size_t pos = s.find('$'); + return (pos == StringRef::npos) ? s : s.substr(0, pos); +} + +StringRef chooseSectionByContent(const DefinedAtom *atom) { + switch (atom->contentType()) { + case DefinedAtom::typeCode: + return ".text"; + case DefinedAtom::typeZeroFill: + return ".bss"; + case DefinedAtom::typeData: + if (atom->permissions() == DefinedAtom::permR__) + return ".rdata"; + if (atom->permissions() == DefinedAtom::permRW_) + return ".data"; + break; + default: + break; + } + llvm::errs() << "Atom: contentType=" << atom->contentType() + << " permission=" << atom->permissions() << "\n"; + llvm::report_fatal_error("Failed to choose section based on content"); +} + +typedef std::map<StringRef, std::vector<const DefinedAtom *> > AtomVectorMap; + +void groupAtoms(const PECOFFLinkingContext &ctx, const File &file, + AtomVectorMap &result) { + for (const DefinedAtom *atom : file.defined()) { + if (atom->sectionChoice() == DefinedAtom::sectionCustomRequired) { + StringRef section = customSectionName(atom); + result[ctx.getOutputSectionName(section)].push_back(atom); + continue; + } + if (atom->sectionChoice() == DefinedAtom::sectionBasedOnContent) { + StringRef 
section = chooseSectionByContent(atom); + result[ctx.getOutputSectionName(section)].push_back(atom); + continue; + } + llvm_unreachable("Unknown section choice"); + } +} + +static const DefinedAtom *findTLSUsedSymbol(const PECOFFLinkingContext &ctx, + const File &file) { + StringRef sym = ctx.decorateSymbol("_tls_used"); + for (const DefinedAtom *atom : file.defined()) + if (atom->name() == sym) + return atom; + return nullptr; +} + +// Create all chunks that consist of the output file. +template <class PEHeader> +void PECOFFWriter::build(const File &linkedFile) { + AtomVectorMap atoms; + groupAtoms(_ctx, linkedFile, atoms); + + // Create file chunks and add them to the list. + auto *dosStub = new DOSStubChunk(_ctx); + auto *peHeader = new PEHeaderChunk<PEHeader>(_ctx); + auto *dataDirectory = new DataDirectoryChunk(); + auto *sectionTable = new SectionHeaderTableChunk(); + auto *stringTable = new StringTableChunk(); + addChunk(dosStub); + addChunk(peHeader); + addChunk(dataDirectory); + addChunk(sectionTable); + addChunk(stringTable); + + // Create sections and add the atoms to them. + for (auto i : atoms) { + StringRef sectionName = i.first; + std::vector<const DefinedAtom *> &contents = i.second; + std::unique_ptr<SectionChunk> section( + new AtomChunk(_ctx, sectionName, contents)); + if (section->size() > 0) + addSectionChunk(std::move(section), sectionTable, stringTable); + } + + // Build atom to its RVA map. + for (std::unique_ptr<Chunk> &cp : _chunks) + if (AtomChunk *chunk = dyn_cast<AtomChunk>(&*cp)) + chunk->buildAtomRvaMap(_atomRva); + + // We know the addresses of all defined atoms that needs to be + // relocated. So we can create the ".reloc" section which contains + // all the relocation sites. 
+ if (_ctx.getBaseRelocationEnabled()) { + std::unique_ptr<SectionChunk> baseReloc(new BaseRelocChunk(_chunks, _ctx)); + if (baseReloc->size()) { + SectionChunk &ref = *baseReloc; + addSectionChunk(std::move(baseReloc), sectionTable, stringTable); + dataDirectory->setField(DataDirectoryIndex::BASE_RELOCATION_TABLE, + ref.getVirtualAddress(), ref.size()); + } + } + + setImageSizeOnDisk(); + + if (stringTable->size()) { + peHeader->setPointerToSymbolTable(stringTable->fileOffset()); + peHeader->setNumberOfSymbols(1); + } + + for (std::unique_ptr<Chunk> &chunk : _chunks) { + SectionChunk *section = dyn_cast<SectionChunk>(chunk.get()); + if (!section) + continue; + if (section->getSectionName() == ".text") { + peHeader->setBaseOfCode(section->getVirtualAddress()); + + // Find the virtual address of the entry point symbol if any. PECOFF spec + // says that entry point for dll images is optional, in which case it must + // be set to 0. + if (_ctx.hasEntry()) { + AtomChunk *atom = cast<AtomChunk>(section); + uint64_t entryPointAddress = + atom->getAtomVirtualAddress(_ctx.getEntrySymbolName()); + + if (entryPointAddress) { + // NOTE: ARM NT assumes a pure THUMB execution, so adjust the entry + // point accordingly + if (_ctx.getMachineType() == llvm::COFF::IMAGE_FILE_MACHINE_ARMNT) + entryPointAddress |= 1; + peHeader->setAddressOfEntryPoint(entryPointAddress); + } + } else { + peHeader->setAddressOfEntryPoint(0); + } + } + StringRef name = section->getSectionName(); + if (name == ".data") { + peHeader->setBaseOfData(section->getVirtualAddress()); + continue; + } + DataDirectoryIndex ignore = DataDirectoryIndex(-1); + DataDirectoryIndex idx = llvm::StringSwitch<DataDirectoryIndex>(name) + .Case(".pdata", DataDirectoryIndex::EXCEPTION_TABLE) + .Case(".rsrc", DataDirectoryIndex::RESOURCE_TABLE) + .Case(".idata.a", DataDirectoryIndex::IAT) + .Case(".idata.d", DataDirectoryIndex::IMPORT_TABLE) + .Case(".edata", DataDirectoryIndex::EXPORT_TABLE) + .Case(".loadcfg", 
DataDirectoryIndex::LOAD_CONFIG_TABLE) + .Case(".didat.d", DataDirectoryIndex::DELAY_IMPORT_DESCRIPTOR) + .Default(ignore); + if (idx == ignore) + continue; + dataDirectory->setField(idx, section->getVirtualAddress(), section->size()); + } + + if (const DefinedAtom *atom = findTLSUsedSymbol(_ctx, linkedFile)) { + dataDirectory->setField(DataDirectoryIndex::TLS_TABLE, _atomRva[atom], + 0x18); + } + + // Now that we know the size and file offset of sections. Set the file + // header accordingly. + peHeader->setSizeOfCode(calcSizeOfCode()); + peHeader->setSizeOfInitializedData(calcSizeOfInitializedData()); + peHeader->setSizeOfUninitializedData(calcSizeOfUninitializedData()); + peHeader->setNumberOfSections(_numSections); + peHeader->setSizeOfImage(_imageSizeInMemory); + peHeader->setSizeOfHeaders(sectionTable->fileOffset() + sectionTable->size()); +} + +std::error_code PECOFFWriter::writeFile(const File &linkedFile, + StringRef path) { + if (_ctx.is64Bit()) { + this->build<llvm::object::pe32plus_header>(linkedFile); + } else { + this->build<llvm::object::pe32_header>(linkedFile); + } + + uint64_t totalSize = + _chunks.back()->fileOffset() + _chunks.back()->onDiskSize(); + std::unique_ptr<llvm::FileOutputBuffer> buffer; + std::error_code ec = llvm::FileOutputBuffer::create( + path, totalSize, buffer, llvm::FileOutputBuffer::F_executable); + if (ec) + return ec; + + for (std::unique_ptr<Chunk> &chunk : _chunks) + chunk->write(buffer->getBufferStart() + chunk->fileOffset()); + applyAllRelocations(buffer->getBufferStart()); + reorderSEHTableEntries(buffer->getBufferStart()); + DEBUG(printAllAtomAddresses()); + + if (_ctx.isDll()) + writeImportLibrary(_ctx); + + return buffer->commit(); +} + +/// Apply relocations to the output file buffer. This two pass. In the first +/// pass, we visit all atoms to create a map from atom to its virtual +/// address. In the second pass, we visit all relocation references to fix +/// up addresses in the buffer. 
+void PECOFFWriter::applyAllRelocations(uint8_t *bufferStart) { + // Create the list of section start addresses. It's needed for + // relocations of SECREL type. + std::vector<uint64_t> sectionRva; + for (auto &cp : _chunks) + if (SectionChunk *section = dyn_cast<SectionChunk>(&*cp)) + sectionRva.push_back(section->getVirtualAddress()); + + uint64_t base = _ctx.getBaseAddress(); + for (auto &cp : _chunks) { + if (AtomChunk *chunk = dyn_cast<AtomChunk>(&*cp)) { + switch (_ctx.getMachineType()) { + default: llvm_unreachable("unsupported machine type"); + case llvm::COFF::IMAGE_FILE_MACHINE_ARMNT: + chunk->applyRelocationsARM(bufferStart, _atomRva, sectionRva, base); + break; + case llvm::COFF::IMAGE_FILE_MACHINE_I386: + chunk->applyRelocationsX86(bufferStart, _atomRva, sectionRva, base); + break; + case llvm::COFF::IMAGE_FILE_MACHINE_AMD64: + chunk->applyRelocationsX64(bufferStart, _atomRva, sectionRva, base); + break; + } + } + } +} + +/// Print atom VAs. Used only for debugging. +void PECOFFWriter::printAllAtomAddresses() const { + for (auto &cp : _chunks) + if (AtomChunk *chunk = dyn_cast<AtomChunk>(&*cp)) + chunk->printAtomAddresses(_ctx.getBaseAddress()); +} + +void PECOFFWriter::reorderSEHTableEntries(uint8_t *bufferStart) { + auto machineType = _ctx.getMachineType(); + if (machineType == llvm::COFF::IMAGE_FILE_MACHINE_I386) + reorderSEHTableEntriesX86(bufferStart); + if (machineType == llvm::COFF::IMAGE_FILE_MACHINE_AMD64) + reorderSEHTableEntriesX64(bufferStart); +} + +/// It seems that the entries in .sxdata must be sorted. This function is called +/// after a COFF file image is created in memory and before it is written to +/// disk. It is safe to reorder entries at this stage because the contents of +/// the entries are RVAs and there's no reference to a .sxdata entry other than +/// to the beginning of the section. 
+void PECOFFWriter::reorderSEHTableEntriesX86(uint8_t *bufferStart) { + for (std::unique_ptr<Chunk> &chunk : _chunks) { + if (SectionChunk *section = dyn_cast<SectionChunk>(chunk.get())) { + if (section->getSectionName() == ".sxdata") { + int numEntries = section->size() / sizeof(ulittle32_t); + ulittle32_t *begin = reinterpret_cast<ulittle32_t *>(bufferStart + section->fileOffset()); + ulittle32_t *end = begin + numEntries; + std::sort(begin, end); + } + } + } +} + +/// The entries in .pdata must be sorted according to its BeginAddress field +/// value. It's safe to do it because of the same reason as .sxdata. +void PECOFFWriter::reorderSEHTableEntriesX64(uint8_t *bufferStart) { + for (std::unique_ptr<Chunk> &chunk : _chunks) { + if (SectionChunk *section = dyn_cast<SectionChunk>(chunk.get())) { + if (section->getSectionName() != ".pdata") + continue; + int numEntries = section->size() / sizeof(coff_runtime_function_x64); + coff_runtime_function_x64 *begin = + (coff_runtime_function_x64 *)(bufferStart + section->fileOffset()); + coff_runtime_function_x64 *end = begin + numEntries; + std::sort(begin, end, [](const coff_runtime_function_x64 &lhs, + const coff_runtime_function_x64 &rhs) { + return lhs.BeginAddress < rhs.BeginAddress; + }); + } + } +} + +void PECOFFWriter::addChunk(Chunk *chunk) { + _chunks.push_back(std::unique_ptr<Chunk>(chunk)); +} + +void PECOFFWriter::addSectionChunk(std::unique_ptr<SectionChunk> chunk, + SectionHeaderTableChunk *table, + StringTableChunk *stringTable) { + table->addSection(chunk.get()); + _numSections++; + + StringRef sectionName = chunk->getSectionName(); + if (sectionName.size() > llvm::COFF::NameSize) { + uint32_t stringTableOffset = stringTable->addSectionName(sectionName); + chunk->setStringTableOffset(stringTableOffset); + } + + // Compute and set the starting address of sections when loaded in + // memory. 
They are different from positions on disk because sections need + // to be sector-aligned on disk but page-aligned in memory. + _imageSizeInMemory = llvm::RoundUpToAlignment( + _imageSizeInMemory, chunk->memAlign()); + chunk->setVirtualAddress(_imageSizeInMemory); + _imageSizeInMemory = llvm::RoundUpToAlignment( + _imageSizeInMemory + chunk->size(), _ctx.getPageSize()); + _chunks.push_back(std::move(chunk)); +} + +void PECOFFWriter::setImageSizeOnDisk() { + for (auto &chunk : _chunks) { + // Compute and set the offset of the chunk in the output file. + _imageSizeOnDisk = + llvm::RoundUpToAlignment(_imageSizeOnDisk, chunk->align()); + chunk->setFileOffset(_imageSizeOnDisk); + _imageSizeOnDisk += chunk->onDiskSize(); + } +} + +uint64_t PECOFFWriter::calcSectionSize( + llvm::COFF::SectionCharacteristics sectionType) const { + uint64_t ret = 0; + for (auto &cp : _chunks) + if (SectionChunk *chunk = dyn_cast<SectionChunk>(&*cp)) + if (chunk->getCharacteristics() & sectionType) + ret += chunk->onDiskSize(); + return ret; +} + +} // end namespace pecoff + +std::unique_ptr<Writer> createWriterPECOFF(const PECOFFLinkingContext &info) { + return std::unique_ptr<Writer>(new pecoff::PECOFFWriter(info)); +} + +} // end namespace lld diff --git a/lib/ReaderWriter/YAML/CMakeLists.txt b/lib/ReaderWriter/YAML/CMakeLists.txt new file mode 100644 index 000000000000..b955baa94202 --- /dev/null +++ b/lib/ReaderWriter/YAML/CMakeLists.txt @@ -0,0 +1,6 @@ +add_llvm_library(lldYAML + ReaderWriterYAML.cpp + LINK_LIBS + lldCore + LLVMSupport + ) diff --git a/lib/ReaderWriter/YAML/Makefile b/lib/ReaderWriter/YAML/Makefile new file mode 100644 index 000000000000..739b6eae747a --- /dev/null +++ b/lib/ReaderWriter/YAML/Makefile @@ -0,0 +1,14 @@ +##===- lld/lib/ReaderWriter/YAML/Makefile --------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. 
+# +##===----------------------------------------------------------------------===## + +LLD_LEVEL := ../../.. +LIBRARYNAME := lldYAML +USEDLIBS = lldCore.a + +include $(LLD_LEVEL)/Makefile diff --git a/lib/ReaderWriter/YAML/ReaderWriterYAML.cpp b/lib/ReaderWriter/YAML/ReaderWriterYAML.cpp new file mode 100644 index 000000000000..868b9497c4cc --- /dev/null +++ b/lib/ReaderWriter/YAML/ReaderWriterYAML.cpp @@ -0,0 +1,1358 @@ +//===- lib/ReaderWriter/YAML/ReaderWriterYAML.cpp -------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "lld/Core/ArchiveLibraryFile.h" +#include "lld/Core/DefinedAtom.h" +#include "lld/Core/Error.h" +#include "lld/Core/File.h" +#include "lld/Core/LLVM.h" +#include "lld/Core/Reader.h" +#include "lld/Core/Reference.h" +#include "lld/Core/Simple.h" +#include "lld/Core/Writer.h" +#include "lld/ReaderWriter/YamlContext.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/YAMLTraits.h" +#include "llvm/Support/raw_ostream.h" +#include <memory> +#include <string> +#include <system_error> + +using llvm::yaml::MappingTraits; +using llvm::yaml::ScalarEnumerationTraits; +using llvm::yaml::ScalarTraits; +using llvm::yaml::IO; +using llvm::yaml::SequenceTraits; +using llvm::yaml::DocumentListTraits; + +using namespace lld; + +/// The conversion of Atoms to and from YAML uses LLVM's YAML I/O. This +/// file just defines template specializations on the lld types which control +/// how the mapping is done to and from YAML. + +namespace { + +/// Used when writing yaml files. 
+/// In most cases, atoms names are unambiguous, so references can just +/// use the atom name as the target (e.g. target: foo). But in a few +/// cases that does not work, so ref-names are added. These are labels +/// used only in yaml. The labels do not exist in the Atom model. +/// +/// One need for ref-names are when atoms have no user supplied name +/// (e.g. c-string literal). Another case is when two object files with +/// identically named static functions are merged (ld -r) into one object file. +/// In that case referencing the function by name is ambiguous, so a unique +/// ref-name is added. +class RefNameBuilder { +public: + RefNameBuilder(const lld::File &file) + : _collisionCount(0), _unnamedCounter(0) { + // visit all atoms + for (const lld::DefinedAtom *atom : file.defined()) { + // Build map of atoms names to detect duplicates + if (!atom->name().empty()) + buildDuplicateNameMap(*atom); + + // Find references to unnamed atoms and create ref-names for them. + for (const lld::Reference *ref : *atom) { + // create refname for any unnamed reference target + const lld::Atom *target = ref->target(); + if ((target != nullptr) && target->name().empty()) { + std::string storage; + llvm::raw_string_ostream buffer(storage); + buffer << llvm::format("L%03d", _unnamedCounter++); + StringRef newName = copyString(buffer.str()); + _refNames[target] = newName; + DEBUG_WITH_TYPE("WriterYAML", + llvm::dbgs() << "unnamed atom: creating ref-name: '" + << newName << "' (" + << (const void *)newName.data() << ", " + << newName.size() << ")\n"); + } + } + } + for (const lld::UndefinedAtom *undefAtom : file.undefined()) { + buildDuplicateNameMap(*undefAtom); + } + for (const lld::SharedLibraryAtom *shlibAtom : file.sharedLibrary()) { + buildDuplicateNameMap(*shlibAtom); + } + for (const lld::AbsoluteAtom *absAtom : file.absolute()) { + if (!absAtom->name().empty()) + buildDuplicateNameMap(*absAtom); + } + } + + void buildDuplicateNameMap(const lld::Atom &atom) { + 
assert(!atom.name().empty()); + NameToAtom::iterator pos = _nameMap.find(atom.name()); + if (pos != _nameMap.end()) { + // Found name collision, give each a unique ref-name. + std::string Storage; + llvm::raw_string_ostream buffer(Storage); + buffer << atom.name() << llvm::format(".%03d", ++_collisionCount); + StringRef newName = copyString(buffer.str()); + _refNames[&atom] = newName; + DEBUG_WITH_TYPE("WriterYAML", + llvm::dbgs() << "name collsion: creating ref-name: '" + << newName << "' (" + << (const void *)newName.data() + << ", " << newName.size() << ")\n"); + const lld::Atom *prevAtom = pos->second; + AtomToRefName::iterator pos2 = _refNames.find(prevAtom); + if (pos2 == _refNames.end()) { + // Only create ref-name for previous if none already created. + std::string Storage2; + llvm::raw_string_ostream buffer2(Storage2); + buffer2 << prevAtom->name() << llvm::format(".%03d", ++_collisionCount); + StringRef newName2 = copyString(buffer2.str()); + _refNames[prevAtom] = newName2; + DEBUG_WITH_TYPE("WriterYAML", + llvm::dbgs() << "name collsion: creating ref-name: '" + << newName2 << "' (" + << (const void *)newName2.data() << ", " + << newName2.size() << ")\n"); + } + } else { + // First time we've seen this name, just add it to map. + _nameMap[atom.name()] = &atom; + DEBUG_WITH_TYPE("WriterYAML", llvm::dbgs() + << "atom name seen for first time: '" + << atom.name() << "' (" + << (const void *)atom.name().data() + << ", " << atom.name().size() << ")\n"); + } + } + + bool hasRefName(const lld::Atom *atom) { return _refNames.count(atom); } + + StringRef refName(const lld::Atom *atom) { + return _refNames.find(atom)->second; + } + +private: + typedef llvm::StringMap<const lld::Atom *> NameToAtom; + typedef llvm::DenseMap<const lld::Atom *, std::string> AtomToRefName; + + // Allocate a new copy of this string in _storage, so the strings + // can be freed when RefNameBuilder is destroyed. 
+ StringRef copyString(StringRef str) { + char *s = _storage.Allocate<char>(str.size()); + memcpy(s, str.data(), str.size()); + return StringRef(s, str.size()); + } + + unsigned int _collisionCount; + unsigned int _unnamedCounter; + NameToAtom _nameMap; + AtomToRefName _refNames; + llvm::BumpPtrAllocator _storage; +}; + +/// Used when reading yaml files to find the target of a reference +/// that could be a name or ref-name. +class RefNameResolver { +public: + RefNameResolver(const lld::File *file, IO &io); + + const lld::Atom *lookup(StringRef name) const { + NameToAtom::const_iterator pos = _nameMap.find(name); + if (pos != _nameMap.end()) + return pos->second; + _io.setError(Twine("no such atom name: ") + name); + return nullptr; + } + + /// \brief Lookup a group parent when there is a reference of type + /// kindGroupChild. If there was no group-parent produce an appropriate + /// error. + const lld::Atom *lookupGroupParent(StringRef name) const { + NameToAtom::const_iterator pos = _groupMap.find(name); + if (pos != _groupMap.end()) + return pos->second; + _io.setError(Twine("no such group name: ") + name); + return nullptr; + } + +private: + typedef llvm::StringMap<const lld::Atom *> NameToAtom; + + void add(StringRef name, const lld::Atom *atom) { + if (const lld::DefinedAtom *da = dyn_cast<DefinedAtom>(atom)) { + if (da->isGroupParent()) { + if (_groupMap.count(name)) { + _io.setError(Twine("duplicate group name: ") + name); + } else { + _groupMap[name] = atom; + } + return; + } + } + if (_nameMap.count(name)) { + _io.setError(Twine("duplicate atom name: ") + name); + } else { + _nameMap[name] = atom; + } + } + + IO &_io; + NameToAtom _nameMap; + NameToAtom _groupMap; +}; + +// Used in NormalizedFile to hold the atoms lists. +template <typename T> class AtomList : public lld::File::atom_collection<T> { +public: + virtual lld::File::atom_iterator<T> begin() const { + return lld::File::atom_iterator<T>( + *this, + _atoms.empty() ? 
0 : reinterpret_cast<const void *>(_atoms.data())); + } + virtual lld::File::atom_iterator<T> end() const { + return lld::File::atom_iterator<T>( + *this, _atoms.empty() ? 0 : reinterpret_cast<const void *>( + _atoms.data() + _atoms.size())); + } + virtual const T *deref(const void *it) const { + return *reinterpret_cast<const T *const *>(it); + } + virtual void next(const void *&it) const { + const T *const *p = reinterpret_cast<const T *const *>(it); + ++p; + it = reinterpret_cast<const void *>(p); + } + virtual void push_back(const T *element) { _atoms.push_back(element); } + virtual uint64_t size() const { return _atoms.size(); } + std::vector<const T *> _atoms; +}; + +/// Mapping of kind: field in yaml files. +enum FileKinds { + fileKindObjectAtoms, // atom based object file encoded in yaml + fileKindArchive, // static archive library encoded in yaml + fileKindObjectELF, // ELF object files encoded in yaml + fileKindObjectMachO // mach-o object files encoded in yaml +}; + +struct ArchMember { + FileKinds _kind; + StringRef _name; + const lld::File *_content; +}; + +// The content bytes in a DefinedAtom are just uint8_t but we want +// special formatting, so define a strong type. +LLVM_YAML_STRONG_TYPEDEF(uint8_t, ImplicitHex8) + +// SharedLibraryAtoms have a bool canBeNull() method which we'd like to be +// more readable than just true/false. +LLVM_YAML_STRONG_TYPEDEF(bool, ShlibCanBeNull) + +// lld::Reference::Kind is a tuple of <namespace, arch, value>. +// For yaml, we just want one string that encapsulates the tuple. +struct RefKind { + Reference::KindNamespace ns; + Reference::KindArch arch; + Reference::KindValue value; +}; + +} // namespace anon + +LLVM_YAML_IS_SEQUENCE_VECTOR(ArchMember) +LLVM_YAML_IS_SEQUENCE_VECTOR(const lld::Reference *) +// Always write DefinedAtoms content bytes as a flow sequence. 
+LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(ImplicitHex8) +// for compatibility with gcc-4.7 in C++11 mode, add extra namespace +namespace llvm { +namespace yaml { + +// This is a custom formatter for RefKind +template <> struct ScalarTraits<RefKind> { + static void output(const RefKind &kind, void *ctxt, raw_ostream &out) { + assert(ctxt != nullptr); + YamlContext *info = reinterpret_cast<YamlContext *>(ctxt); + assert(info->_registry); + StringRef str; + if (info->_registry->referenceKindToString(kind.ns, kind.arch, kind.value, + str)) + out << str; + else + out << (int)(kind.ns) << "-" << (int)(kind.arch) << "-" << kind.value; + } + + static StringRef input(StringRef scalar, void *ctxt, RefKind &kind) { + assert(ctxt != nullptr); + YamlContext *info = reinterpret_cast<YamlContext *>(ctxt); + assert(info->_registry); + if (info->_registry->referenceKindFromString(scalar, kind.ns, kind.arch, + kind.value)) + return StringRef(); + return StringRef("unknown reference kind"); + } + + static bool mustQuote(StringRef) { return false; } +}; + +template <> struct ScalarEnumerationTraits<lld::File::Kind> { + static void enumeration(IO &io, lld::File::Kind &value) { + io.enumCase(value, "object", lld::File::kindObject); + io.enumCase(value, "shared-library", lld::File::kindSharedLibrary); + io.enumCase(value, "static-library", lld::File::kindArchiveLibrary); + } +}; + +template <> struct ScalarEnumerationTraits<lld::Atom::Scope> { + static void enumeration(IO &io, lld::Atom::Scope &value) { + io.enumCase(value, "global", lld::Atom::scopeGlobal); + io.enumCase(value, "hidden", lld::Atom::scopeLinkageUnit); + io.enumCase(value, "static", lld::Atom::scopeTranslationUnit); + } +}; + +template <> struct ScalarEnumerationTraits<lld::DefinedAtom::SectionChoice> { + static void enumeration(IO &io, lld::DefinedAtom::SectionChoice &value) { + io.enumCase(value, "content", lld::DefinedAtom::sectionBasedOnContent); + io.enumCase(value, "custom", lld::DefinedAtom::sectionCustomPreferred); + 
io.enumCase(value, "custom-required", + lld::DefinedAtom::sectionCustomRequired); + } +}; + +template <> struct ScalarEnumerationTraits<lld::DefinedAtom::Interposable> { + static void enumeration(IO &io, lld::DefinedAtom::Interposable &value) { + io.enumCase(value, "no", DefinedAtom::interposeNo); + io.enumCase(value, "yes", DefinedAtom::interposeYes); + io.enumCase(value, "yes-and-weak", DefinedAtom::interposeYesAndRuntimeWeak); + } +}; + +template <> struct ScalarEnumerationTraits<lld::DefinedAtom::Merge> { + static void enumeration(IO &io, lld::DefinedAtom::Merge &value) { + io.enumCase(value, "no", lld::DefinedAtom::mergeNo); + io.enumCase(value, "as-tentative", lld::DefinedAtom::mergeAsTentative); + io.enumCase(value, "as-weak", lld::DefinedAtom::mergeAsWeak); + io.enumCase(value, "as-addressed-weak", + lld::DefinedAtom::mergeAsWeakAndAddressUsed); + io.enumCase(value, "by-content", lld::DefinedAtom::mergeByContent); + io.enumCase(value, "same-name-and-size", + lld::DefinedAtom::mergeSameNameAndSize); + io.enumCase(value, "largest", lld::DefinedAtom::mergeByLargestSection); + } +}; + +template <> struct ScalarEnumerationTraits<lld::DefinedAtom::DeadStripKind> { + static void enumeration(IO &io, lld::DefinedAtom::DeadStripKind &value) { + io.enumCase(value, "normal", lld::DefinedAtom::deadStripNormal); + io.enumCase(value, "never", lld::DefinedAtom::deadStripNever); + io.enumCase(value, "always", lld::DefinedAtom::deadStripAlways); + } +}; + +template <> struct ScalarEnumerationTraits<lld::DefinedAtom::DynamicExport> { + static void enumeration(IO &io, lld::DefinedAtom::DynamicExport &value) { + io.enumCase(value, "normal", lld::DefinedAtom::dynamicExportNormal); + io.enumCase(value, "always", lld::DefinedAtom::dynamicExportAlways); + } +}; + +template <> struct ScalarEnumerationTraits<lld::DefinedAtom::CodeModel> { + static void enumeration(IO &io, lld::DefinedAtom::CodeModel &value) { + io.enumCase(value, "none", lld::DefinedAtom::codeNA); + 
io.enumCase(value, "mips-pic", lld::DefinedAtom::codeMipsPIC); + io.enumCase(value, "mips-micro", lld::DefinedAtom::codeMipsMicro); + io.enumCase(value, "mips-micro-pic", lld::DefinedAtom::codeMipsMicroPIC); + io.enumCase(value, "mips-16", lld::DefinedAtom::codeMips16); + io.enumCase(value, "arm-thumb", lld::DefinedAtom::codeARMThumb); + } +}; + +template <> +struct ScalarEnumerationTraits<lld::DefinedAtom::ContentPermissions> { + static void enumeration(IO &io, lld::DefinedAtom::ContentPermissions &value) { + io.enumCase(value, "---", lld::DefinedAtom::perm___); + io.enumCase(value, "r--", lld::DefinedAtom::permR__); + io.enumCase(value, "r-x", lld::DefinedAtom::permR_X); + io.enumCase(value, "rw-", lld::DefinedAtom::permRW_); + io.enumCase(value, "rwx", lld::DefinedAtom::permRWX); + io.enumCase(value, "rw-l", lld::DefinedAtom::permRW_L); + io.enumCase(value, "unknown", lld::DefinedAtom::permUnknown); + } +}; + +template <> struct ScalarEnumerationTraits<lld::DefinedAtom::ContentType> { + static void enumeration(IO &io, lld::DefinedAtom::ContentType &value) { + io.enumCase(value, "unknown", DefinedAtom::typeUnknown); + io.enumCase(value, "code", DefinedAtom::typeCode); + io.enumCase(value, "stub", DefinedAtom::typeStub); + io.enumCase(value, "constant", DefinedAtom::typeConstant); + io.enumCase(value, "data", DefinedAtom::typeData); + io.enumCase(value, "quick-data", DefinedAtom::typeDataFast); + io.enumCase(value, "zero-fill", DefinedAtom::typeZeroFill); + io.enumCase(value, "zero-fill-quick", DefinedAtom::typeZeroFillFast); + io.enumCase(value, "const-data", DefinedAtom::typeConstData); + io.enumCase(value, "got", DefinedAtom::typeGOT); + io.enumCase(value, "resolver", DefinedAtom::typeResolver); + io.enumCase(value, "branch-island", DefinedAtom::typeBranchIsland); + io.enumCase(value, "branch-shim", DefinedAtom::typeBranchShim); + io.enumCase(value, "stub-helper", DefinedAtom::typeStubHelper); + io.enumCase(value, "c-string", DefinedAtom::typeCString); + 
io.enumCase(value, "utf16-string", DefinedAtom::typeUTF16String); + io.enumCase(value, "unwind-cfi", DefinedAtom::typeCFI); + io.enumCase(value, "unwind-lsda", DefinedAtom::typeLSDA); + io.enumCase(value, "const-4-byte", DefinedAtom::typeLiteral4); + io.enumCase(value, "const-8-byte", DefinedAtom::typeLiteral8); + io.enumCase(value, "const-16-byte", DefinedAtom::typeLiteral16); + io.enumCase(value, "lazy-pointer", DefinedAtom::typeLazyPointer); + io.enumCase(value, "lazy-dylib-pointer", + DefinedAtom::typeLazyDylibPointer); + io.enumCase(value, "cfstring", DefinedAtom::typeCFString); + io.enumCase(value, "initializer-pointer", + DefinedAtom::typeInitializerPtr); + io.enumCase(value, "terminator-pointer", + DefinedAtom::typeTerminatorPtr); + io.enumCase(value, "c-string-pointer",DefinedAtom::typeCStringPtr); + io.enumCase(value, "objc-class-pointer", + DefinedAtom::typeObjCClassPtr); + io.enumCase(value, "objc-category-list", + DefinedAtom::typeObjC2CategoryList); + io.enumCase(value, "objc-class1", DefinedAtom::typeObjC1Class); + io.enumCase(value, "dtraceDOF", DefinedAtom::typeDTraceDOF); + io.enumCase(value, "interposing-tuples", + DefinedAtom::typeInterposingTuples); + io.enumCase(value, "lto-temp", DefinedAtom::typeTempLTO); + io.enumCase(value, "compact-unwind", DefinedAtom::typeCompactUnwindInfo); + io.enumCase(value, "unwind-info", DefinedAtom::typeProcessedUnwindInfo); + io.enumCase(value, "tlv-thunk", DefinedAtom::typeThunkTLV); + io.enumCase(value, "tlv-data", DefinedAtom::typeTLVInitialData); + io.enumCase(value, "tlv-zero-fill", DefinedAtom::typeTLVInitialZeroFill); + io.enumCase(value, "tlv-initializer-ptr", + DefinedAtom::typeTLVInitializerPtr); + io.enumCase(value, "mach_header", DefinedAtom::typeMachHeader); + io.enumCase(value, "thread-data", DefinedAtom::typeThreadData); + io.enumCase(value, "thread-zero-fill",DefinedAtom::typeThreadZeroFill); + io.enumCase(value, "ro-note", DefinedAtom::typeRONote); + io.enumCase(value, "rw-note", 
DefinedAtom::typeRWNote); + io.enumCase(value, "no-alloc", DefinedAtom::typeNoAlloc); + io.enumCase(value, "group-comdat", DefinedAtom::typeGroupComdat); + io.enumCase(value, "gnu-linkonce", DefinedAtom::typeGnuLinkOnce); + } +}; + +template <> struct ScalarEnumerationTraits<lld::UndefinedAtom::CanBeNull> { + static void enumeration(IO &io, lld::UndefinedAtom::CanBeNull &value) { + io.enumCase(value, "never", lld::UndefinedAtom::canBeNullNever); + io.enumCase(value, "at-runtime", lld::UndefinedAtom::canBeNullAtRuntime); + io.enumCase(value, "at-buildtime",lld::UndefinedAtom::canBeNullAtBuildtime); + } +}; + +template <> struct ScalarEnumerationTraits<ShlibCanBeNull> { + static void enumeration(IO &io, ShlibCanBeNull &value) { + io.enumCase(value, "never", false); + io.enumCase(value, "at-runtime", true); + } +}; + +template <> +struct ScalarEnumerationTraits<lld::SharedLibraryAtom::Type> { + static void enumeration(IO &io, lld::SharedLibraryAtom::Type &value) { + io.enumCase(value, "code", lld::SharedLibraryAtom::Type::Code); + io.enumCase(value, "data", lld::SharedLibraryAtom::Type::Data); + io.enumCase(value, "unknown", lld::SharedLibraryAtom::Type::Unknown); + } +}; + +/// This is a custom formatter for lld::DefinedAtom::Alignment. 
Values look +/// like: +/// 2^3 # 8-byte aligned +/// 7 mod 2^4 # 16-byte aligned plus 7 bytes +template <> struct ScalarTraits<lld::DefinedAtom::Alignment> { + static void output(const lld::DefinedAtom::Alignment &value, void *ctxt, + raw_ostream &out) { + if (value.modulus == 0) { + out << llvm::format("2^%d", value.powerOf2); + } else { + out << llvm::format("%d mod 2^%d", value.modulus, value.powerOf2); + } + } + + static StringRef input(StringRef scalar, void *ctxt, + lld::DefinedAtom::Alignment &value) { + value.modulus = 0; + size_t modStart = scalar.find("mod"); + if (modStart != StringRef::npos) { + StringRef modStr = scalar.slice(0, modStart); + modStr = modStr.rtrim(); + unsigned int modulus; + if (modStr.getAsInteger(0, modulus)) { + return "malformed alignment modulus"; + } + value.modulus = modulus; + scalar = scalar.drop_front(modStart + 3); + scalar = scalar.ltrim(); + } + if (!scalar.startswith("2^")) { + return "malformed alignment"; + } + StringRef powerStr = scalar.drop_front(2); + unsigned int power; + if (powerStr.getAsInteger(0, power)) { + return "malformed alignment power"; + } + value.powerOf2 = power; + if (value.modulus > (1 << value.powerOf2)) { + return "malformed alignment, modulus too large for power"; + } + return StringRef(); // returning empty string means success + } + + static bool mustQuote(StringRef) { return false; } +}; + +template <> struct ScalarEnumerationTraits<FileKinds> { + static void enumeration(IO &io, FileKinds &value) { + io.enumCase(value, "object", fileKindObjectAtoms); + io.enumCase(value, "archive", fileKindArchive); + io.enumCase(value, "object-elf", fileKindObjectELF); + io.enumCase(value, "object-mach-o", fileKindObjectMachO); + } +}; + +template <> struct MappingTraits<ArchMember> { + static void mapping(IO &io, ArchMember &member) { + io.mapOptional("kind", member._kind, fileKindObjectAtoms); + io.mapOptional("name", member._name); + io.mapRequired("content", member._content); + } +}; + +// Declare that 
an AtomList is a yaml sequence. +template <typename T> struct SequenceTraits<AtomList<T> > { + static size_t size(IO &io, AtomList<T> &seq) { return seq._atoms.size(); } + static const T *&element(IO &io, AtomList<T> &seq, size_t index) { + if (index >= seq._atoms.size()) + seq._atoms.resize(index + 1); + return seq._atoms[index]; + } +}; + +// Used to allow DefinedAtom content bytes to be a flow sequence of +// two-digit hex numbers without the leading 0x (e.g. FF, 04, 0A) +template <> struct ScalarTraits<ImplicitHex8> { + static void output(const ImplicitHex8 &val, void *, raw_ostream &out) { + uint8_t num = val; + out << llvm::format("%02X", num); + } + + static StringRef input(StringRef str, void *, ImplicitHex8 &val) { + unsigned long long n; + if (getAsUnsignedInteger(str, 16, n)) + return "invalid two-digit-hex number"; + if (n > 0xFF) + return "out of range two-digit-hex number"; + val = n; + return StringRef(); // returning empty string means success + } + + static bool mustQuote(StringRef) { return false; } +}; + +// YAML conversion for std::vector<const lld::File*> +template <> struct DocumentListTraits<std::vector<const lld::File *> > { + static size_t size(IO &io, std::vector<const lld::File *> &seq) { + return seq.size(); + } + static const lld::File *&element(IO &io, std::vector<const lld::File *> &seq, + size_t index) { + if (index >= seq.size()) + seq.resize(index + 1); + return seq[index]; + } +}; + +// YAML conversion for const lld::File* +template <> struct MappingTraits<const lld::File *> { + + class NormArchiveFile : public lld::ArchiveLibraryFile { + public: + NormArchiveFile(IO &io) : ArchiveLibraryFile(""), _path() {} + NormArchiveFile(IO &io, const lld::File *file) + : ArchiveLibraryFile(file->path()), _path(file->path()) { + // If we want to support writing archives, this constructor would + // need to populate _members. 
+ } + + const lld::File *denormalize(IO &io) { return this; } + + const atom_collection<lld::DefinedAtom> &defined() const override { + return _noDefinedAtoms; + } + const atom_collection<lld::UndefinedAtom> &undefined() const override { + return _noUndefinedAtoms; + } + virtual const atom_collection<lld::SharedLibraryAtom> & + sharedLibrary() const override { + return _noSharedLibraryAtoms; + } + const atom_collection<lld::AbsoluteAtom> &absolute() const override { + return _noAbsoluteAtoms; + } + File *find(StringRef name, bool dataSymbolOnly) override { + for (const ArchMember &member : _members) { + for (const lld::DefinedAtom *atom : member._content->defined()) { + if (name == atom->name()) { + if (!dataSymbolOnly) + return const_cast<File *>(member._content); + switch (atom->contentType()) { + case lld::DefinedAtom::typeData: + case lld::DefinedAtom::typeZeroFill: + return const_cast<File *>(member._content); + default: + break; + } + } + } + } + return nullptr; + } + + virtual std::error_code + parseAllMembers(std::vector<std::unique_ptr<File>> &result) override { + return std::error_code(); + } + + StringRef _path; + std::vector<ArchMember> _members; + }; + + class NormalizedFile : public lld::File { + public: + NormalizedFile(IO &io) : File("", kindObject), _io(io), _rnb(nullptr) {} + NormalizedFile(IO &io, const lld::File *file) + : File(file->path(), kindObject), _io(io), + _rnb(new RefNameBuilder(*file)), _path(file->path()) { + for (const lld::DefinedAtom *a : file->defined()) + _definedAtoms.push_back(a); + for (const lld::UndefinedAtom *a : file->undefined()) + _undefinedAtoms.push_back(a); + for (const lld::SharedLibraryAtom *a : file->sharedLibrary()) + _sharedLibraryAtoms.push_back(a); + for (const lld::AbsoluteAtom *a : file->absolute()) + _absoluteAtoms.push_back(a); + } + const lld::File *denormalize(IO &io); + + const atom_collection<lld::DefinedAtom> &defined() const override { + return _definedAtoms; + } + const 
atom_collection<lld::UndefinedAtom> &undefined() const override { + return _undefinedAtoms; + } + virtual const atom_collection<lld::SharedLibraryAtom> & + sharedLibrary() const override { + return _sharedLibraryAtoms; + } + const atom_collection<lld::AbsoluteAtom> &absolute() const override { + return _absoluteAtoms; + } + + // Allocate a new copy of this string in _storage, so the strings + // can be freed when File is destroyed. + StringRef copyString(StringRef str) { + char *s = _storage.Allocate<char>(str.size()); + memcpy(s, str.data(), str.size()); + return StringRef(s, str.size()); + } + + IO &_io; + std::unique_ptr<RefNameBuilder> _rnb; + StringRef _path; + AtomList<lld::DefinedAtom> _definedAtoms; + AtomList<lld::UndefinedAtom> _undefinedAtoms; + AtomList<lld::SharedLibraryAtom> _sharedLibraryAtoms; + AtomList<lld::AbsoluteAtom> _absoluteAtoms; + llvm::BumpPtrAllocator _storage; + }; + + static void mapping(IO &io, const lld::File *&file) { + YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext()); + assert(info != nullptr); + // Let any register tag handler process this. + if (info->_registry && info->_registry->handleTaggedDoc(io, file)) + return; + // If no registered handler claims this tag and there is no tag, + // grandfather in as "!native". 
+ if (io.mapTag("!native", true) || io.mapTag("tag:yaml.org,2002:map")) + mappingAtoms(io, file); + } + + static void mappingAtoms(IO &io, const lld::File *&file) { + MappingNormalizationHeap<NormalizedFile, const lld::File *> keys(io, file); + YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext()); + assert(info != nullptr); + info->_file = keys.operator->(); + + io.mapOptional("path", keys->_path); + io.mapOptional("defined-atoms", keys->_definedAtoms); + io.mapOptional("undefined-atoms", keys->_undefinedAtoms); + io.mapOptional("shared-library-atoms", keys->_sharedLibraryAtoms); + io.mapOptional("absolute-atoms", keys->_absoluteAtoms); + } + + static void mappingArchive(IO &io, const lld::File *&file) { + MappingNormalizationHeap<NormArchiveFile, const lld::File *> keys(io, file); + + io.mapOptional("path", keys->_path); + io.mapOptional("members", keys->_members); + } +}; + +// YAML conversion for const lld::Reference* +template <> struct MappingTraits<const lld::Reference *> { + + class NormalizedReference : public lld::Reference { + public: + NormalizedReference(IO &io) + : lld::Reference(lld::Reference::KindNamespace::all, + lld::Reference::KindArch::all, 0), + _target(nullptr), _targetName(), _offset(0), _addend(0), _tag(0) {} + + NormalizedReference(IO &io, const lld::Reference *ref) + : lld::Reference(ref->kindNamespace(), ref->kindArch(), + ref->kindValue()), + _target(nullptr), _targetName(targetName(io, ref)), + _offset(ref->offsetInAtom()), _addend(ref->addend()), + _tag(ref->tag()) { + _mappedKind.ns = ref->kindNamespace(); + _mappedKind.arch = ref->kindArch(); + _mappedKind.value = ref->kindValue(); + } + + const lld::Reference *denormalize(IO &io) { + YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext()); + assert(info != nullptr); + typedef MappingTraits<const lld::File *>::NormalizedFile NormalizedFile; + NormalizedFile *f = reinterpret_cast<NormalizedFile *>(info->_file); + if (!_targetName.empty()) + _targetName = 
f->copyString(_targetName); + DEBUG_WITH_TYPE("WriterYAML", llvm::dbgs() + << "created Reference to name: '" + << _targetName << "' (" + << (const void *)_targetName.data() + << ", " << _targetName.size() << ")\n"); + setKindNamespace(_mappedKind.ns); + setKindArch(_mappedKind.arch); + setKindValue(_mappedKind.value); + return this; + } + void bind(const RefNameResolver &); + static StringRef targetName(IO &io, const lld::Reference *ref); + + uint64_t offsetInAtom() const override { return _offset; } + const lld::Atom *target() const override { return _target; } + Addend addend() const override { return _addend; } + void setAddend(Addend a) override { _addend = a; } + void setTarget(const lld::Atom *a) override { _target = a; } + + const lld::Atom *_target; + StringRef _targetName; + uint32_t _offset; + Addend _addend; + RefKind _mappedKind; + uint32_t _tag; + }; + + static void mapping(IO &io, const lld::Reference *&ref) { + MappingNormalizationHeap<NormalizedReference, const lld::Reference *> keys( + io, ref); + + io.mapRequired("kind", keys->_mappedKind); + io.mapOptional("offset", keys->_offset); + io.mapOptional("target", keys->_targetName); + io.mapOptional("addend", keys->_addend, (lld::Reference::Addend)0); + io.mapOptional("tag", keys->_tag, 0u); + } +}; + +// YAML conversion for const lld::DefinedAtom* +template <> struct MappingTraits<const lld::DefinedAtom *> { + + class NormalizedAtom : public lld::DefinedAtom { + public: + NormalizedAtom(IO &io) + : _file(fileFromContext(io)), _name(), _refName(), _contentType(), + _alignment(0), _content(), _references(), _isGroupChild(false) { + static uint32_t ordinalCounter = 1; + _ordinal = ordinalCounter++; + } + NormalizedAtom(IO &io, const lld::DefinedAtom *atom) + : _file(fileFromContext(io)), _name(atom->name()), _refName(), + _scope(atom->scope()), _interpose(atom->interposable()), + _merge(atom->merge()), _contentType(atom->contentType()), + _alignment(atom->alignment()), 
_sectionChoice(atom->sectionChoice()), + _deadStrip(atom->deadStrip()), _dynamicExport(atom->dynamicExport()), + _codeModel(atom->codeModel()), + _permissions(atom->permissions()), _size(atom->size()), + _sectionName(atom->customSectionName()), + _sectionSize(atom->sectionSize()) { + for (const lld::Reference *r : *atom) + _references.push_back(r); + if (!atom->occupiesDiskSpace()) + return; + ArrayRef<uint8_t> cont = atom->rawContent(); + _content.reserve(cont.size()); + for (uint8_t x : cont) + _content.push_back(x); + } + const lld::DefinedAtom *denormalize(IO &io) { + YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext()); + assert(info != nullptr); + typedef MappingTraits<const lld::File *>::NormalizedFile NormalizedFile; + NormalizedFile *f = reinterpret_cast<NormalizedFile *>(info->_file); + if (!_name.empty()) + _name = f->copyString(_name); + if (!_refName.empty()) + _refName = f->copyString(_refName); + if (!_sectionName.empty()) + _sectionName = f->copyString(_sectionName); + DEBUG_WITH_TYPE("WriterYAML", + llvm::dbgs() << "created DefinedAtom named: '" << _name + << "' (" << (const void *)_name.data() + << ", " << _name.size() << ")\n"); + return this; + } + void bind(const RefNameResolver &); + // Extract current File object from YAML I/O parsing context + const lld::File &fileFromContext(IO &io) { + YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext()); + assert(info != nullptr); + assert(info->_file != nullptr); + return *info->_file; + } + + const lld::File &file() const override { return _file; } + StringRef name() const override { return _name; } + uint64_t size() const override { return _size; } + Scope scope() const override { return _scope; } + Interposable interposable() const override { return _interpose; } + Merge merge() const override { return _merge; } + ContentType contentType() const override { return _contentType; } + Alignment alignment() const override { return _alignment; } + SectionChoice 
sectionChoice() const override { return _sectionChoice; } + StringRef customSectionName() const override { return _sectionName; } + uint64_t sectionSize() const override { return _sectionSize; } + DeadStripKind deadStrip() const override { return _deadStrip; } + DynamicExport dynamicExport() const override { return _dynamicExport; } + CodeModel codeModel() const override { return _codeModel; } + ContentPermissions permissions() const override { return _permissions; } + void setGroupChild(bool val) { _isGroupChild = val; } + bool isGroupChild() const { return _isGroupChild; } + ArrayRef<uint8_t> rawContent() const override { + if (!occupiesDiskSpace()) + return ArrayRef<uint8_t>(); + return ArrayRef<uint8_t>( + reinterpret_cast<const uint8_t *>(_content.data()), _content.size()); + } + + uint64_t ordinal() const override { return _ordinal; } + + reference_iterator begin() const override { + uintptr_t index = 0; + const void *it = reinterpret_cast<const void *>(index); + return reference_iterator(*this, it); + } + reference_iterator end() const override { + uintptr_t index = _references.size(); + const void *it = reinterpret_cast<const void *>(index); + return reference_iterator(*this, it); + } + const lld::Reference *derefIterator(const void *it) const override { + uintptr_t index = reinterpret_cast<uintptr_t>(it); + assert(index < _references.size()); + return _references[index]; + } + void incrementIterator(const void *&it) const override { + uintptr_t index = reinterpret_cast<uintptr_t>(it); + ++index; + it = reinterpret_cast<const void *>(index); + } + + const lld::File &_file; + StringRef _name; + StringRef _refName; + Scope _scope; + Interposable _interpose; + Merge _merge; + ContentType _contentType; + Alignment _alignment; + SectionChoice _sectionChoice; + DeadStripKind _deadStrip; + DynamicExport _dynamicExport; + CodeModel _codeModel; + ContentPermissions _permissions; + uint32_t _ordinal; + std::vector<ImplicitHex8> _content; + uint64_t _size; + StringRef 
_sectionName; + uint64_t _sectionSize; + std::vector<const lld::Reference *> _references; + bool _isGroupChild; + }; + + static void mapping(IO &io, const lld::DefinedAtom *&atom) { + MappingNormalizationHeap<NormalizedAtom, const lld::DefinedAtom *> keys( + io, atom); + if (io.outputting()) { + // If writing YAML, check if atom needs a ref-name. + typedef MappingTraits<const lld::File *>::NormalizedFile NormalizedFile; + YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext()); + assert(info != nullptr); + NormalizedFile *f = reinterpret_cast<NormalizedFile *>(info->_file); + assert(f); + assert(f->_rnb); + if (f->_rnb->hasRefName(atom)) { + keys->_refName = f->_rnb->refName(atom); + } + } + + io.mapOptional("name", keys->_name, StringRef()); + io.mapOptional("ref-name", keys->_refName, StringRef()); + io.mapOptional("scope", keys->_scope, + DefinedAtom::scopeTranslationUnit); + io.mapOptional("type", keys->_contentType, + DefinedAtom::typeCode); + io.mapOptional("content", keys->_content); + io.mapOptional("size", keys->_size, (uint64_t)keys->_content.size()); + io.mapOptional("interposable", keys->_interpose, + DefinedAtom::interposeNo); + io.mapOptional("merge", keys->_merge, DefinedAtom::mergeNo); + io.mapOptional("alignment", keys->_alignment, + DefinedAtom::Alignment(0)); + io.mapOptional("section-choice", keys->_sectionChoice, + DefinedAtom::sectionBasedOnContent); + io.mapOptional("section-name", keys->_sectionName, StringRef()); + io.mapOptional("section-size", keys->_sectionSize, (uint64_t)0); + io.mapOptional("dead-strip", keys->_deadStrip, + DefinedAtom::deadStripNormal); + io.mapOptional("dynamic-export", keys->_dynamicExport, + DefinedAtom::dynamicExportNormal); + io.mapOptional("code-model", keys->_codeModel, DefinedAtom::codeNA); + // default permissions based on content type + io.mapOptional("permissions", keys->_permissions, + DefinedAtom::permissions( + keys->_contentType)); + io.mapOptional("references", keys->_references); + } +}; 
+ +// YAML conversion for const lld::UndefinedAtom* +template <> struct MappingTraits<const lld::UndefinedAtom *> { + + class NormalizedAtom : public lld::UndefinedAtom { + public: + NormalizedAtom(IO &io) + : _file(fileFromContext(io)), _name(), _canBeNull(canBeNullNever), + _fallback(nullptr) {} + + NormalizedAtom(IO &io, const lld::UndefinedAtom *atom) + : _file(fileFromContext(io)), _name(atom->name()), + _canBeNull(atom->canBeNull()), _fallback(atom->fallback()) {} + + const lld::UndefinedAtom *denormalize(IO &io) { + YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext()); + assert(info != nullptr); + typedef MappingTraits<const lld::File *>::NormalizedFile NormalizedFile; + NormalizedFile *f = reinterpret_cast<NormalizedFile *>(info->_file); + if (!_name.empty()) + _name = f->copyString(_name); + + DEBUG_WITH_TYPE("WriterYAML", + llvm::dbgs() << "created UndefinedAtom named: '" << _name + << "' (" << (const void *)_name.data() << ", " + << _name.size() << ")\n"); + return this; + } + + // Extract current File object from YAML I/O parsing context + const lld::File &fileFromContext(IO &io) { + YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext()); + assert(info != nullptr); + assert(info->_file != nullptr); + return *info->_file; + } + + const lld::File &file() const override { return _file; } + StringRef name() const override { return _name; } + CanBeNull canBeNull() const override { return _canBeNull; } + const UndefinedAtom *fallback() const override { return _fallback; } + + const lld::File &_file; + StringRef _name; + CanBeNull _canBeNull; + const UndefinedAtom *_fallback; + }; + + static void mapping(IO &io, const lld::UndefinedAtom *&atom) { + MappingNormalizationHeap<NormalizedAtom, const lld::UndefinedAtom *> keys( + io, atom); + + io.mapRequired("name", keys->_name); + io.mapOptional("can-be-null", keys->_canBeNull, + lld::UndefinedAtom::canBeNullNever); + io.mapOptional("fallback", keys->_fallback, + (const 
lld::UndefinedAtom *)nullptr); + } +}; + +// YAML conversion for const lld::SharedLibraryAtom* +template <> struct MappingTraits<const lld::SharedLibraryAtom *> { + + class NormalizedAtom : public lld::SharedLibraryAtom { + public: + NormalizedAtom(IO &io) + : _file(fileFromContext(io)), _name(), _loadName(), _canBeNull(false), + _type(Type::Unknown), _size(0) {} + NormalizedAtom(IO &io, const lld::SharedLibraryAtom *atom) + : _file(fileFromContext(io)), _name(atom->name()), + _loadName(atom->loadName()), _canBeNull(atom->canBeNullAtRuntime()), + _type(atom->type()), _size(atom->size()) {} + + const lld::SharedLibraryAtom *denormalize(IO &io) { + YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext()); + assert(info != nullptr); + typedef MappingTraits<const lld::File *>::NormalizedFile NormalizedFile; + NormalizedFile *f = reinterpret_cast<NormalizedFile *>(info->_file); + if (!_name.empty()) + _name = f->copyString(_name); + if (!_loadName.empty()) + _loadName = f->copyString(_loadName); + + DEBUG_WITH_TYPE("WriterYAML", + llvm::dbgs() << "created SharedLibraryAtom named: '" + << _name << "' (" + << (const void *)_name.data() + << ", " << _name.size() << ")\n"); + return this; + } + + // Extract current File object from YAML I/O parsing context + const lld::File &fileFromContext(IO &io) { + YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext()); + assert(info != nullptr); + assert(info->_file != nullptr); + return *info->_file; + } + + const lld::File &file() const override { return _file; } + StringRef name() const override { return _name; } + StringRef loadName() const override { return _loadName; } + bool canBeNullAtRuntime() const override { return _canBeNull; } + Type type() const override { return _type; } + uint64_t size() const override { return _size; } + + const lld::File &_file; + StringRef _name; + StringRef _loadName; + ShlibCanBeNull _canBeNull; + Type _type; + uint64_t _size; + }; + + static void mapping(IO &io, const 
lld::SharedLibraryAtom *&atom) { + + MappingNormalizationHeap<NormalizedAtom, const lld::SharedLibraryAtom *> + keys(io, atom); + + io.mapRequired("name", keys->_name); + io.mapOptional("load-name", keys->_loadName); + io.mapOptional("can-be-null", keys->_canBeNull, (ShlibCanBeNull) false); + io.mapOptional("type", keys->_type, SharedLibraryAtom::Type::Code); + io.mapOptional("size", keys->_size, uint64_t(0)); + } +}; + +// YAML conversion for const lld::AbsoluteAtom* +template <> struct MappingTraits<const lld::AbsoluteAtom *> { + + class NormalizedAtom : public lld::AbsoluteAtom { + public: + NormalizedAtom(IO &io) + : _file(fileFromContext(io)), _name(), _scope(), _value(0) {} + NormalizedAtom(IO &io, const lld::AbsoluteAtom *atom) + : _file(fileFromContext(io)), _name(atom->name()), + _scope(atom->scope()), _value(atom->value()) {} + const lld::AbsoluteAtom *denormalize(IO &io) { + YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext()); + assert(info != nullptr); + typedef MappingTraits<const lld::File *>::NormalizedFile NormalizedFile; + NormalizedFile *f = reinterpret_cast<NormalizedFile *>(info->_file); + if (!_name.empty()) + _name = f->copyString(_name); + + DEBUG_WITH_TYPE("WriterYAML", + llvm::dbgs() << "created AbsoluteAtom named: '" << _name + << "' (" << (const void *)_name.data() + << ", " << _name.size() << ")\n"); + return this; + } + // Extract current File object from YAML I/O parsing context + const lld::File &fileFromContext(IO &io) { + YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext()); + assert(info != nullptr); + assert(info->_file != nullptr); + return *info->_file; + } + + const lld::File &file() const override { return _file; } + StringRef name() const override { return _name; } + uint64_t value() const override { return _value; } + Scope scope() const override { return _scope; } + + const lld::File &_file; + StringRef _name; + StringRef _refName; + Scope _scope; + Hex64 _value; + }; + + static void 
mapping(IO &io, const lld::AbsoluteAtom *&atom) { + MappingNormalizationHeap<NormalizedAtom, const lld::AbsoluteAtom *> keys( + io, atom); + + if (io.outputting()) { + typedef MappingTraits<const lld::File *>::NormalizedFile NormalizedFile; + YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext()); + assert(info != nullptr); + NormalizedFile *f = reinterpret_cast<NormalizedFile *>(info->_file); + assert(f); + assert(f->_rnb); + if (f->_rnb->hasRefName(atom)) { + keys->_refName = f->_rnb->refName(atom); + } + } + + io.mapRequired("name", keys->_name); + io.mapOptional("ref-name", keys->_refName, StringRef()); + io.mapOptional("scope", keys->_scope); + io.mapRequired("value", keys->_value); + } +}; + +} // namespace llvm +} // namespace yaml + +RefNameResolver::RefNameResolver(const lld::File *file, IO &io) : _io(io) { + typedef MappingTraits<const lld::DefinedAtom *>::NormalizedAtom + NormalizedAtom; + for (const lld::DefinedAtom *a : file->defined()) { + const auto *na = (const NormalizedAtom *)a; + if (!na->_refName.empty()) + add(na->_refName, a); + else if (!na->_name.empty()) + add(na->_name, a); + } + + for (const lld::UndefinedAtom *a : file->undefined()) + add(a->name(), a); + + for (const lld::SharedLibraryAtom *a : file->sharedLibrary()) + add(a->name(), a); + + typedef MappingTraits<const lld::AbsoluteAtom *>::NormalizedAtom NormAbsAtom; + for (const lld::AbsoluteAtom *a : file->absolute()) { + const auto *na = (const NormAbsAtom *)a; + if (na->_refName.empty()) + add(na->_name, a); + else + add(na->_refName, a); + } +} + +inline const lld::File * +MappingTraits<const lld::File *>::NormalizedFile::denormalize(IO &io) { + typedef MappingTraits<const lld::DefinedAtom *>::NormalizedAtom + NormalizedAtom; + + RefNameResolver nameResolver(this, io); + // Now that all atoms are parsed, references can be bound. 
+ for (const lld::DefinedAtom *a : this->defined()) { + auto *normAtom = (NormalizedAtom *)const_cast<DefinedAtom *>(a); + normAtom->bind(nameResolver); + } + + _definedAtoms._atoms.erase( + std::remove_if(_definedAtoms._atoms.begin(), _definedAtoms._atoms.end(), + [](const DefinedAtom *a) { + return ((const NormalizedAtom *)a)->isGroupChild(); + }), + _definedAtoms._atoms.end()); + + return this; +} + +inline void MappingTraits<const lld::DefinedAtom *>::NormalizedAtom::bind( + const RefNameResolver &resolver) { + typedef MappingTraits<const lld::Reference *>::NormalizedReference + NormalizedReference; + for (const lld::Reference *ref : _references) { + auto *normRef = (NormalizedReference *)const_cast<Reference *>(ref); + normRef->bind(resolver); + } +} + +inline void MappingTraits<const lld::Reference *>::NormalizedReference::bind( + const RefNameResolver &resolver) { + typedef MappingTraits<const lld::DefinedAtom *>::NormalizedAtom NormalizedAtom; + + _target = resolver.lookup(_targetName); + + if (_mappedKind.ns == lld::Reference::KindNamespace::all && + _mappedKind.value == lld::Reference::kindGroupChild) { + ((NormalizedAtom *)const_cast<Atom *>(_target))->setGroupChild(true); + } +} + +inline StringRef +MappingTraits<const lld::Reference *>::NormalizedReference::targetName( + IO &io, const lld::Reference *ref) { + if (ref->target() == nullptr) + return StringRef(); + YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext()); + assert(info != nullptr); + typedef MappingTraits<const lld::File *>::NormalizedFile NormalizedFile; + NormalizedFile *f = reinterpret_cast<NormalizedFile *>(info->_file); + RefNameBuilder &rnb = *f->_rnb; + if (rnb.hasRefName(ref->target())) + return rnb.refName(ref->target()); + return ref->target()->name(); +} + +namespace lld { +namespace yaml { + +class Writer : public lld::Writer { +public: + Writer(const LinkingContext &context) : _context(context) {} + + std::error_code writeFile(const lld::File &file, StringRef 
outPath) override { + // Create stream to path. + std::error_code ec; + llvm::raw_fd_ostream out(outPath, ec, llvm::sys::fs::F_Text); + if (ec) + return ec; + + // Create yaml Output writer, using yaml options for context. + YamlContext yamlContext; + yamlContext._linkingContext = &_context; + yamlContext._registry = &_context.registry(); + llvm::yaml::Output yout(out, &yamlContext); + + // Write yaml output. + const lld::File *fileRef = &file; + yout << fileRef; + + return std::error_code(); + } + +private: + const LinkingContext &_context; +}; + +} // end namespace yaml + +namespace { + +/// Handles !native tagged yaml documents. +class NativeYamlIOTaggedDocumentHandler : public YamlIOTaggedDocumentHandler { + bool handledDocTag(llvm::yaml::IO &io, const lld::File *&file) const override { + if (io.mapTag("!native")) { + MappingTraits<const lld::File *>::mappingAtoms(io, file); + return true; + } + return false; + } +}; + + +/// Handles !archive tagged yaml documents. +class ArchiveYamlIOTaggedDocumentHandler : public YamlIOTaggedDocumentHandler { + bool handledDocTag(llvm::yaml::IO &io, const lld::File *&file) const override { + if (io.mapTag("!archive")) { + MappingTraits<const lld::File *>::mappingArchive(io, file); + return true; + } + return false; + } +}; + + + +class YAMLReader : public Reader { +public: + YAMLReader(const Registry ®istry) : _registry(registry) {} + + bool canParse(file_magic, StringRef ext, const MemoryBuffer &) const override { + return (ext.equals(".objtxt") || ext.equals(".yaml")); + } + + std::error_code + loadFile(std::unique_ptr<MemoryBuffer> mb, const class Registry &, + std::vector<std::unique_ptr<File>> &result) const override { + // Create YAML Input Reader. + YamlContext yamlContext; + yamlContext._registry = &_registry; + yamlContext._path = mb->getBufferIdentifier(); + llvm::yaml::Input yin(mb->getBuffer(), &yamlContext); + + // Fill vector with File objects created by parsing yaml. 
+ std::vector<const lld::File *> createdFiles; + yin >> createdFiles; + + // Error out now if there were parsing errors. + if (yin.error()) + return make_error_code(lld::YamlReaderError::illegal_value); + + std::shared_ptr<MemoryBuffer> smb(mb.release()); + for (const File *file : createdFiles) { + // Note: loadFile() should return vector of *const* File + File *f = const_cast<File *>(file); + f->setLastError(std::error_code()); + f->setSharedMemoryBuffer(smb); + result.emplace_back(f); + } + return make_error_code(lld::YamlReaderError::success); + } + +private: + const Registry &_registry; +}; + +} // anonymous namespace + +void Registry::addSupportYamlFiles() { + add(std::unique_ptr<Reader>(new YAMLReader(*this))); + add(std::unique_ptr<YamlIOTaggedDocumentHandler>( + new NativeYamlIOTaggedDocumentHandler())); + add(std::unique_ptr<YamlIOTaggedDocumentHandler>( + new ArchiveYamlIOTaggedDocumentHandler())); +} + +std::unique_ptr<Writer> createWriterYAML(const LinkingContext &context) { + return std::unique_ptr<Writer>(new lld::yaml::Writer(context)); +} + +} // end namespace lld |