diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2015-03-24 21:31:36 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2015-03-24 21:31:36 +0000 |
commit | fb911942f1434f3d1750f83f25f5e42c80e60638 (patch) | |
tree | 1678c4a4f0182e4029a86d135aa4a1b7d09e3c41 /lib/ReaderWriter/PECOFF |
Notes
Diffstat (limited to 'lib/ReaderWriter/PECOFF')
22 files changed, 5470 insertions, 0 deletions
diff --git a/lib/ReaderWriter/PECOFF/Atoms.h b/lib/ReaderWriter/PECOFF/Atoms.h new file mode 100644 index 0000000000000..257edc17884b7 --- /dev/null +++ b/lib/ReaderWriter/PECOFF/Atoms.h @@ -0,0 +1,312 @@ +//===- lib/ReaderWriter/PECOFF/Atoms.h ------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_PE_COFF_ATOMS_H +#define LLD_READER_WRITER_PE_COFF_ATOMS_H + +#include "lld/Core/File.h" +#include "lld/Core/Simple.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/Object/COFF.h" +#include <vector> + +namespace lld { +namespace pecoff { +class COFFDefinedAtom; + +class COFFUndefinedAtom : public UndefinedAtom { +public: + COFFUndefinedAtom(const File &file, StringRef name, + const UndefinedAtom *fallback = nullptr) + : _owningFile(file), _name(name), _fallback(fallback) {} + + const File &file() const override { return _owningFile; } + StringRef name() const override { return _name; } + CanBeNull canBeNull() const override { return CanBeNull::canBeNullNever; } + const UndefinedAtom *fallback() const override { return _fallback; } + +private: + const File &_owningFile; + StringRef _name; + const UndefinedAtom *_fallback; +}; + +/// The base class of all COFF defined atoms. A derived class of +/// COFFBaseDefinedAtom may represent atoms read from a file or atoms created +/// by the linker. An example of the latter case is the jump table for symbols +/// in a DLL. 
+class COFFBaseDefinedAtom : public DefinedAtom { +public: + enum class Kind { + File, + Internal + }; + + const File &file() const override { return _file; } + StringRef name() const override { return _name; } + Interposable interposable() const override { return interposeNo; } + Merge merge() const override { return mergeNo; } + Alignment alignment() const override { return Alignment(0); } + StringRef customSectionName() const override { return ""; } + DeadStripKind deadStrip() const override { return deadStripNormal; } + + Kind getKind() const { return _kind; } + + void addReference(std::unique_ptr<SimpleReference> reference) { + _references.push_back(std::move(reference)); + } + + reference_iterator begin() const override { + return reference_iterator(*this, reinterpret_cast<const void *>(0)); + } + + reference_iterator end() const override { + return reference_iterator( + *this, reinterpret_cast<const void *>(_references.size())); + } + +protected: + COFFBaseDefinedAtom(const File &file, StringRef name, Kind kind) + : _file(file), _name(name), _kind(kind) {} + +private: + const Reference *derefIterator(const void *iter) const override { + size_t index = reinterpret_cast<size_t>(iter); + return _references[index].get(); + } + + void incrementIterator(const void *&iter) const override { + size_t index = reinterpret_cast<size_t>(iter); + iter = reinterpret_cast<const void *>(index + 1); + } + + const File &_file; + StringRef _name; + Kind _kind; + std::vector<std::unique_ptr<SimpleReference>> _references; +}; + +/// This is the root class of the atom read from a file. This class have two +/// subclasses; one for the regular atom and another for the BSS atom. 
+class COFFDefinedFileAtom : public COFFBaseDefinedAtom { +public: + COFFDefinedFileAtom(const File &file, StringRef name, StringRef sectionName, + uint64_t sectionSize, Scope scope, + ContentType contentType, ContentPermissions perms, + uint64_t ordinal) + : COFFBaseDefinedAtom(file, name, Kind::File), _sectionName(sectionName), + _sectionSize(sectionSize), _scope(scope), _contentType(contentType), + _permissions(perms), _ordinal(ordinal), _alignment(0) {} + + static bool classof(const COFFBaseDefinedAtom *atom) { + return atom->getKind() == Kind::File; + } + + void setAlignment(Alignment val) { _alignment = val; } + SectionChoice sectionChoice() const override { return sectionCustomRequired; } + StringRef customSectionName() const override { return _sectionName; } + uint64_t sectionSize() const override { return _sectionSize; } + Scope scope() const override { return _scope; } + ContentType contentType() const override { return _contentType; } + ContentPermissions permissions() const override { return _permissions; } + uint64_t ordinal() const override { return _ordinal; } + Alignment alignment() const override { return _alignment; } + + void addAssociate(const DefinedAtom *other) { + auto *ref = new SimpleReference(Reference::KindNamespace::all, + Reference::KindArch::all, + lld::Reference::kindAssociate, 0, other, 0); + addReference(std::unique_ptr<SimpleReference>(ref)); + } + +private: + StringRef _sectionName; + uint64_t _sectionSize; + Scope _scope; + ContentType _contentType; + ContentPermissions _permissions; + uint64_t _ordinal; + Alignment _alignment; + std::vector<std::unique_ptr<SimpleReference>> _references; +}; + +// A COFFDefinedAtom represents an atom read from a file and has contents. 
+class COFFDefinedAtom : public COFFDefinedFileAtom { +public: + COFFDefinedAtom(const File &file, StringRef name, StringRef sectionName, + uint64_t sectionSize, Scope scope, ContentType type, + bool isComdat, ContentPermissions perms, Merge merge, + ArrayRef<uint8_t> data, uint64_t ordinal) + : COFFDefinedFileAtom(file, name, sectionName, sectionSize, + scope, type, perms, ordinal), + _isComdat(isComdat), _merge(merge), _dataref(data) {} + + Merge merge() const override { return _merge; } + uint64_t size() const override { return _dataref.size(); } + ArrayRef<uint8_t> rawContent() const override { return _dataref; } + + DeadStripKind deadStrip() const override { + // Only COMDAT symbols would be dead-stripped. + return _isComdat ? deadStripNormal : deadStripNever; + } + +private: + bool _isComdat; + Merge _merge; + ArrayRef<uint8_t> _dataref; +}; + +// A COFFDefinedAtom represents an atom for BSS section. +class COFFBSSAtom : public COFFDefinedFileAtom { +public: + COFFBSSAtom(const File &file, StringRef name, Scope scope, + ContentPermissions perms, Merge merge, uint32_t size, + uint64_t ordinal) + : COFFDefinedFileAtom(file, name, ".bss", 0, scope, typeZeroFill, + perms, ordinal), + _merge(merge), _size(size) {} + + Merge merge() const override { return _merge; } + uint64_t size() const override { return _size; } + ArrayRef<uint8_t> rawContent() const override { return _contents; } + +private: + Merge _merge; + uint32_t _size; + std::vector<uint8_t> _contents; +}; + +/// A COFFLinkerInternalAtom represents a defined atom created by the linker, +/// not read from file. 
+class COFFLinkerInternalAtom : public COFFBaseDefinedAtom { +public: + SectionChoice sectionChoice() const override { return sectionBasedOnContent; } + uint64_t ordinal() const override { return _ordinal; } + Scope scope() const override { return scopeGlobal; } + Alignment alignment() const override { return Alignment(0); } + uint64_t size() const override { return _data.size(); } + ArrayRef<uint8_t> rawContent() const override { return _data; } + +protected: + COFFLinkerInternalAtom(const File &file, uint64_t ordinal, + std::vector<uint8_t> data, StringRef symbolName = "") + : COFFBaseDefinedAtom(file, symbolName, Kind::Internal), + _ordinal(ordinal), _data(std::move(data)) {} + +private: + uint64_t _ordinal; + std::vector<uint8_t> _data; +}; + +class COFFStringAtom : public COFFLinkerInternalAtom { +public: + COFFStringAtom(const File &file, uint64_t ordinal, StringRef sectionName, + StringRef contents) + : COFFLinkerInternalAtom(file, ordinal, stringRefToVector(contents)), + _sectionName(sectionName) {} + + SectionChoice sectionChoice() const override { return sectionCustomRequired; } + StringRef customSectionName() const override { return _sectionName; } + ContentType contentType() const override { return typeData; } + ContentPermissions permissions() const override { return permR__; } + +private: + StringRef _sectionName; + + std::vector<uint8_t> stringRefToVector(StringRef name) const { + std::vector<uint8_t> ret(name.size() + 1); + memcpy(&ret[0], name.data(), name.size()); + ret[name.size()] = 0; + return ret; + } +}; + +// A COFFSharedLibraryAtom represents a symbol for data in an import library. A +// reference to a COFFSharedLibraryAtom will be transformed to a real reference +// to an import address table entry in Idata pass. 
+class COFFSharedLibraryAtom : public SharedLibraryAtom { +public: + COFFSharedLibraryAtom(const File &file, uint16_t hint, StringRef symbolName, + StringRef importName, StringRef dllName) + : _file(file), _hint(hint), _mangledName(addImpPrefix(symbolName)), + _importName(importName), _dllName(dllName), _importTableEntry(nullptr) { + } + + const File &file() const override { return _file; } + uint16_t hint() const { return _hint; } + + /// Returns the symbol name to be used by the core linker. + StringRef name() const override { return _mangledName; } + + /// Returns the symbol name to be used in the import description table in the + /// COFF header. + virtual StringRef importName() const { return _importName; } + + StringRef loadName() const override { return _dllName; } + bool canBeNullAtRuntime() const override { return false; } + Type type() const override { return Type::Unknown; } + uint64_t size() const override { return 0; } + + void setImportTableEntry(const DefinedAtom *atom) { + _importTableEntry = atom; + } + + const DefinedAtom *getImportTableEntry() const { return _importTableEntry; } + +private: + /// Mangle the symbol name by adding "__imp_" prefix. See the file comment of + /// ReaderImportHeader.cpp for details about the prefix. + std::string addImpPrefix(StringRef symbolName) { + std::string ret("__imp_"); + ret.append(symbolName); + return ret; + } + + const File &_file; + uint16_t _hint; + std::string _mangledName; + std::string _importName; + StringRef _dllName; + const DefinedAtom *_importTableEntry; +}; + +// An instance of this class represents "input file" for atoms created in a +// pass. Atoms need to be associated to an input file even if it's not read from +// a file, so we use this class for that. 
+class VirtualFile : public SimpleFile { +public: + VirtualFile(const LinkingContext &ctx) + : SimpleFile("<virtual-file>"), _nextOrdinal(0) { + setOrdinal(ctx.getNextOrdinalAndIncrement()); + } + + uint64_t getNextOrdinal() { return _nextOrdinal++; } + +private: + uint64_t _nextOrdinal; +}; + +//===----------------------------------------------------------------------===// +// +// Utility functions to handle layout edges. +// +//===----------------------------------------------------------------------===// + +template <typename T, typename U> +void addLayoutEdge(T *a, U *b, uint32_t which) { + auto ref = new SimpleReference(Reference::KindNamespace::all, + Reference::KindArch::all, + which, 0, b, 0); + a->addReference(std::unique_ptr<SimpleReference>(ref)); +} + +} // namespace pecoff +} // namespace lld + +#endif diff --git a/lib/ReaderWriter/PECOFF/CMakeLists.txt b/lib/ReaderWriter/PECOFF/CMakeLists.txt new file mode 100644 index 0000000000000..86b49b79f194e --- /dev/null +++ b/lib/ReaderWriter/PECOFF/CMakeLists.txt @@ -0,0 +1,16 @@ +add_llvm_library(lldPECOFF + EdataPass.cpp + IdataPass.cpp + LinkerGeneratedSymbolFile.cpp + LoadConfigPass.cpp + PECOFFLinkingContext.cpp + Pass.cpp + ReaderCOFF.cpp + ReaderImportHeader.cpp + WriterImportLibrary.cpp + WriterPECOFF.cpp + LINK_LIBS + lldCore + LLVMObject + LLVMSupport + ) diff --git a/lib/ReaderWriter/PECOFF/EdataPass.cpp b/lib/ReaderWriter/PECOFF/EdataPass.cpp new file mode 100644 index 0000000000000..ad79f171f3c9f --- /dev/null +++ b/lib/ReaderWriter/PECOFF/EdataPass.cpp @@ -0,0 +1,227 @@ +//===- lib/ReaderWriter/PECOFF/EdataPass.cpp ------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "Pass.h" +#include "EdataPass.h" +#include "lld/Core/File.h" +#include "lld/Core/Pass.h" +#include "lld/Core/Simple.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Path.h" +#include <climits> +#include <ctime> +#include <utility> + +using lld::pecoff::edata::EdataAtom; +using lld::pecoff::edata::TableEntry; +using llvm::object::export_address_table_entry; +using llvm::object::export_directory_table_entry; + +namespace lld { +namespace pecoff { + +typedef PECOFFLinkingContext::ExportDesc ExportDesc; + +// dedupExports removes duplicate export entries. If two exports are +// referring the same symbol, they are considered duplicates. +// This could happen if the same symbol name is specified as an argument +// to /export more than once, or an unmangled and mangled name of the +// same symbol are given to /export. In the latter case, we choose +// unmangled (shorter) name. +static void dedupExports(PECOFFLinkingContext &ctx) { + std::vector<ExportDesc> &exports = ctx.getDllExports(); + // Pass 1: find duplicate entries + std::set<const ExportDesc *> dup; + std::map<StringRef, ExportDesc *> map; + for (ExportDesc &exp : exports) { + if (!exp.externalName.empty()) + continue; + StringRef symbol = exp.getRealName(); + auto it = map.find(symbol); + if (it == map.end()) { + map[symbol] = &exp; + } else if (symbol.size() < it->second->getRealName().size()) { + map[symbol] = &exp; + dup.insert(it->second); + } else { + dup.insert(&exp); + } + } + // Pass 2: remove duplicate entries + auto pred = [&](const ExportDesc &exp) { + return dup.count(&exp) == 1; + }; + exports.erase(std::remove_if(exports.begin(), exports.end(), pred), + exports.end()); +} + +static void assignOrdinals(PECOFFLinkingContext &ctx) { + std::vector<ExportDesc> &exports = ctx.getDllExports(); + int maxOrdinal = -1; + for (ExportDesc &desc : exports) + maxOrdinal = std::max(maxOrdinal, 
desc.ordinal); + + std::sort(exports.begin(), exports.end(), + [](const ExportDesc &a, const ExportDesc &b) { + return a.getExternalName().compare(b.getExternalName()) < 0; + }); + + int nextOrdinal = (maxOrdinal == -1) ? 1 : (maxOrdinal + 1); + for (ExportDesc &desc : exports) + if (desc.ordinal == -1) + desc.ordinal = nextOrdinal++; +} + +static bool getExportedAtoms(PECOFFLinkingContext &ctx, MutableFile *file, + std::vector<TableEntry> &ret) { + std::map<StringRef, const DefinedAtom *> definedAtoms; + for (const DefinedAtom *atom : file->defined()) + definedAtoms[atom->name()] = atom; + + for (PECOFFLinkingContext::ExportDesc &desc : ctx.getDllExports()) { + auto it = definedAtoms.find(desc.getRealName()); + if (it == definedAtoms.end()) { + llvm::errs() << "Symbol <" << desc.name + << "> is exported but not defined.\n"; + return false; + } + const DefinedAtom *atom = it->second; + + // One can export a symbol with a different name than the symbol + // name used in DLL. If such name is specified, use it in the + // .edata section. 
+ ret.push_back(TableEntry(ctx.undecorateSymbol(desc.getExternalName()), + desc.ordinal, atom, desc.noname)); + } + std::sort(ret.begin(), ret.end(), + [](const TableEntry &a, const TableEntry &b) { + return a.exportName.compare(b.exportName) < 0; + }); + + return true; +} + +static std::pair<int, int> getOrdinalBase(std::vector<TableEntry> &entries) { + int ordinalBase = INT_MAX; + int maxOrdinal = -1; + for (TableEntry &e : entries) { + ordinalBase = std::min(ordinalBase, e.ordinal); + maxOrdinal = std::max(maxOrdinal, e.ordinal); + } + return std::pair<int, int>(ordinalBase, maxOrdinal); +} + +edata::EdataAtom * +EdataPass::createAddressTable(const std::vector<TableEntry> &entries, + int ordinalBase, int maxOrdinal) { + EdataAtom *addressTable = + new (_alloc) EdataAtom(_file, sizeof(export_address_table_entry) * + (maxOrdinal - ordinalBase + 1)); + + for (const TableEntry &e : entries) { + int index = e.ordinal - ordinalBase; + size_t offset = index * sizeof(export_address_table_entry); + addDir32NBReloc(addressTable, e.atom, _ctx.getMachineType(), offset); + } + return addressTable; +} + +edata::EdataAtom * +EdataPass::createNamePointerTable(const PECOFFLinkingContext &ctx, + const std::vector<TableEntry> &entries, + MutableFile *file) { + EdataAtom *table = + new (_alloc) EdataAtom(_file, sizeof(uint32_t) * entries.size()); + + size_t offset = 0; + for (const TableEntry &e : entries) { + auto *stringAtom = new (_alloc) COFFStringAtom( + _file, _stringOrdinal++, ".edata", e.exportName); + file->addAtom(*stringAtom); + addDir32NBReloc(table, stringAtom, _ctx.getMachineType(), offset); + offset += sizeof(uint32_t); + } + return table; +} + +edata::EdataAtom *EdataPass::createExportDirectoryTable( + const std::vector<edata::TableEntry> &namedEntries, int ordinalBase, + int maxOrdinal) { + EdataAtom *ret = + new (_alloc) EdataAtom(_file, sizeof(export_directory_table_entry)); + auto *data = ret->getContents<export_directory_table_entry>(); + data->TimeDateStamp = 
time(nullptr); + data->OrdinalBase = ordinalBase; + data->AddressTableEntries = maxOrdinal - ordinalBase + 1; + data->NumberOfNamePointers = namedEntries.size(); + return ret; +} + +edata::EdataAtom * +EdataPass::createOrdinalTable(const std::vector<TableEntry> &entries, + int ordinalBase) { + EdataAtom *ret = + new (_alloc) EdataAtom(_file, sizeof(uint16_t) * entries.size()); + uint16_t *data = ret->getContents<uint16_t>(); + int i = 0; + for (const TableEntry &e : entries) + data[i++] = e.ordinal - ordinalBase; + return ret; +} + +void EdataPass::perform(std::unique_ptr<MutableFile> &file) { + dedupExports(_ctx); + assignOrdinals(_ctx); + + std::vector<TableEntry> entries; + if (!getExportedAtoms(_ctx, file.get(), entries)) + return; + if (entries.empty()) + return; + + int ordinalBase, maxOrdinal; + std::tie(ordinalBase, maxOrdinal) = getOrdinalBase(entries); + + std::vector<TableEntry> namedEntries; + for (TableEntry &e : entries) + if (!e.noname) + namedEntries.push_back(e); + + EdataAtom *table = + createExportDirectoryTable(namedEntries, ordinalBase, maxOrdinal); + file->addAtom(*table); + + COFFStringAtom *dllName = + new (_alloc) COFFStringAtom(_file, _stringOrdinal++, ".edata", + llvm::sys::path::filename(_ctx.outputPath())); + file->addAtom(*dllName); + addDir32NBReloc(table, dllName, _ctx.getMachineType(), + offsetof(export_directory_table_entry, NameRVA)); + + EdataAtom *addressTable = + createAddressTable(entries, ordinalBase, maxOrdinal); + file->addAtom(*addressTable); + addDir32NBReloc( + table, addressTable, _ctx.getMachineType(), + offsetof(export_directory_table_entry, ExportAddressTableRVA)); + + EdataAtom *namePointerTable = + createNamePointerTable(_ctx, namedEntries, file.get()); + file->addAtom(*namePointerTable); + addDir32NBReloc(table, namePointerTable, _ctx.getMachineType(), + offsetof(export_directory_table_entry, NamePointerRVA)); + + EdataAtom *ordinalTable = createOrdinalTable(namedEntries, ordinalBase); + 
file->addAtom(*ordinalTable); + addDir32NBReloc(table, ordinalTable, _ctx.getMachineType(), + offsetof(export_directory_table_entry, OrdinalTableRVA)); +} + +} // namespace pecoff +} // namespace lld diff --git a/lib/ReaderWriter/PECOFF/EdataPass.h b/lib/ReaderWriter/PECOFF/EdataPass.h new file mode 100644 index 0000000000000..442be3ca24aa0 --- /dev/null +++ b/lib/ReaderWriter/PECOFF/EdataPass.h @@ -0,0 +1,99 @@ +//===- lib/ReaderWriter/PECOFF/EdataPass.h --------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file \brief This linker pass creates atoms for the DLL export +/// information. The defined atoms constructed in this pass will go into .edata +/// section. +/// +/// For the details of the .edata section format, see Microsoft PE/COFF +/// Specification section 5.3, The .edata Section. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_PE_COFF_EDATA_PASS_H +#define LLD_READER_WRITER_PE_COFF_EDATA_PASS_H + +#include "Atoms.h" +#include "lld/Core/File.h" +#include "lld/Core/Pass.h" +#include "lld/Core/Simple.h" +#include "lld/ReaderWriter/PECOFFLinkingContext.h" +#include "llvm/Support/COFF.h" +#include <map> + +using llvm::COFF::ImportDirectoryTableEntry; + +namespace lld { +namespace pecoff { +namespace edata { + +struct TableEntry { + TableEntry(StringRef exp, int ord, const DefinedAtom *a, bool n) + : exportName(exp), ordinal(ord), atom(a), noname(n) {} + std::string exportName; + int ordinal; + const DefinedAtom *atom; + bool noname; +}; + +/// The root class of all edata atoms. 
+class EdataAtom : public COFFLinkerInternalAtom { +public: + EdataAtom(VirtualFile &file, size_t size) + : COFFLinkerInternalAtom(file, file.getNextOrdinal(), + std::vector<uint8_t>(size)) {} + + SectionChoice sectionChoice() const override { return sectionCustomRequired; } + StringRef customSectionName() const override { return ".edata"; } + ContentType contentType() const override { return typeData; } + ContentPermissions permissions() const override { return permR__; } + + template <typename T> T *getContents() const { + return (T *)const_cast<uint8_t *>(rawContent().data()); + } +}; + +} // namespace edata + +class EdataPass : public lld::Pass { +public: + EdataPass(PECOFFLinkingContext &ctx) + : _ctx(ctx), _file(ctx), _is64(ctx.is64Bit()), _stringOrdinal(1024) {} + + void perform(std::unique_ptr<MutableFile> &file) override; + +private: + edata::EdataAtom * + createExportDirectoryTable(const std::vector<edata::TableEntry> &namedEntries, + int ordinalBase, int maxOrdinal); + + edata::EdataAtom * + createAddressTable(const std::vector<edata::TableEntry> &entries, + int ordinalBase, int maxOrdinal); + + edata::EdataAtom * + createNamePointerTable(const PECOFFLinkingContext &ctx, + const std::vector<edata::TableEntry> &entries, + MutableFile *file); + + edata::EdataAtom * + createOrdinalTable(const std::vector<edata::TableEntry> &entries, + int ordinalBase); + + PECOFFLinkingContext &_ctx; + VirtualFile _file; + bool _is64; + int _stringOrdinal; + mutable llvm::BumpPtrAllocator _alloc; +}; + +} // namespace pecoff +} // namespace lld + +#endif diff --git a/lib/ReaderWriter/PECOFF/IdataPass.cpp b/lib/ReaderWriter/PECOFF/IdataPass.cpp new file mode 100644 index 0000000000000..d41ef581f7fa5 --- /dev/null +++ b/lib/ReaderWriter/PECOFF/IdataPass.cpp @@ -0,0 +1,345 @@ +//===- lib/ReaderWriter/PECOFF/IdataPass.cpp ------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "IdataPass.h" +#include "Pass.h" +#include "lld/Core/File.h" +#include "lld/Core/Pass.h" +#include "lld/Core/Simple.h" +#include "llvm/Support/COFF.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Endian.h" +#include <algorithm> +#include <cstddef> +#include <cstring> +#include <map> +#include <vector> + +using namespace llvm::support::endian; +using llvm::object::delay_import_directory_table_entry; + +namespace lld { +namespace pecoff { +namespace idata { + +IdataAtom::IdataAtom(IdataContext &context, std::vector<uint8_t> data) + : COFFLinkerInternalAtom(context.dummyFile, + context.dummyFile.getNextOrdinal(), data) { + context.file.addAtom(*this); +} + +HintNameAtom::HintNameAtom(IdataContext &context, uint16_t hint, + StringRef importName) + : IdataAtom(context, assembleRawContent(hint, importName)), + _importName(importName) {} + +std::vector<uint8_t> HintNameAtom::assembleRawContent(uint16_t hint, + StringRef importName) { + size_t size = + llvm::RoundUpToAlignment(sizeof(hint) + importName.size() + 1, 2); + std::vector<uint8_t> ret(size); + ret[importName.size()] = 0; + ret[importName.size() - 1] = 0; + write16le(&ret[0], hint); + std::memcpy(&ret[2], importName.data(), importName.size()); + return ret; +} + +std::vector<uint8_t> +ImportTableEntryAtom::assembleRawContent(uint64_t rva, bool is64) { + // The element size of the import table is 32 bit in PE and 64 bit + // in PE+. In PE+, bits 62-31 are filled with zero. 
+ if (is64) { + std::vector<uint8_t> ret(8); + write64le(&ret[0], rva); + return ret; + } + std::vector<uint8_t> ret(4); + write32le(&ret[0], rva); + return ret; +} + +static std::vector<ImportTableEntryAtom *> +createImportTableAtoms(IdataContext &context, + const std::vector<COFFSharedLibraryAtom *> &sharedAtoms, + bool shouldAddReference, StringRef sectionName, + llvm::BumpPtrAllocator &alloc) { + std::vector<ImportTableEntryAtom *> ret; + for (COFFSharedLibraryAtom *atom : sharedAtoms) { + ImportTableEntryAtom *entry = nullptr; + if (atom->importName().empty()) { + // Import by ordinal + uint64_t hint = atom->hint(); + hint |= context.ctx.is64Bit() ? (uint64_t(1) << 63) : (uint64_t(1) << 31); + entry = new (alloc) ImportTableEntryAtom(context, hint, sectionName); + } else { + // Import by name + entry = new (alloc) ImportTableEntryAtom(context, 0, sectionName); + HintNameAtom *hintName = + new (alloc) HintNameAtom(context, atom->hint(), atom->importName()); + addDir32NBReloc(entry, hintName, context.ctx.getMachineType(), 0); + } + ret.push_back(entry); + if (shouldAddReference) + atom->setImportTableEntry(entry); + } + // Add the NULL entry. + ret.push_back(new (alloc) ImportTableEntryAtom(context, 0, sectionName)); + return ret; +} + +// Creates atoms for an import lookup table. The import lookup table is an +// array of pointers to hint/name atoms. The array needs to be terminated with +// the NULL entry. +void ImportDirectoryAtom::addRelocations( + IdataContext &context, StringRef loadName, + const std::vector<COFFSharedLibraryAtom *> &sharedAtoms) { + // Create parallel arrays. The contents of the two are initially the + // same. The PE/COFF loader overwrites the import address tables with the + // pointers to the referenced items after loading the executable into + // memory. 
+ std::vector<ImportTableEntryAtom *> importLookupTables = + createImportTableAtoms(context, sharedAtoms, false, ".idata.t", _alloc); + std::vector<ImportTableEntryAtom *> importAddressTables = + createImportTableAtoms(context, sharedAtoms, true, ".idata.a", _alloc); + + addDir32NBReloc(this, importLookupTables[0], context.ctx.getMachineType(), + offsetof(ImportDirectoryTableEntry, ImportLookupTableRVA)); + addDir32NBReloc(this, importAddressTables[0], context.ctx.getMachineType(), + offsetof(ImportDirectoryTableEntry, ImportAddressTableRVA)); + auto *atom = new (_alloc) + COFFStringAtom(context.dummyFile, context.dummyFile.getNextOrdinal(), + ".idata", loadName); + context.file.addAtom(*atom); + addDir32NBReloc(this, atom, context.ctx.getMachineType(), + offsetof(ImportDirectoryTableEntry, NameRVA)); +} + +// Create the contents for the delay-import table. +std::vector<uint8_t> DelayImportDirectoryAtom::createContent() { + std::vector<uint8_t> r(sizeof(delay_import_directory_table_entry), 0); + auto entry = reinterpret_cast<delay_import_directory_table_entry *>(&r[0]); + // link.exe seems to set 1 to Attributes field, so do we. + entry->Attributes = 1; + return r; +} + +// Find "___delayLoadHelper2@8" (or "__delayLoadHelper2" on x64). +// This is not efficient but should be OK for now. +static const Atom * +findDelayLoadHelper(MutableFile &file, const PECOFFLinkingContext &ctx) { + StringRef sym = ctx.getDelayLoadHelperName(); + for (const DefinedAtom *atom : file.defined()) + if (atom->name() == sym) + return atom; + std::string msg = (sym + " was not found").str(); + llvm_unreachable(msg.c_str()); +} + +// Create the data referred by the delay-import table. +void DelayImportDirectoryAtom::addRelocations( + IdataContext &context, StringRef loadName, + const std::vector<COFFSharedLibraryAtom *> &sharedAtoms) { + // "ModuleHandle" field. This points to an array of pointer-size data + // in ".data" section. Initially the array is initialized with zero. 
+ // The delay-load import helper will set DLL base address at runtime. + auto *hmodule = new (_alloc) DelayImportAddressAtom(context); + addDir32NBReloc(this, hmodule, context.ctx.getMachineType(), + offsetof(delay_import_directory_table_entry, ModuleHandle)); + + // "NameTable" field. The data structure of this field is the same + // as (non-delay) import table's Import Lookup Table. Contains + // imported function names. This is a parallel array of AddressTable + // field. + std::vector<ImportTableEntryAtom *> nameTable = + createImportTableAtoms(context, sharedAtoms, false, ".didat", _alloc); + addDir32NBReloc( + this, nameTable[0], context.ctx.getMachineType(), + offsetof(delay_import_directory_table_entry, DelayImportNameTable)); + + // "Name" field. This points to the NUL-terminated DLL name string. + auto *name = new (_alloc) + COFFStringAtom(context.dummyFile, context.dummyFile.getNextOrdinal(), + ".didat", loadName); + context.file.addAtom(*name); + addDir32NBReloc(this, name, context.ctx.getMachineType(), + offsetof(delay_import_directory_table_entry, Name)); + + // "AddressTable" field. This points to an array of pointers, which + // in turn pointing to delay-load functions. 
+ std::vector<DelayImportAddressAtom *> addrTable; + for (int i = 0, e = sharedAtoms.size() + 1; i < e; ++i) + addrTable.push_back(new (_alloc) DelayImportAddressAtom(context)); + for (int i = 0, e = sharedAtoms.size(); i < e; ++i) + sharedAtoms[i]->setImportTableEntry(addrTable[i]); + addDir32NBReloc( + this, addrTable[0], context.ctx.getMachineType(), + offsetof(delay_import_directory_table_entry, DelayImportAddressTable)); + + const Atom *delayLoadHelper = findDelayLoadHelper(context.file, context.ctx); + for (int i = 0, e = sharedAtoms.size(); i < e; ++i) { + const DefinedAtom *loader = new (_alloc) DelayLoaderAtom( + context, addrTable[i], this, delayLoadHelper); + if (context.ctx.is64Bit()) + addDir64Reloc(addrTable[i], loader, context.ctx.getMachineType(), 0); + else + addDir32Reloc(addrTable[i], loader, context.ctx.getMachineType(), 0); + } +} + +DelayLoaderAtom::DelayLoaderAtom(IdataContext &context, const Atom *impAtom, + const Atom *descAtom, const Atom *delayLoadHelperAtom) + : IdataAtom(context, createContent(context.ctx.getMachineType())) { + MachineTypes machine = context.ctx.getMachineType(); + switch (machine) { + case llvm::COFF::IMAGE_FILE_MACHINE_I386: + addDir32Reloc(this, impAtom, machine, 3); + addDir32Reloc(this, descAtom, machine, 8); + addRel32Reloc(this, delayLoadHelperAtom, machine, 13); + break; + case llvm::COFF::IMAGE_FILE_MACHINE_AMD64: + addRel32Reloc(this, impAtom, machine, 36); + addRel32Reloc(this, descAtom, machine, 43); + addRel32Reloc(this, delayLoadHelperAtom, machine, 48); + break; + default: + llvm::report_fatal_error("unsupported machine type"); + } +} + +// DelayLoaderAtom contains a wrapper function for __delayLoadHelper2. +// +// __delayLoadHelper2 takes two pointers: a pointer to the delay-load +// table descripter and a pointer to _imp_ symbol for the function +// to be resolved. 
+// +// __delayLoadHelper2 looks at the table descriptor to know the DLL +// name, calls dlopen()-like function to load it, resolves all +// imported symbols, and then writes the resolved addresses to the +// import address table. It returns a pointer to the resolved +// function. +// +// __delayLoadHelper2 is defined in delayimp.lib. +std::vector<uint8_t> +DelayLoaderAtom::createContent(MachineTypes machine) const { + static const uint8_t x86[] = { + 0x51, // push ecx + 0x52, // push edx + 0x68, 0, 0, 0, 0, // push offset ___imp__<FUNCNAME> + 0x68, 0, 0, 0, 0, // push offset ___DELAY_IMPORT_DESCRIPTOR_<DLLNAME>_dll + 0xE8, 0, 0, 0, 0, // call ___delayLoadHelper2@8 + 0x5A, // pop edx + 0x59, // pop ecx + 0xFF, 0xE0, // jmp eax + }; + static const uint8_t x64[] = { + 0x51, // push rcx + 0x52, // push rdx + 0x41, 0x50, // push r8 + 0x41, 0x51, // push r9 + 0x48, 0x83, 0xEC, 0x48, // sub rsp, 48h + 0x66, 0x0F, 0x7F, 0x04, 0x24, // movdqa xmmword ptr [rsp], xmm0 + 0x66, 0x0F, 0x7F, 0x4C, 0x24, 0x10, // movdqa xmmword ptr [rsp+10h], xmm1 + 0x66, 0x0F, 0x7F, 0x54, 0x24, 0x20, // movdqa xmmword ptr [rsp+20h], xmm2 + 0x66, 0x0F, 0x7F, 0x5C, 0x24, 0x30, // movdqa xmmword ptr [rsp+30h], xmm3 + 0x48, 0x8D, 0x15, 0, 0, 0, 0, // lea rdx, [__imp_<FUNCNAME>] + 0x48, 0x8D, 0x0D, 0, 0, 0, 0, // lea rcx, [___DELAY_IMPORT_...] 
+ 0xE8, 0, 0, 0, 0, // call __delayLoadHelper2 + 0x66, 0x0F, 0x6F, 0x04, 0x24, // movdqa xmm0, xmmword ptr [rsp] + 0x66, 0x0F, 0x6F, 0x4C, 0x24, 0x10, // movdqa xmm1, xmmword ptr [rsp+10h] + 0x66, 0x0F, 0x6F, 0x54, 0x24, 0x20, // movdqa xmm2, xmmword ptr [rsp+20h] + 0x66, 0x0F, 0x6F, 0x5C, 0x24, 0x30, // movdqa xmm3, xmmword ptr [rsp+30h] + 0x48, 0x83, 0xC4, 0x48, // add rsp, 48h + 0x41, 0x59, // pop r9 + 0x41, 0x58, // pop r8 + 0x5A, // pop rdx + 0x59, // pop rcx + 0xFF, 0xE0, // jmp rax + }; + switch (machine) { + case llvm::COFF::IMAGE_FILE_MACHINE_I386: + return std::vector<uint8_t>(x86, x86 + sizeof(x86)); + case llvm::COFF::IMAGE_FILE_MACHINE_AMD64: + return std::vector<uint8_t>(x64, x64 + sizeof(x64)); + default: + llvm::report_fatal_error("unsupported machine type"); + } +} + +} // namespace idata + +void IdataPass::perform(std::unique_ptr<MutableFile> &file) { + if (file->sharedLibrary().empty()) + return; + + idata::IdataContext context(*file, _dummyFile, _ctx); + std::map<StringRef, std::vector<COFFSharedLibraryAtom *>> sharedAtoms = + groupByLoadName(*file); + bool hasImports = false; + bool hasDelayImports = false; + + // Create the import table and terminate it with the null entry. + for (auto i : sharedAtoms) { + StringRef loadName = i.first; + if (_ctx.isDelayLoadDLL(loadName)) + continue; + hasImports = true; + std::vector<COFFSharedLibraryAtom *> &atoms = i.second; + new (_alloc) idata::ImportDirectoryAtom(context, loadName, atoms); + } + if (hasImports) + new (_alloc) idata::NullImportDirectoryAtom(context); + + // Create the delay import table and terminate it with the null entry. 
+ for (auto i : sharedAtoms) { + StringRef loadName = i.first; + if (!_ctx.isDelayLoadDLL(loadName)) + continue; + hasDelayImports = true; + std::vector<COFFSharedLibraryAtom *> &atoms = i.second; + new (_alloc) idata::DelayImportDirectoryAtom(context, loadName, atoms); + } + if (hasDelayImports) + new (_alloc) idata::DelayNullImportDirectoryAtom(context); + + replaceSharedLibraryAtoms(*file); +} + +std::map<StringRef, std::vector<COFFSharedLibraryAtom *> > +IdataPass::groupByLoadName(MutableFile &file) { + std::map<StringRef, COFFSharedLibraryAtom *> uniqueAtoms; + for (const SharedLibraryAtom *atom : file.sharedLibrary()) + uniqueAtoms[atom->name()] = + (COFFSharedLibraryAtom *)const_cast<SharedLibraryAtom *>(atom); + + std::map<StringRef, std::vector<COFFSharedLibraryAtom *> > ret; + for (auto i : uniqueAtoms) { + COFFSharedLibraryAtom *atom = i.second; + ret[atom->loadName()].push_back(atom); + } + return ret; +} + +/// Transforms a reference to a COFFSharedLibraryAtom to a real reference. +void IdataPass::replaceSharedLibraryAtoms(MutableFile &file) { + for (const DefinedAtom *atom : file.defined()) { + for (const Reference *ref : *atom) { + const Atom *target = ref->target(); + auto *sharedAtom = dyn_cast<SharedLibraryAtom>(target); + if (!sharedAtom) + continue; + const auto *coffSharedAtom = (const COFFSharedLibraryAtom *)sharedAtom; + const DefinedAtom *entry = coffSharedAtom->getImportTableEntry(); + const_cast<Reference *>(ref)->setTarget(entry); + } + } +} + +} // namespace pecoff +} // namespace lld diff --git a/lib/ReaderWriter/PECOFF/IdataPass.h b/lib/ReaderWriter/PECOFF/IdataPass.h new file mode 100644 index 0000000000000..9db82160339a3 --- /dev/null +++ b/lib/ReaderWriter/PECOFF/IdataPass.h @@ -0,0 +1,218 @@ +//===- lib/ReaderWriter/PECOFF/IdataPass.h---------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +/// +/// \file \brief This linker pass creates atoms for the DLL import +/// information. The defined atoms constructed in this pass will go into .idata +/// section, unless .idata section is merged with other section such as .data. +/// +/// For the details of the .idata section format, see Microsoft PE/COFF +/// Specification section 5.4, The .idata Section. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_PE_COFF_IDATA_PASS_H +#define LLD_READER_WRITER_PE_COFF_IDATA_PASS_H + +#include "Atoms.h" +#include "lld/Core/File.h" +#include "lld/Core/Pass.h" +#include "lld/Core/Simple.h" +#include "lld/ReaderWriter/PECOFFLinkingContext.h" +#include "llvm/Support/COFF.h" +#include <algorithm> +#include <map> + +using llvm::COFF::ImportDirectoryTableEntry; + +namespace lld { +namespace pecoff { +namespace idata { + +class DLLNameAtom; +class HintNameAtom; +class ImportTableEntryAtom; + +// A state object of this pass. +struct IdataContext { + IdataContext(MutableFile &f, VirtualFile &g, const PECOFFLinkingContext &c) + : file(f), dummyFile(g), ctx(c) {} + MutableFile &file; + VirtualFile &dummyFile; + const PECOFFLinkingContext &ctx; +}; + +/// The root class of all idata atoms. +class IdataAtom : public COFFLinkerInternalAtom { +public: + SectionChoice sectionChoice() const override { return sectionCustomRequired; } + StringRef customSectionName() const override { return ".idata"; } + ContentType contentType() const override { return typeData; } + ContentPermissions permissions() const override { return permR__; } + +protected: + IdataAtom(IdataContext &context, std::vector<uint8_t> data); +}; + +/// A HintNameAtom represents a symbol that will be imported from a DLL at +/// runtime. It consists with an optional hint, which is a small integer, and a +/// symbol name. 
+/// +/// A hint is an index of the export pointer table in a DLL. If the import +/// library and DLL is in sync (i.e., ".lib" and ".dll" is for the same version +/// or the symbol ordinal is maintained by hand with ".exp" file), the PE/COFF +/// loader can find the symbol quickly. +class HintNameAtom : public IdataAtom { +public: + HintNameAtom(IdataContext &context, uint16_t hint, StringRef importName); + + StringRef getContentString() { return _importName; } + +private: + std::vector<uint8_t> assembleRawContent(uint16_t hint, StringRef importName); + StringRef _importName; +}; + +class ImportTableEntryAtom : public IdataAtom { +public: + ImportTableEntryAtom(IdataContext &ctx, uint64_t contents, + StringRef sectionName) + : IdataAtom(ctx, assembleRawContent(contents, ctx.ctx.is64Bit())), + _sectionName(sectionName) {} + + StringRef customSectionName() const override { + return _sectionName; + }; + +private: + std::vector<uint8_t> assembleRawContent(uint64_t contents, bool is64); + StringRef _sectionName; +}; + +/// An ImportDirectoryAtom includes information to load a DLL, including a DLL +/// name, symbols that will be resolved from the DLL, and the import address +/// table that are overwritten by the loader with the pointers to the referenced +/// items. The executable has one ImportDirectoryAtom per one imported DLL. +class ImportDirectoryAtom : public IdataAtom { +public: + ImportDirectoryAtom(IdataContext &context, StringRef loadName, + const std::vector<COFFSharedLibraryAtom *> &sharedAtoms) + : IdataAtom(context, std::vector<uint8_t>(20, 0)) { + addRelocations(context, loadName, sharedAtoms); + } + + StringRef customSectionName() const override { return ".idata.d"; } + +private: + void addRelocations(IdataContext &context, StringRef loadName, + const std::vector<COFFSharedLibraryAtom *> &sharedAtoms); + + mutable llvm::BumpPtrAllocator _alloc; +}; + +/// The last NULL entry in the import directory. 
+class NullImportDirectoryAtom : public IdataAtom { +public: + explicit NullImportDirectoryAtom(IdataContext &context) + : IdataAtom(context, std::vector<uint8_t>(20, 0)) {} + + StringRef customSectionName() const override { return ".idata.d"; } +}; + +/// The class for the the delay-load import table. +class DelayImportDirectoryAtom : public IdataAtom { +public: + DelayImportDirectoryAtom( + IdataContext &context, StringRef loadName, + const std::vector<COFFSharedLibraryAtom *> &sharedAtoms) + : IdataAtom(context, createContent()) { + addRelocations(context, loadName, sharedAtoms); + } + + StringRef customSectionName() const override { return ".didat.d"; } + +private: + std::vector<uint8_t> createContent(); + void addRelocations(IdataContext &context, StringRef loadName, + const std::vector<COFFSharedLibraryAtom *> &sharedAtoms); + + mutable llvm::BumpPtrAllocator _alloc; +}; + +/// Terminator of the delay-load import table. The content of this atom is all +/// zero. +class DelayNullImportDirectoryAtom : public IdataAtom { +public: + explicit DelayNullImportDirectoryAtom(IdataContext &context) + : IdataAtom(context, createContent()) {} + StringRef customSectionName() const override { return ".didat.d"; } + +private: + std::vector<uint8_t> createContent() const { + return std::vector<uint8_t>( + sizeof(llvm::object::delay_import_directory_table_entry), 0); + } +}; + +class DelayImportAddressAtom : public IdataAtom { +public: + explicit DelayImportAddressAtom(IdataContext &context) + : IdataAtom(context, createContent(context.ctx)), + _align(Alignment(context.ctx.is64Bit() ? 3 : 2)) {} + StringRef customSectionName() const override { return ".data"; } + ContentPermissions permissions() const override { return permRW_; } + Alignment alignment() const override { return _align; } + +private: + std::vector<uint8_t> createContent(const PECOFFLinkingContext &ctx) const { + return std::vector<uint8_t>(ctx.is64Bit() ? 
8 : 4, 0); + } + + Alignment _align; +}; + +// DelayLoaderAtom contains a wrapper function for __delayLoadHelper2. +class DelayLoaderAtom : public IdataAtom { +public: + DelayLoaderAtom(IdataContext &context, const Atom *impAtom, + const Atom *descAtom, const Atom *delayLoadHelperAtom); + StringRef customSectionName() const override { return ".text"; } + ContentPermissions permissions() const override { return permR_X; } + Alignment alignment() const override { return Alignment(0); } + +private: + std::vector<uint8_t> createContent(MachineTypes machine) const; +}; + +} // namespace idata + +class IdataPass : public lld::Pass { +public: + IdataPass(const PECOFFLinkingContext &ctx) : _dummyFile(ctx), _ctx(ctx) {} + + void perform(std::unique_ptr<MutableFile> &file) override; + +private: + std::map<StringRef, std::vector<COFFSharedLibraryAtom *>> + groupByLoadName(MutableFile &file); + + void replaceSharedLibraryAtoms(MutableFile &file); + + // A dummy file with which all the atoms created in the pass will be + // associated. Atoms need to be associated to an input file even if it's not + // read from a file, so we use this object. + VirtualFile _dummyFile; + + const PECOFFLinkingContext &_ctx; + llvm::BumpPtrAllocator _alloc; +}; + +} // namespace pecoff +} // namespace lld + +#endif diff --git a/lib/ReaderWriter/PECOFF/InferSubsystemPass.h b/lib/ReaderWriter/PECOFF/InferSubsystemPass.h new file mode 100644 index 0000000000000..cbf863ee47848 --- /dev/null +++ b/lib/ReaderWriter/PECOFF/InferSubsystemPass.h @@ -0,0 +1,66 @@ +//===- lib/ReaderWriter/PECOFF/InferSubsystemPass.h ----------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_PE_COFF_INFER_SUBSYSTEM_PASS_H +#define LLD_READER_WRITER_PE_COFF_INFER_SUBSYSTEM_PASS_H + +#include "Atoms.h" +#include "lld/Core/Pass.h" +#include <vector> + +namespace lld { +namespace pecoff { + +// Infers subsystem from entry point function name. +class InferSubsystemPass : public lld::Pass { +public: + InferSubsystemPass(PECOFFLinkingContext &ctx) : _ctx(ctx) {} + + void perform(std::unique_ptr<MutableFile> &file) override { + if (_ctx.getSubsystem() != WindowsSubsystem::IMAGE_SUBSYSTEM_UNKNOWN) + return; + + if (_ctx.isDll()) { + _ctx.setSubsystem(WindowsSubsystem::IMAGE_SUBSYSTEM_WINDOWS_GUI); + return; + } + + // Scan the resolved symbols to infer the subsystem. + const std::string wWinMain = _ctx.decorateSymbol("wWinMainCRTStartup"); + const std::string wWinMainAt = _ctx.decorateSymbol("wWinMainCRTStartup@"); + const std::string winMain = _ctx.decorateSymbol("WinMainCRTStartup"); + const std::string winMainAt = _ctx.decorateSymbol("WinMainCRTStartup@"); + const std::string wmain = _ctx.decorateSymbol("wmainCRTStartup"); + const std::string wmainAt = _ctx.decorateSymbol("wmainCRTStartup@"); + const std::string main = _ctx.decorateSymbol("mainCRTStartup"); + const std::string mainAt = _ctx.decorateSymbol("mainCRTStartup@"); + + for (const DefinedAtom *atom : file->definedAtoms()) { + if (atom->name() == wWinMain || atom->name().startswith(wWinMainAt) || + atom->name() == winMain || atom->name().startswith(winMainAt)) { + _ctx.setSubsystem(WindowsSubsystem::IMAGE_SUBSYSTEM_WINDOWS_GUI); + return; + } + if (atom->name() == wmain || atom->name().startswith(wmainAt) || + atom->name() == main || atom->name().startswith(mainAt)) { + _ctx.setSubsystem(WindowsSubsystem::IMAGE_SUBSYSTEM_WINDOWS_CUI); + return; + } + } + llvm::report_fatal_error("Failed to infer subsystem"); + } + +private: + PECOFFLinkingContext &_ctx; +}; + +} // namespace pecoff +} // 
namespace lld + +#endif diff --git a/lib/ReaderWriter/PECOFF/LinkerGeneratedSymbolFile.cpp b/lib/ReaderWriter/PECOFF/LinkerGeneratedSymbolFile.cpp new file mode 100644 index 0000000000000..a11410784b8c2 --- /dev/null +++ b/lib/ReaderWriter/PECOFF/LinkerGeneratedSymbolFile.cpp @@ -0,0 +1,48 @@ +//===- lib/ReaderWriter/PECOFF/LinkerGeneratedSymbolFile.cpp --------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "LinkerGeneratedSymbolFile.h" + +namespace lld { +namespace pecoff { + +// Find decorated symbol, namely /sym@[0-9]+/ or /\?sym@@.+/. +bool findDecoratedSymbol(PECOFFLinkingContext *ctx, + std::string sym, std::string &res) { + const std::set<std::string> &defined = ctx->definedSymbols(); + // Search for /sym@[0-9]+/ + { + std::string s = sym + '@'; + auto it = defined.lower_bound(s); + for (auto e = defined.end(); it != e; ++it) { + if (!StringRef(*it).startswith(s)) + break; + if (it->size() == s.size()) + continue; + StringRef suffix = StringRef(*it).substr(s.size()); + if (suffix.find_first_not_of("0123456789") != StringRef::npos) + continue; + res = *it; + return true; + } + } + // Search for /\?sym@@.+/ + { + std::string s = "?" 
+ ctx->undecorateSymbol(sym).str() + "@@"; + auto it = defined.lower_bound(s); + if (it != defined.end() && StringRef(*it).startswith(s)) { + res = *it; + return true; + } + } + return false; +} + +} // namespace pecoff +} // namespace lld diff --git a/lib/ReaderWriter/PECOFF/LinkerGeneratedSymbolFile.h b/lib/ReaderWriter/PECOFF/LinkerGeneratedSymbolFile.h new file mode 100644 index 0000000000000..b9764d70bb3bf --- /dev/null +++ b/lib/ReaderWriter/PECOFF/LinkerGeneratedSymbolFile.h @@ -0,0 +1,309 @@ +//===- lib/ReaderWriter/PECOFF/LinkerGeneratedSymbolFile.h ----------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "Atoms.h" +#include "lld/Core/Simple.h" +#include "lld/ReaderWriter/PECOFFLinkingContext.h" +#include "llvm/Support/Allocator.h" +#include <algorithm> +#include <mutex> + +using llvm::COFF::WindowsSubsystem; + +namespace lld { +namespace pecoff { + +bool findDecoratedSymbol(PECOFFLinkingContext *ctx, + std::string sym, std::string &res); + +namespace impl { + +/// The defined atom for dllexported symbols with __imp_ prefix. 
+class ImpPointerAtom : public COFFLinkerInternalAtom { +public: + ImpPointerAtom(const File &file, StringRef symbolName, uint64_t ordinal) + : COFFLinkerInternalAtom(file, /*oridnal*/ 0, std::vector<uint8_t>(4), + symbolName), + _ordinal(ordinal) {} + + uint64_t ordinal() const override { return _ordinal; } + Scope scope() const override { return scopeGlobal; } + ContentType contentType() const override { return typeData; } + Alignment alignment() const override { return Alignment(4); } + ContentPermissions permissions() const override { return permR__; } + +private: + uint64_t _ordinal; +}; + +class ImpSymbolFile : public SimpleFile { +public: + ImpSymbolFile(StringRef defsym, StringRef undefsym, uint64_t ordinal, + bool is64) + : SimpleFile(defsym), _undefined(*this, undefsym), + _defined(*this, defsym, ordinal) { + SimpleReference *ref; + if (is64) { + ref = new SimpleReference(Reference::KindNamespace::COFF, + Reference::KindArch::x86_64, + llvm::COFF::IMAGE_REL_AMD64_ADDR32, + 0, &_undefined, 0); + } else { + ref = new SimpleReference(Reference::KindNamespace::COFF, + Reference::KindArch::x86, + llvm::COFF::IMAGE_REL_I386_DIR32, + 0, &_undefined, 0); + } + _defined.addReference(std::unique_ptr<SimpleReference>(ref)); + addAtom(_defined); + addAtom(_undefined); + }; + +private: + SimpleUndefinedAtom _undefined; + ImpPointerAtom _defined; +}; + +// A file to make Resolver to resolve a symbol TO instead of a symbol FROM, +// using fallback mechanism for an undefined symbol. One can virtually rename an +// undefined symbol using this file. 
+class SymbolRenameFile : public SimpleFile { +public: + SymbolRenameFile(StringRef from, StringRef to) + : SimpleFile("<symbol-rename>"), _fromSym(from), _toSym(to), + _from(*this, _fromSym, &_to), _to(*this, _toSym) { + addAtom(_from); + }; + +private: + std::string _fromSym; + std::string _toSym; + COFFUndefinedAtom _from; + COFFUndefinedAtom _to; +}; + +} // namespace impl + +// A virtual file containing absolute symbol __ImageBase. __ImageBase (or +// ___ImageBase on x86) is a linker-generated symbol whose address is the same +// as the image base address. +class LinkerGeneratedSymbolFile : public SimpleFile { +public: + LinkerGeneratedSymbolFile(const PECOFFLinkingContext &ctx) + : SimpleFile("<linker-internal-file>"), + _imageBaseAtom(*this, ctx.decorateSymbol("__ImageBase"), + Atom::scopeGlobal, ctx.getBaseAddress()) { + addAtom(_imageBaseAtom); + }; + +private: + SimpleAbsoluteAtom _imageBaseAtom; +}; + +// A LocallyImporteSymbolFile is an archive file containing __imp_ +// symbols for local use. +// +// For each defined symbol, linker creates an implicit defined symbol +// by appending "__imp_" prefix to the original name. The content of +// the implicit symbol is a pointer to the original symbol +// content. This feature allows one to compile and link the following +// code without error, although _imp__hello is not defined in the +// code. (the leading "_" in this example is automatically appended, +// assuming it's x86.) +// +// void hello() { printf("Hello\n"); } +// extern void (*_imp__hello)(); +// int main() { +// _imp__hello(); +// return 0; +// } +// +// This odd feature is for the compatibility with MSVC link.exe. 
+class LocallyImportedSymbolFile : public SimpleArchiveLibraryFile { +public: + LocallyImportedSymbolFile(const PECOFFLinkingContext &ctx) + : SimpleArchiveLibraryFile("__imp_"), _is64(ctx.is64Bit()), + _ordinal(0) {} + + File *find(StringRef sym, bool dataSymbolOnly) override { + std::string prefix = "__imp_"; + if (!sym.startswith(prefix)) + return nullptr; + StringRef undef = sym.substr(prefix.size()); + return new (_alloc) impl::ImpSymbolFile(sym, undef, _ordinal++, _is64); + } + +private: + bool _is64; + uint64_t _ordinal; + llvm::BumpPtrAllocator _alloc; +}; + +// A ExportedSymbolRenameFile is a virtual archive file for dllexported symbols. +// +// One usually has to specify the exact symbol name to resolve it. That's true +// in most cases for PE/COFF, except the one described below. +// +// DLLExported symbols can be specified using a module definition file. In a +// file, one can write an EXPORT directive followed by symbol names. Such +// symbols may not be fully decorated. +// +// If a symbol FOO is specified to be dllexported by a module definition file, +// linker has to search not only for /FOO/ but also for /FOO@[0-9]+/ for stdcall +// and for /\?FOO@@.+/ for C++. This ambiguous matching semantics does not fit +// well with Resolver. +// +// We could probably modify Resolver to resolve ambiguous symbols, but I think +// we don't want to do that because it'd be rarely used, and only this Windows +// specific feature would use it. It's probably not a good idea to make the core +// linker to be able to deal with it. +// +// So, instead of tweaking Resolver, we chose to do some hack here. An +// ExportedSymbolRenameFile maintains a set containing all possibly defined +// symbol names. That set would be a union of (1) all the defined symbols that +// are already parsed and read and (2) all the defined symbols in archive files +// that are not yet be parsed. 
+// +// If Resolver asks this file to return an atom for a dllexported symbol, find() +// looks up the set, doing ambiguous matching. If there's a symbol with @ +// prefix, it returns an atom to rename the dllexported symbol, hoping that +// Resolver will find the new symbol with atsign from an archive file at the +// next visit. +class ExportedSymbolRenameFile : public SimpleArchiveLibraryFile { +public: + ExportedSymbolRenameFile(const PECOFFLinkingContext &ctx) + : SimpleArchiveLibraryFile("<export>"), + _ctx(const_cast<PECOFFLinkingContext *>(&ctx)) { + for (PECOFFLinkingContext::ExportDesc &desc : _ctx->getDllExports()) + _exportedSyms.insert(desc.name); + } + + File *find(StringRef sym, bool dataSymbolOnly) override { + typedef PECOFFLinkingContext::ExportDesc ExportDesc; + if (_exportedSyms.count(sym) == 0) + return nullptr; + std::string replace; + if (!findDecoratedSymbol(_ctx, sym.str(), replace)) + return nullptr; + + for (ExportDesc &exp : _ctx->getDllExports()) + if (exp.name == sym) + exp.mangledName = replace; + if (_ctx->deadStrip()) + _ctx->addDeadStripRoot(_ctx->allocate(replace)); + return new (_alloc) impl::SymbolRenameFile(sym, replace); + } + +private: + std::set<std::string> _exportedSyms; + llvm::BumpPtrAllocator _alloc; + PECOFFLinkingContext *_ctx; +}; + +// Windows has not only one but many entry point functions. The +// appropriate one is automatically selected based on the subsystem +// setting and the user-supplied entry point function. 
+// +// http://msdn.microsoft.com/en-us/library/f9t8842e.aspx +class EntryPointFile : public SimpleFile { +public: + EntryPointFile(const PECOFFLinkingContext &ctx) + : SimpleFile("<entry>"), _ctx(const_cast<PECOFFLinkingContext *>(&ctx)), + _firstTime(true) {} + + const atom_collection<UndefinedAtom> &undefined() const override { + return const_cast<EntryPointFile *>(this)->getUndefinedAtoms(); + } + +private: + const atom_collection<UndefinedAtom> &getUndefinedAtoms() { + std::lock_guard<std::mutex> lock(_mutex); + if (!_firstTime) + return _undefinedAtoms; + _firstTime = false; + + if (_ctx->hasEntry()) { + StringRef entrySym = _ctx->allocate(getEntry()); + _undefinedAtoms._atoms.push_back( + new (_alloc) SimpleUndefinedAtom(*this, entrySym)); + _ctx->setHasEntry(true); + _ctx->setEntrySymbolName(entrySym); + if (_ctx->deadStrip()) + _ctx->addDeadStripRoot(entrySym); + } + return _undefinedAtoms; + } + + // Returns the entry point function name. + std::string getEntry() const { + StringRef opt = _ctx->getEntrySymbolName(); + if (!opt.empty()) { + std::string mangled; + if (findDecoratedSymbol(_ctx, opt, mangled)) + return mangled; + return _ctx->decorateSymbol(opt); + } + return _ctx->decorateSymbol(getDefaultEntry()); + } + + std::string getDefaultEntry() const { + const std::string wWinMainCRTStartup = "wWinMainCRTStartup"; + const std::string WinMainCRTStartup = "WinMainCRTStartup"; + const std::string wmainCRTStartup = "wmainCRTStartup"; + const std::string mainCRTStartup = "mainCRTStartup"; + + if (_ctx->isDll()) { + if (_ctx->getMachineType() == llvm::COFF::IMAGE_FILE_MACHINE_I386) + return "_DllMainCRTStartup@12"; + return "_DllMainCRTStartup"; + } + + // Returns true if a given name exists in an input object file. 
+ auto defined = [&](StringRef name) -> bool { + StringRef sym = _ctx->decorateSymbol(name); + if (_ctx->definedSymbols().count(sym)) + return true; + std::string ignore; + return findDecoratedSymbol(_ctx, sym, ignore); + }; + + switch (_ctx->getSubsystem()) { + case WindowsSubsystem::IMAGE_SUBSYSTEM_UNKNOWN: { + if (defined("wWinMain")) + return wWinMainCRTStartup; + if (defined("WinMain")) + return WinMainCRTStartup; + if (defined("wmain")) + return wmainCRTStartup; + if (!defined("main")) + llvm::errs() << "Cannot infer subsystem; assuming /subsystem:console\n"; + return mainCRTStartup; + } + case WindowsSubsystem::IMAGE_SUBSYSTEM_WINDOWS_GUI: + if (defined("WinMain")) + return WinMainCRTStartup; + return wWinMainCRTStartup; + case WindowsSubsystem::IMAGE_SUBSYSTEM_WINDOWS_CUI: + if (defined("wmain")) + return wmainCRTStartup; + return mainCRTStartup; + default: + return mainCRTStartup; + } + } + + PECOFFLinkingContext *_ctx; + atom_collection_vector<UndefinedAtom> _undefinedAtoms; + std::mutex _mutex; + llvm::BumpPtrAllocator _alloc; + bool _firstTime; +}; + +} // end namespace pecoff +} // end namespace lld diff --git a/lib/ReaderWriter/PECOFF/LoadConfigPass.cpp b/lib/ReaderWriter/PECOFF/LoadConfigPass.cpp new file mode 100644 index 0000000000000..be2f5627f4ea7 --- /dev/null +++ b/lib/ReaderWriter/PECOFF/LoadConfigPass.cpp @@ -0,0 +1,75 @@ +//===- lib/ReaderWriter/PECOFF/LoadConfigPass.cpp -------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// A Load Configuration is a data structure for x86 containing an address of the +// SEH handler table. The Data Directory in the file header points to a load +// configuration. Technically that indirection is not needed but exists for +// historical reasons. 
+// +// If the file being handled has .sxdata section containing SEH handler table, +// this pass will create a Load Configuration atom. +// +//===----------------------------------------------------------------------===// + +#include "Pass.h" +#include "LoadConfigPass.h" +#include "lld/Core/File.h" +#include "lld/Core/Pass.h" +#include "lld/Core/Simple.h" +#include "llvm/Object/COFF.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Path.h" +#include <climits> +#include <ctime> +#include <utility> + +using llvm::object::coff_load_configuration32; + +namespace lld { +namespace pecoff { +namespace loadcfg { + +LoadConfigAtom::LoadConfigAtom(VirtualFile &file, const DefinedAtom *sxdata, + int count) + : COFFLinkerInternalAtom( + file, file.getNextOrdinal(), + std::vector<uint8_t>(sizeof(coff_load_configuration32))) { + addDir32Reloc( + this, sxdata, llvm::COFF::IMAGE_FILE_MACHINE_I386, + offsetof(llvm::object::coff_load_configuration32, SEHandlerTable)); + auto *data = getContents<llvm::object::coff_load_configuration32>(); + data->SEHandlerCount = count; +} + +} // namespace loadcfg + +void LoadConfigPass::perform(std::unique_ptr<MutableFile> &file) { + if (_ctx.noSEH()) + return; + + // Find the first atom in .sxdata section. 
+ const DefinedAtom *sxdata = nullptr; + int sectionSize = 0; + for (const DefinedAtom *atom : file->defined()) { + if (atom->customSectionName() == ".sxdata") { + if (!sxdata) + sxdata = atom; + sectionSize += sxdata->size(); + } + } + if (!sxdata) + return; + + auto *loadcfg = new (_alloc) + loadcfg::LoadConfigAtom(_file, sxdata, sectionSize / sizeof(uint32_t)); + file->addAtom(*loadcfg); +} + +} // namespace pecoff +} // namespace lld diff --git a/lib/ReaderWriter/PECOFF/LoadConfigPass.h b/lib/ReaderWriter/PECOFF/LoadConfigPass.h new file mode 100644 index 0000000000000..9f4a25f2b10e2 --- /dev/null +++ b/lib/ReaderWriter/PECOFF/LoadConfigPass.h @@ -0,0 +1,63 @@ +//===- lib/ReaderWriter/PECOFF/LoadConfigPass.h ---------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file \brief This linker pass creates an atom for Load Configuration +/// structure. +/// +/// For the details of the Load Configuration structure, see Microsoft PE/COFF +/// Specification section 5.8. The Load Configuration Structure (Image Only). 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_PE_COFF_LOAD_CONFIG_PASS_H +#define LLD_READER_WRITER_PE_COFF_LOAD_CONFIG_PASS_H + +#include "Atoms.h" +#include "lld/Core/File.h" +#include "lld/Core/Pass.h" +#include "lld/Core/Simple.h" +#include "lld/ReaderWriter/PECOFFLinkingContext.h" +#include <map> + +namespace lld { +namespace pecoff { +namespace loadcfg { + +class LoadConfigAtom : public COFFLinkerInternalAtom { +public: + LoadConfigAtom(VirtualFile &file, const DefinedAtom *sxdata, int count); + + SectionChoice sectionChoice() const override { return sectionCustomRequired; } + StringRef customSectionName() const override { return ".loadcfg"; } + ContentType contentType() const override { return typeData; } + ContentPermissions permissions() const override { return permR__; } + + template <typename T> T *getContents() const { + return (T *)const_cast<uint8_t *>(rawContent().data()); + } +}; + +} // namespace loadcfg + +class LoadConfigPass : public lld::Pass { +public: + LoadConfigPass(PECOFFLinkingContext &ctx) : _ctx(ctx), _file(ctx) {} + + void perform(std::unique_ptr<MutableFile> &file) override; + +private: + PECOFFLinkingContext &_ctx; + VirtualFile _file; + mutable llvm::BumpPtrAllocator _alloc; +}; + +} // namespace pecoff +} // namespace lld + +#endif diff --git a/lib/ReaderWriter/PECOFF/Makefile b/lib/ReaderWriter/PECOFF/Makefile new file mode 100644 index 0000000000000..3ad16969bba71 --- /dev/null +++ b/lib/ReaderWriter/PECOFF/Makefile @@ -0,0 +1,14 @@ +##===- lld/lib/ReaderWriter/PECOFF/Makefile --------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LLD_LEVEL := ../../.. 
+LIBRARYNAME := lldPECOFF +USEDLIBS = lldCore.a + +include $(LLD_LEVEL)/Makefile diff --git a/lib/ReaderWriter/PECOFF/OrderPass.h b/lib/ReaderWriter/PECOFF/OrderPass.h new file mode 100644 index 0000000000000..73133ff73638d --- /dev/null +++ b/lib/ReaderWriter/PECOFF/OrderPass.h @@ -0,0 +1,67 @@ +//===- lib/ReaderWriter/PECOFF/OrderPass.h -------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file \brief This pass sorts atoms by section name, so that they will appear +/// in the correct order in the output. +/// +/// In COFF, sections will be merged into one section by the linker if their +/// names are the same after discarding the "$" character and all characters +/// follow it from their names. The characters following the "$" character +/// determines the merge order. Assume there's an object file containing four +/// data sections in the following order. +/// +/// - .data$2 +/// - .data$3 +/// - .data$1 +/// - .data +/// +/// In this case, the resulting binary should have ".data" section with the +/// contents of ".data", ".data$1", ".data$2" and ".data$3" in that order. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_PE_COFF_ORDER_PASS_H +#define LLD_READER_WRITER_PE_COFF_ORDER_PASS_H + +#include "Atoms.h" +#include "lld/Core/Parallel.h" +#include "lld/Core/Pass.h" +#include <algorithm> + +namespace lld { +namespace pecoff { + +static bool compare(const DefinedAtom *lhs, const DefinedAtom *rhs) { + bool lhsCustom = (lhs->sectionChoice() == DefinedAtom::sectionCustomRequired); + bool rhsCustom = (rhs->sectionChoice() == DefinedAtom::sectionCustomRequired); + if (lhsCustom && rhsCustom) { + int cmp = lhs->customSectionName().compare(rhs->customSectionName()); + if (cmp != 0) + return cmp < 0; + return DefinedAtom::compareByPosition(lhs, rhs); + } + if (lhsCustom && !rhsCustom) + return true; + if (!lhsCustom && rhsCustom) + return false; + return DefinedAtom::compareByPosition(lhs, rhs); +} + +class OrderPass : public lld::Pass { +public: + void perform(std::unique_ptr<MutableFile> &file) override { + MutableFile::DefinedAtomRange defined = file->definedAtoms(); + parallel_sort(defined.begin(), defined.end(), compare); + } +}; + +} // namespace pecoff +} // namespace lld + +#endif diff --git a/lib/ReaderWriter/PECOFF/PDBPass.h b/lib/ReaderWriter/PECOFF/PDBPass.h new file mode 100644 index 0000000000000..0efa054db8231 --- /dev/null +++ b/lib/ReaderWriter/PECOFF/PDBPass.h @@ -0,0 +1,43 @@ +//===- lib/ReaderWriter/PECOFF/PDBPass.h ----------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_PE_COFF_PDB_PASS_H +#define LLD_READER_WRITER_PE_COFF_PDB_PASS_H + +#include "lld/Core/Pass.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Process.h" + +namespace lld { +namespace pecoff { + +class PDBPass : public lld::Pass { +public: + PDBPass(PECOFFLinkingContext &ctx) : _ctx(ctx) {} + + void perform(std::unique_ptr<MutableFile> &file) override { + if (_ctx.getDebug()) + touch(_ctx.getPDBFilePath()); + } + +private: + void touch(StringRef path) { + int fd; + if (llvm::sys::fs::openFileForWrite(path, fd, llvm::sys::fs::F_Append)) + llvm::report_fatal_error("failed to create a PDB file"); + llvm::sys::Process::SafelyCloseFileDescriptor(fd); + } + + PECOFFLinkingContext &_ctx; +}; + +} // namespace pecoff +} // namespace lld + +#endif diff --git a/lib/ReaderWriter/PECOFF/PECOFFLinkingContext.cpp b/lib/ReaderWriter/PECOFF/PECOFFLinkingContext.cpp new file mode 100644 index 0000000000000..6a657e33541d2 --- /dev/null +++ b/lib/ReaderWriter/PECOFF/PECOFFLinkingContext.cpp @@ -0,0 +1,352 @@ +//===- lib/ReaderWriter/PECOFF/PECOFFLinkingContext.cpp -------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "Atoms.h" +#include "EdataPass.h" +#include "IdataPass.h" +#include "InferSubsystemPass.h" +#include "LinkerGeneratedSymbolFile.h" +#include "LoadConfigPass.h" +#include "OrderPass.h" +#include "PDBPass.h" +#include "lld/Core/PassManager.h" +#include "lld/Core/Reader.h" +#include "lld/Core/Simple.h" +#include "lld/Core/Writer.h" +#include "lld/ReaderWriter/PECOFFLinkingContext.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Path.h" +#include <bitset> +#include <climits> +#include <set> + +namespace lld { + +bool PECOFFLinkingContext::validateImpl(raw_ostream &diagnostics) { + if (_stackReserve < _stackCommit) { + diagnostics << "Invalid stack size: reserve size must be equal to or " + << "greater than commit size, but got " << _stackCommit + << " and " << _stackReserve << ".\n"; + return false; + } + + if (_heapReserve < _heapCommit) { + diagnostics << "Invalid heap size: reserve size must be equal to or " + << "greater than commit size, but got " << _heapCommit + << " and " << _heapReserve << ".\n"; + return false; + } + + // It's an error if the base address is not multiple of 64K. + if (getBaseAddress() & 0xffff) { + diagnostics << "Base address have to be multiple of 64K, but got " + << getBaseAddress() << "\n"; + return false; + } + + // Specifing /noentry without /dll is an error. + if (!hasEntry() && !isDll()) { + diagnostics << "/noentry must be specified with /dll\n"; + return false; + } + + // Check for duplicate export ordinals. + std::set<int> exports; + for (const PECOFFLinkingContext::ExportDesc &desc : getDllExports()) { + if (desc.ordinal == -1) + continue; + if (exports.count(desc.ordinal) == 1) { + diagnostics << "Duplicate export ordinals: " << desc.ordinal << "\n"; + return false; + } + exports.insert(desc.ordinal); + } + + // Check for /align. 
+ std::bitset<64> alignment(_sectionDefaultAlignment); + if (alignment.count() != 1) { + diagnostics << "Section alignment must be a power of 2, but got " + << _sectionDefaultAlignment << "\n"; + return false; + } + + _writer = createWriterPECOFF(*this); + return true; +} + +const std::set<std::string> &PECOFFLinkingContext::definedSymbols() { + std::lock_guard<std::recursive_mutex> lock(_mutex); + for (std::unique_ptr<Node> &node : getNodes()) { + if (_seen.count(node.get()) > 0) + continue; + FileNode *fnode = dyn_cast<FileNode>(node.get()); + if (!fnode) + continue; + File *file = fnode->getFile(); + if (file->parse()) + continue; + if (auto *archive = dyn_cast<ArchiveLibraryFile>(file)) { + for (const std::string &sym : archive->getDefinedSymbols()) + _definedSyms.insert(sym); + continue; + } + for (const DefinedAtom *atom : file->defined()) + if (!atom->name().empty()) + _definedSyms.insert(atom->name()); + } + return _definedSyms; +} + +std::unique_ptr<File> PECOFFLinkingContext::createEntrySymbolFile() const { + return LinkingContext::createEntrySymbolFile("<command line option /entry>"); +} + +std::unique_ptr<File> PECOFFLinkingContext::createUndefinedSymbolFile() const { + return LinkingContext::createUndefinedSymbolFile( + "<command line option /include>"); +} + +static int getGroupStartPos(std::vector<std::unique_ptr<Node>> &nodes) { + for (int i = 0, e = nodes.size(); i < e; ++i) + if (GroupEnd *group = dyn_cast<GroupEnd>(nodes[i].get())) + return i - group->getSize(); + llvm::report_fatal_error("internal error"); +} + +void PECOFFLinkingContext::addLibraryFile(std::unique_ptr<FileNode> file) { + GroupEnd *currentGroupEnd; + int pos = -1; + std::vector<std::unique_ptr<Node>> &elements = getNodes(); + for (int i = 0, e = elements.size(); i < e; ++i) { + if ((currentGroupEnd = dyn_cast<GroupEnd>(elements[i].get()))) { + pos = i; + break; + } + } + assert(pos >= 0); + elements.insert(elements.begin() + pos, std::move(file)); + elements[pos + 1] = 
llvm::make_unique<GroupEnd>( + currentGroupEnd->getSize() + 1); +} + +bool PECOFFLinkingContext::createImplicitFiles( + std::vector<std::unique_ptr<File>> &) { + std::vector<std::unique_ptr<Node>> &members = getNodes(); + + // Create a file for the entry point function. + std::unique_ptr<FileNode> entry(new FileNode( + llvm::make_unique<pecoff::EntryPointFile>(*this))); + members.insert(members.begin() + getGroupStartPos(members), std::move(entry)); + + // Create a file for __ImageBase. + std::unique_ptr<FileNode> fileNode(new FileNode( + llvm::make_unique<pecoff::LinkerGeneratedSymbolFile>(*this))); + members.push_back(std::move(fileNode)); + + // Create a file for _imp_ symbols. + std::unique_ptr<FileNode> impFileNode(new FileNode( + llvm::make_unique<pecoff::LocallyImportedSymbolFile>(*this))); + members.push_back(std::move(impFileNode)); + + // Create a file for dllexported symbols. + std::unique_ptr<FileNode> exportNode(new FileNode( + llvm::make_unique<pecoff::ExportedSymbolRenameFile>(*this))); + addLibraryFile(std::move(exportNode)); + + return true; +} + +/// Returns the section name in the resulting executable. +/// +/// Sections in object files are usually output to the executable with the same +/// name, but you can rename by command line option. /merge:from=to makes the +/// linker to combine "from" section contents to "to" section in the +/// executable. We have a mapping for the renaming. This method looks up the +/// table and returns a new section name if renamed. +StringRef +PECOFFLinkingContext::getOutputSectionName(StringRef sectionName) const { + auto it = _renamedSections.find(sectionName); + if (it == _renamedSections.end()) + return sectionName; + return getOutputSectionName(it->second); +} + +/// Adds a mapping to the section renaming table. This method will be used for +/// /merge command line option. 
+bool PECOFFLinkingContext::addSectionRenaming(raw_ostream &diagnostics, + StringRef from, StringRef to) { + auto it = _renamedSections.find(from); + if (it != _renamedSections.end()) { + if (it->second == to) + // There's already the same mapping. + return true; + diagnostics << "Section \"" << from << "\" is already mapped to \"" + << it->second << ", so it cannot be mapped to \"" << to << "\"."; + return true; + } + + // Add a mapping, and check if there's no cycle in the renaming mapping. The + // cycle detection algorithm we use here is naive, but that's OK because the + // number of mapping is usually less than 10. + _renamedSections[from] = to; + for (auto elem : _renamedSections) { + StringRef sectionName = elem.first; + std::set<StringRef> visited; + visited.insert(sectionName); + for (;;) { + auto pos = _renamedSections.find(sectionName); + if (pos == _renamedSections.end()) + break; + if (visited.count(pos->second)) { + diagnostics << "/merge:" << from << "=" << to << " makes a cycle"; + return false; + } + sectionName = pos->second; + visited.insert(sectionName); + } + } + return true; +} + +/// Try to find the input library file from the search paths and append it to +/// the input file list. Returns true if the library file is found. +StringRef PECOFFLinkingContext::searchLibraryFile(StringRef filename) const { + // Current directory always takes precedence over the search paths. + if (llvm::sys::path::is_absolute(filename) || llvm::sys::fs::exists(filename)) + return filename; + // Iterate over the search paths. + for (StringRef dir : _inputSearchPaths) { + SmallString<128> path = dir; + llvm::sys::path::append(path, filename); + if (llvm::sys::fs::exists(path.str())) + return allocate(path.str()); + } + return filename; +} + +/// Returns the decorated name of the given symbol name. On 32-bit x86, it +/// adds "_" at the beginning of the string. On other architectures, the +/// return value is the same as the argument. 
+StringRef PECOFFLinkingContext::decorateSymbol(StringRef name) const { + if (_machineType != llvm::COFF::IMAGE_FILE_MACHINE_I386) + return name; + std::string str = "_"; + str.append(name); + return allocate(str); +} + +StringRef PECOFFLinkingContext::undecorateSymbol(StringRef name) const { + if (_machineType != llvm::COFF::IMAGE_FILE_MACHINE_I386) + return name; + if (!name.startswith("_")) + return name; + return name.substr(1); +} + +uint64_t PECOFFLinkingContext::getBaseAddress() const { + if (_baseAddress == invalidBaseAddress) + return is64Bit() ? pe32PlusDefaultBaseAddress : pe32DefaultBaseAddress; + return _baseAddress; +} + +Writer &PECOFFLinkingContext::writer() const { return *_writer; } + +void PECOFFLinkingContext::setSectionSetMask(StringRef sectionName, + uint32_t newFlags) { + _sectionSetMask[sectionName] |= newFlags; + _sectionClearMask[sectionName] &= ~newFlags; + const uint32_t rwx = (llvm::COFF::IMAGE_SCN_MEM_READ | + llvm::COFF::IMAGE_SCN_MEM_WRITE | + llvm::COFF::IMAGE_SCN_MEM_EXECUTE); + if (newFlags & rwx) + _sectionClearMask[sectionName] |= ~_sectionSetMask[sectionName] & rwx; + assert((_sectionSetMask[sectionName] & _sectionClearMask[sectionName]) == 0); +} + +void PECOFFLinkingContext::setSectionClearMask(StringRef sectionName, + uint32_t newFlags) { + _sectionClearMask[sectionName] |= newFlags; + _sectionSetMask[sectionName] &= ~newFlags; + assert((_sectionSetMask[sectionName] & _sectionClearMask[sectionName]) == 0); +} + +uint32_t PECOFFLinkingContext::getSectionAttributes(StringRef sectionName, + uint32_t flags) const { + auto si = _sectionSetMask.find(sectionName); + uint32_t setMask = (si == _sectionSetMask.end()) ? 0 : si->second; + auto ci = _sectionClearMask.find(sectionName); + uint32_t clearMask = (ci == _sectionClearMask.end()) ? 0 : ci->second; + return (flags | setMask) & ~clearMask; +} + +// Returns true if two export descriptors are the same. 
+static bool sameExportDesc(const PECOFFLinkingContext::ExportDesc &a, + const PECOFFLinkingContext::ExportDesc &b) { + return a.ordinal == b.ordinal && a.ordinal == b.ordinal && + a.noname == b.noname && a.isData == b.isData; +} + +void PECOFFLinkingContext::addDllExport(ExportDesc &desc) { + addInitialUndefinedSymbol(allocate(desc.name)); + + // MSVC link.exe silently drops characters after the first atsign. + // For example, /export:foo@4=bar is equivalent to /export:foo=bar. + // We do the same thing for compatibility. + if (!desc.externalName.empty()) { + StringRef s(desc.externalName); + size_t pos = s.find('@'); + if (pos != s.npos) + desc.externalName = s.substr(0, pos); + } + + // Scan the vector to look for existing entry. It's not very fast, + // but because the number of exported symbol is usually not that + // much, it should be okay. + for (ExportDesc &e : _dllExports) { + if (e.name != desc.name) + continue; + if (!sameExportDesc(e, desc)) + llvm::errs() << "Export symbol '" << desc.name + << "' specified more than once.\n"; + return; + } + _dllExports.push_back(desc); +} + +static std::string replaceExtension(StringRef path, StringRef ext) { + SmallString<128> ss = path; + llvm::sys::path::replace_extension(ss, ext); + return ss.str(); +} + +std::string PECOFFLinkingContext::getOutputImportLibraryPath() const { + if (!_implib.empty()) + return _implib; + return replaceExtension(outputPath(), ".lib"); +} + +std::string PECOFFLinkingContext::getPDBFilePath() const { + assert(_debug); + if (!_pdbFilePath.empty()) + return _pdbFilePath; + return replaceExtension(outputPath(), ".pdb"); +} + +void PECOFFLinkingContext::addPasses(PassManager &pm) { + pm.add(llvm::make_unique<pecoff::PDBPass>(*this)); + pm.add(llvm::make_unique<pecoff::EdataPass>(*this)); + pm.add(llvm::make_unique<pecoff::IdataPass>(*this)); + pm.add(llvm::make_unique<pecoff::OrderPass>()); + pm.add(llvm::make_unique<pecoff::LoadConfigPass>(*this)); + 
pm.add(llvm::make_unique<pecoff::InferSubsystemPass>(*this)); +} + +} // end namespace lld diff --git a/lib/ReaderWriter/PECOFF/Pass.cpp b/lib/ReaderWriter/PECOFF/Pass.cpp new file mode 100644 index 0000000000000..ed731984e378b --- /dev/null +++ b/lib/ReaderWriter/PECOFF/Pass.cpp @@ -0,0 +1,95 @@ +//===- lib/ReaderWriter/PECOFF/Pass.cpp -----------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "Atoms.h" +#include "Pass.h" +#include "lld/Core/File.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/COFF.h" + +namespace lld { +namespace pecoff { + +static void addReloc(COFFBaseDefinedAtom *atom, const Atom *target, + size_t offsetInAtom, Reference::KindArch arch, + Reference::KindValue relType) { + atom->addReference(llvm::make_unique<SimpleReference>( + Reference::KindNamespace::COFF, arch, relType, offsetInAtom, target, 0)); +} + +void addDir64Reloc(COFFBaseDefinedAtom *atom, const Atom *target, + llvm::COFF::MachineTypes machine, size_t offsetInAtom) { + switch (machine) { + case llvm::COFF::IMAGE_FILE_MACHINE_I386: + addReloc(atom, target, offsetInAtom, Reference::KindArch::x86, + llvm::COFF::IMAGE_REL_I386_DIR32); + return; + case llvm::COFF::IMAGE_FILE_MACHINE_AMD64: + addReloc(atom, target, offsetInAtom, Reference::KindArch::x86_64, + llvm::COFF::IMAGE_REL_AMD64_ADDR64); + return; + default: + llvm_unreachable("unsupported machine type"); + } +} + +void addDir32Reloc(COFFBaseDefinedAtom *atom, const Atom *target, + llvm::COFF::MachineTypes machine, size_t offsetInAtom) { + switch (machine) { + case llvm::COFF::IMAGE_FILE_MACHINE_I386: + addReloc(atom, target, offsetInAtom, Reference::KindArch::x86, + llvm::COFF::IMAGE_REL_I386_DIR32); + return; + case llvm::COFF::IMAGE_FILE_MACHINE_AMD64: + addReloc(atom, target, offsetInAtom, 
Reference::KindArch::x86_64, + llvm::COFF::IMAGE_REL_AMD64_ADDR32); + return; + default: + llvm_unreachable("unsupported machine type"); + } +} + +void addDir32NBReloc(COFFBaseDefinedAtom *atom, const Atom *target, + llvm::COFF::MachineTypes machine, size_t offsetInAtom) { + switch (machine) { + case llvm::COFF::IMAGE_FILE_MACHINE_I386: + addReloc(atom, target, offsetInAtom, Reference::KindArch::x86, + llvm::COFF::IMAGE_REL_I386_DIR32NB); + return; + case llvm::COFF::IMAGE_FILE_MACHINE_AMD64: + addReloc(atom, target, offsetInAtom, Reference::KindArch::x86_64, + llvm::COFF::IMAGE_REL_AMD64_ADDR32NB); + return; + case llvm::COFF::IMAGE_FILE_MACHINE_ARMNT: + addReloc(atom, target, offsetInAtom, Reference::KindArch::ARM, + llvm::COFF::IMAGE_REL_ARM_ADDR32NB); + return; + default: + llvm_unreachable("unsupported machine type"); + } +} + +void addRel32Reloc(COFFBaseDefinedAtom *atom, const Atom *target, + llvm::COFF::MachineTypes machine, size_t offsetInAtom) { + switch (machine) { + case llvm::COFF::IMAGE_FILE_MACHINE_I386: + addReloc(atom, target, offsetInAtom, Reference::KindArch::x86, + llvm::COFF::IMAGE_REL_I386_REL32); + return; + case llvm::COFF::IMAGE_FILE_MACHINE_AMD64: + addReloc(atom, target, offsetInAtom, Reference::KindArch::x86_64, + llvm::COFF::IMAGE_REL_AMD64_REL32); + return; + default: + llvm_unreachable("unsupported machine type"); + } +} + +} // end namespace pecoff +} // end namespace lld diff --git a/lib/ReaderWriter/PECOFF/Pass.h b/lib/ReaderWriter/PECOFF/Pass.h new file mode 100644 index 0000000000000..22466f77859e6 --- /dev/null +++ b/lib/ReaderWriter/PECOFF/Pass.h @@ -0,0 +1,34 @@ +//===- lib/ReaderWriter/PECOFF/Pass.h -------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_PE_COFF_PASS_H +#define LLD_READER_WRITER_PE_COFF_PASS_H + +#include "Atoms.h" +#include "llvm/Support/COFF.h" + +namespace lld { +namespace pecoff { + +void addDir64Reloc(COFFBaseDefinedAtom *atom, const Atom *target, + llvm::COFF::MachineTypes machine, size_t offsetInAtom); + +void addDir32Reloc(COFFBaseDefinedAtom *atom, const Atom *target, + llvm::COFF::MachineTypes machine, size_t offsetInAtom); + +void addDir32NBReloc(COFFBaseDefinedAtom *atom, const Atom *target, + llvm::COFF::MachineTypes machine, size_t offsetInAtom); + +void addRel32Reloc(COFFBaseDefinedAtom *atom, const Atom *target, + llvm::COFF::MachineTypes machine, size_t offsetInAtom); + +} // namespace pecoff +} // namespace lld + +#endif diff --git a/lib/ReaderWriter/PECOFF/ReaderCOFF.cpp b/lib/ReaderWriter/PECOFF/ReaderCOFF.cpp new file mode 100644 index 0000000000000..f060bd8dc0bc0 --- /dev/null +++ b/lib/ReaderWriter/PECOFF/ReaderCOFF.cpp @@ -0,0 +1,1140 @@ +//===- lib/ReaderWriter/PECOFF/ReaderCOFF.cpp -----------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "Atoms.h" +#include "lld/Core/Alias.h" +#include "lld/Core/File.h" +#include "lld/Core/Reader.h" +#include "lld/Driver/Driver.h" +#include "lld/ReaderWriter/PECOFFLinkingContext.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Object/COFF.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FileOutputBuffer.h" +#include "llvm/Support/FileUtilities.h" +#include "llvm/Support/Memory.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/Program.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <map> +#include <mutex> +#include <set> +#include <system_error> +#include <vector> + +#define DEBUG_TYPE "ReaderCOFF" + +using lld::pecoff::COFFBSSAtom; +using lld::pecoff::COFFDefinedAtom; +using lld::pecoff::COFFDefinedFileAtom; +using lld::pecoff::COFFUndefinedAtom; +using llvm::object::coff_aux_section_definition; +using llvm::object::coff_aux_weak_external; +using llvm::object::coff_relocation; +using llvm::object::coff_section; +using llvm::object::coff_symbol; +using llvm::support::ulittle32_t; + +using namespace lld; + +namespace { + +class BumpPtrStringSaver : public llvm::cl::StringSaver { +public: + const char *SaveString(const char *str) override { + size_t len = strlen(str); + std::lock_guard<std::mutex> lock(_allocMutex); + char *copy = _alloc.Allocate<char>(len + 1); + memcpy(copy, str, len + 1); + return copy; + } + +private: + llvm::BumpPtrAllocator _alloc; + std::mutex _allocMutex; +}; + +class FileCOFF : public File { +private: + typedef std::vector<llvm::object::COFFSymbolRef> SymbolVectorT; + typedef std::map<const coff_section *, 
SymbolVectorT> SectionToSymbolsT; + +public: + FileCOFF(std::unique_ptr<MemoryBuffer> mb, PECOFFLinkingContext &ctx) + : File(mb->getBufferIdentifier(), kindObject), _mb(std::move(mb)), + _compatibleWithSEH(false), _ordinal(1), + _machineType(llvm::COFF::MT_Invalid), _ctx(ctx) {} + + std::error_code doParse() override; + bool isCompatibleWithSEH() const { return _compatibleWithSEH; } + llvm::COFF::MachineTypes getMachineType() { return _machineType; } + + const atom_collection<DefinedAtom> &defined() const override { + return _definedAtoms; + } + + const atom_collection<UndefinedAtom> &undefined() const override { + return _undefinedAtoms; + } + + const atom_collection<SharedLibraryAtom> &sharedLibrary() const override { + return _sharedLibraryAtoms; + } + + const atom_collection<AbsoluteAtom> &absolute() const override { + return _absoluteAtoms; + } + + void beforeLink() override; + + void addUndefinedSymbol(StringRef sym) { + _undefinedAtoms._atoms.push_back(new (_alloc) COFFUndefinedAtom(*this, sym)); + } + + AliasAtom *createAlias(StringRef name, const DefinedAtom *target, int cnt); + void createAlternateNameAtoms(); + std::error_code parseDirectiveSection(StringRef directives); + + mutable llvm::BumpPtrAllocator _alloc; + +private: + std::error_code readSymbolTable(SymbolVectorT &result); + + void createAbsoluteAtoms(const SymbolVectorT &symbols, + std::vector<const AbsoluteAtom *> &result); + + std::error_code + createUndefinedAtoms(const SymbolVectorT &symbols, + std::vector<const UndefinedAtom *> &result); + + std::error_code + createDefinedSymbols(const SymbolVectorT &symbols, + std::vector<const DefinedAtom *> &result); + + std::error_code cacheSectionAttributes(); + std::error_code maybeCreateSXDataAtoms(); + + std::error_code + AtomizeDefinedSymbolsInSection(const coff_section *section, + SymbolVectorT &symbols, + std::vector<COFFDefinedFileAtom *> &atoms); + + std::error_code + AtomizeDefinedSymbols(SectionToSymbolsT &definedSymbols, + 
std::vector<const DefinedAtom *> &definedAtoms); + + std::error_code findAtomAt(const coff_section *section, + uint32_t targetAddress, + COFFDefinedFileAtom *&result, + uint32_t &offsetInAtom); + + std::error_code getAtomBySymbolIndex(uint32_t index, Atom *&ret); + + std::error_code + addRelocationReference(const coff_relocation *rel, + const coff_section *section); + + std::error_code getSectionContents(StringRef sectionName, + ArrayRef<uint8_t> &result); + std::error_code getReferenceArch(Reference::KindArch &result); + std::error_code addRelocationReferenceToAtoms(); + std::error_code findSection(StringRef name, const coff_section *&result); + StringRef ArrayRefToString(ArrayRef<uint8_t> array); + uint64_t getNextOrdinal(); + + std::unique_ptr<const llvm::object::COFFObjectFile> _obj; + std::unique_ptr<MemoryBuffer> _mb; + atom_collection_vector<DefinedAtom> _definedAtoms; + atom_collection_vector<UndefinedAtom> _undefinedAtoms; + atom_collection_vector<SharedLibraryAtom> _sharedLibraryAtoms; + atom_collection_vector<AbsoluteAtom> _absoluteAtoms; + + // The target type of the object. + Reference::KindArch _referenceArch; + + // True if the object has "@feat.00" symbol. + bool _compatibleWithSEH; + + // A map from symbol to its name. All symbols should be in this map except + // unnamed ones. + std::map<llvm::object::COFFSymbolRef, StringRef> _symbolName; + + // A map from symbol to its resultant atom. + std::map<llvm::object::COFFSymbolRef, Atom *> _symbolAtom; + + // A map from symbol to its aux symbol. + std::map<llvm::object::COFFSymbolRef, llvm::object::COFFSymbolRef> _auxSymbol; + + // A map from section to its atoms. + std::map<const coff_section *, std::vector<COFFDefinedFileAtom *>> + _sectionAtoms; + + // A set of COMDAT sections. + std::set<const coff_section *> _comdatSections; + + // A map to get whether the section allows its contents to be merged or not. 
+ std::map<const coff_section *, DefinedAtom::Merge> _merge; + + // COMDAT associative sections + std::multimap<const coff_section *, const coff_section *> _association; + + // A sorted map to find an atom from a section and an offset within + // the section. + std::map<const coff_section *, std::multimap<uint32_t, COFFDefinedAtom *>> + _definedAtomLocations; + + uint64_t _ordinal; + llvm::COFF::MachineTypes _machineType; + PECOFFLinkingContext &_ctx; + mutable BumpPtrStringSaver _stringSaver; +}; + +// Converts the COFF symbol attribute to the LLD's atom attribute. +Atom::Scope getScope(llvm::object::COFFSymbolRef symbol) { + switch (symbol.getStorageClass()) { + case llvm::COFF::IMAGE_SYM_CLASS_EXTERNAL: + return Atom::scopeGlobal; + case llvm::COFF::IMAGE_SYM_CLASS_STATIC: + case llvm::COFF::IMAGE_SYM_CLASS_LABEL: + return Atom::scopeTranslationUnit; + } + llvm_unreachable("Unknown scope"); +} + +DefinedAtom::ContentType getContentType(const coff_section *section) { + if (section->Characteristics & llvm::COFF::IMAGE_SCN_CNT_CODE) + return DefinedAtom::typeCode; + if (section->Characteristics & llvm::COFF::IMAGE_SCN_CNT_INITIALIZED_DATA) + return DefinedAtom::typeData; + if (section->Characteristics & llvm::COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA) + return DefinedAtom::typeZeroFill; + return DefinedAtom::typeUnknown; +} + +DefinedAtom::ContentPermissions getPermissions(const coff_section *section) { + if (section->Characteristics & llvm::COFF::IMAGE_SCN_MEM_READ && + section->Characteristics & llvm::COFF::IMAGE_SCN_MEM_WRITE) + return DefinedAtom::permRW_; + if (section->Characteristics & llvm::COFF::IMAGE_SCN_MEM_READ && + section->Characteristics & llvm::COFF::IMAGE_SCN_MEM_EXECUTE) + return DefinedAtom::permR_X; + if (section->Characteristics & llvm::COFF::IMAGE_SCN_MEM_READ) + return DefinedAtom::permR__; + return DefinedAtom::perm___; +} + +/// Returns the alignment of the section. 
The contents of the section must be +/// aligned by this value in the resulting executable/DLL. +DefinedAtom::Alignment getAlignment(const coff_section *section) { + if (section->Characteristics & llvm::COFF::IMAGE_SCN_TYPE_NO_PAD) + return DefinedAtom::Alignment(0); + + // Bit [20:24] contains section alignment information. We need to decrease + // the value stored by 1 in order to get the real exponent (e.g, ALIGN_1BYTE + // is 0x00100000, but the exponent should be 0) + uint32_t characteristics = (section->Characteristics >> 20) & 0xf; + + // If all bits are off, we treat it as if ALIGN_1BYTE was on. The PE/COFF spec + // does not say anything about this case, but CVTRES.EXE does not set any bit + // in characteristics[20:24], and its output is intended to be copied to .rsrc + // section with no padding, so I think doing this is the right thing. + if (characteristics == 0) + return DefinedAtom::Alignment(0); + + uint32_t powerOf2 = characteristics - 1; + return DefinedAtom::Alignment(powerOf2); +} + +DefinedAtom::Merge getMerge(const coff_aux_section_definition *auxsym) { + switch (auxsym->Selection) { + case llvm::COFF::IMAGE_COMDAT_SELECT_NODUPLICATES: + return DefinedAtom::mergeNo; + case llvm::COFF::IMAGE_COMDAT_SELECT_ANY: + return DefinedAtom::mergeAsWeakAndAddressUsed; + case llvm::COFF::IMAGE_COMDAT_SELECT_EXACT_MATCH: + // TODO: This mapping is wrong. Fix it. + return DefinedAtom::mergeByContent; + case llvm::COFF::IMAGE_COMDAT_SELECT_SAME_SIZE: + return DefinedAtom::mergeSameNameAndSize; + case llvm::COFF::IMAGE_COMDAT_SELECT_LARGEST: + return DefinedAtom::mergeByLargestSection; + case llvm::COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE: + case llvm::COFF::IMAGE_COMDAT_SELECT_NEWEST: + // FIXME: These attributes has more complicated semantics than the regular + // weak symbol. These are mapped to mergeAsWeakAndAddressUsed for now + // because the core linker does not support them yet. We eventually have + // to implement them for full COFF support. 
+ return DefinedAtom::mergeAsWeakAndAddressUsed; + default: + llvm_unreachable("Unknown merge type"); + } +} + +StringRef getMachineName(llvm::COFF::MachineTypes Type) { + switch (Type) { + default: llvm_unreachable("unsupported machine type"); + case llvm::COFF::IMAGE_FILE_MACHINE_ARMNT: + return "ARM"; + case llvm::COFF::IMAGE_FILE_MACHINE_I386: + return "X86"; + case llvm::COFF::IMAGE_FILE_MACHINE_AMD64: + return "X64"; + } +} + +std::error_code FileCOFF::doParse() { + auto binaryOrErr = llvm::object::createBinary(_mb->getMemBufferRef()); + if (std::error_code ec = binaryOrErr.getError()) + return ec; + std::unique_ptr<llvm::object::Binary> bin = std::move(binaryOrErr.get()); + + _obj.reset(dyn_cast<const llvm::object::COFFObjectFile>(bin.get())); + if (!_obj) + return make_error_code(llvm::object::object_error::invalid_file_type); + bin.release(); + + _machineType = static_cast<llvm::COFF::MachineTypes>(_obj->getMachine()); + + if (getMachineType() != llvm::COFF::IMAGE_FILE_MACHINE_UNKNOWN && + getMachineType() != _ctx.getMachineType()) { + llvm::errs() << "module machine type '" + << getMachineName(getMachineType()) + << "' conflicts with target machine type '" + << getMachineName(_ctx.getMachineType()) << "'\n"; + return NativeReaderError::conflicting_target_machine; + } + + if (std::error_code ec = getReferenceArch(_referenceArch)) + return ec; + + // Read the symbol table and atomize them if possible. Defined atoms + // cannot be atomized in one pass, so they will be not be atomized but + // added to symbolToAtom. 
+ SymbolVectorT symbols; + if (std::error_code ec = readSymbolTable(symbols)) + return ec; + + createAbsoluteAtoms(symbols, _absoluteAtoms._atoms); + if (std::error_code ec = + createUndefinedAtoms(symbols, _undefinedAtoms._atoms)) + return ec; + if (std::error_code ec = createDefinedSymbols(symbols, _definedAtoms._atoms)) + return ec; + if (std::error_code ec = addRelocationReferenceToAtoms()) + return ec; + if (std::error_code ec = maybeCreateSXDataAtoms()) + return ec; + + // Check for /SAFESEH. + if (_ctx.requireSEH() && !isCompatibleWithSEH()) { + llvm::errs() << "/SAFESEH is specified, but " + << _mb->getBufferIdentifier() + << " is not compatible with SEH.\n"; + return llvm::object::object_error::parse_failed; + } + return std::error_code(); +} + +void FileCOFF::beforeLink() { + // Acquire the mutex to mutate _ctx. + std::lock_guard<std::recursive_mutex> lock(_ctx.getMutex()); + std::set<StringRef> undefSyms; + + // Interpret .drectve section if the section has contents. + ArrayRef<uint8_t> directives; + if (getSectionContents(".drectve", directives)) + return; + if (!directives.empty()) { + std::set<StringRef> orig; + for (StringRef sym : _ctx.initialUndefinedSymbols()) + orig.insert(sym); + if (parseDirectiveSection(ArrayRefToString(directives))) + return; + for (StringRef sym : _ctx.initialUndefinedSymbols()) + if (orig.count(sym) == 0) + undefSyms.insert(sym); + } + + // Add /INCLUDE'ed symbols to the file as if they existed in the + // file as undefined symbols. + for (StringRef sym : undefSyms) { + addUndefinedSymbol(sym); + _ctx.addDeadStripRoot(sym); + } + + // One can define alias symbols using /alternatename:<sym>=<sym> option. + // The mapping for /alternatename is in the context object. This helper + // function iterate over defined atoms and create alias atoms if needed. + createAlternateNameAtoms(); + + // In order to emit SEH table, all input files need to be compatible with + // SEH. Disable SEH if the file being read is not compatible. 
+ if (!isCompatibleWithSEH()) + _ctx.setSafeSEH(false); +} + +/// Iterate over the symbol table to retrieve all symbols. +std::error_code +FileCOFF::readSymbolTable(SymbolVectorT &result) { + for (uint32_t i = 0, e = _obj->getNumberOfSymbols(); i != e; ++i) { + // Retrieve the symbol. + ErrorOr<llvm::object::COFFSymbolRef> sym = _obj->getSymbol(i); + StringRef name; + if (std::error_code ec = sym.getError()) + return ec; + if (sym->getSectionNumber() == llvm::COFF::IMAGE_SYM_DEBUG) + goto next; + result.push_back(*sym); + + if (std::error_code ec = _obj->getSymbolName(*sym, name)) + return ec; + + // Existence of the symbol @feat.00 indicates that object file is compatible + // with Safe Exception Handling. + if (name == "@feat.00") { + _compatibleWithSEH = true; + goto next; + } + + // Cache the name. + _symbolName[*sym] = name; + + // Symbol may be followed by auxiliary symbol table records. The aux + // record can be in any format, but the size is always the same as the + // regular symbol. The aux record supplies additional information for the + // standard symbol. We do not interpret the aux record here, but just + // store it to _auxSymbol. + if (sym->getNumberOfAuxSymbols() > 0) { + ErrorOr<llvm::object::COFFSymbolRef> aux = _obj->getSymbol(i + 1); + if (std::error_code ec = aux.getError()) + return ec; + _auxSymbol[*sym] = *aux; + } + next: + i += sym->getNumberOfAuxSymbols(); + } + return std::error_code(); +} + +/// Create atoms for the absolute symbols. +void FileCOFF::createAbsoluteAtoms(const SymbolVectorT &symbols, + std::vector<const AbsoluteAtom *> &result) { + for (llvm::object::COFFSymbolRef sym : symbols) { + if (sym.getSectionNumber() != llvm::COFF::IMAGE_SYM_ABSOLUTE) + continue; + auto *atom = new (_alloc) SimpleAbsoluteAtom(*this, _symbolName[sym], + getScope(sym), sym.getValue()); + result.push_back(atom); + _symbolAtom[sym] = atom; + } +} + +/// Create atoms for the undefined symbols. 
This code is bit complicated +/// because it supports "weak externals" mechanism of COFF. If an undefined +/// symbol (sym1) has auxiliary data, the data contains a symbol table index +/// at which the "second symbol" (sym2) for sym1 exists. If sym1 is resolved, +/// it's linked normally. If not, sym1 is resolved as if it has sym2's +/// name. This relationship between sym1 and sym2 is represented using +/// fallback mechanism of undefined symbol. +std::error_code +FileCOFF::createUndefinedAtoms(const SymbolVectorT &symbols, + std::vector<const UndefinedAtom *> &result) { + std::map<llvm::object::COFFSymbolRef, llvm::object::COFFSymbolRef> + weakExternal; + std::set<llvm::object::COFFSymbolRef> fallback; + for (llvm::object::COFFSymbolRef sym : symbols) { + if (sym.getSectionNumber() != llvm::COFF::IMAGE_SYM_UNDEFINED) + continue; + // Create a mapping from sym1 to sym2, if the undefined symbol has + // auxiliary data. + auto iter = _auxSymbol.find(sym); + if (iter == _auxSymbol.end()) + continue; + const coff_aux_weak_external *aux = + reinterpret_cast<const coff_aux_weak_external *>( + iter->second.getRawPtr()); + ErrorOr<llvm::object::COFFSymbolRef> sym2 = _obj->getSymbol(aux->TagIndex); + if (std::error_code ec = sym2.getError()) + return ec; + weakExternal[sym] = *sym2; + fallback.insert(*sym2); + } + + // Create atoms for the undefined symbols. + for (llvm::object::COFFSymbolRef sym : symbols) { + if (sym.getSectionNumber() != llvm::COFF::IMAGE_SYM_UNDEFINED) + continue; + if (fallback.count(sym) > 0) + continue; + + // If the symbol has sym2, create an undefiend atom for sym2, so that we + // can pass it as a fallback atom. + UndefinedAtom *fallback = nullptr; + auto iter = weakExternal.find(sym); + if (iter != weakExternal.end()) { + llvm::object::COFFSymbolRef sym2 = iter->second; + fallback = new (_alloc) COFFUndefinedAtom(*this, _symbolName[sym2]); + _symbolAtom[sym2] = fallback; + } + + // Create an atom for the symbol. 
+ auto *atom = + new (_alloc) COFFUndefinedAtom(*this, _symbolName[sym], fallback); + result.push_back(atom); + _symbolAtom[sym] = atom; + } + return std::error_code(); +} + +/// Create atoms for the defined symbols. This pass is a bit complicated than +/// the other two, because in order to create the atom for the defined symbol +/// we need to know the adjacent symbols. +std::error_code +FileCOFF::createDefinedSymbols(const SymbolVectorT &symbols, + std::vector<const DefinedAtom *> &result) { + // A defined atom can be merged if its section attribute allows its contents + // to be merged. In COFF, it's not very easy to get the section attribute + // for the symbol, so scan all sections in advance and cache the attributes + // for later use. + if (std::error_code ec = cacheSectionAttributes()) + return ec; + + // Filter non-defined atoms, and group defined atoms by its section. + SectionToSymbolsT definedSymbols; + for (llvm::object::COFFSymbolRef sym : symbols) { + // A symbol with section number 0 and non-zero value represents a common + // symbol. The MS COFF spec did not give a definition of what the common + // symbol is. We should probably follow ELF's definition shown below. + // + // - If one object file has a common symbol and another has a definition, + // the common symbol is treated as an undefined reference. + // - If there is no definition for a common symbol, the program linker + // acts as though it saw a definition initialized to zero of the + // appropriate size. + // - Two object files may have common symbols of + // different sizes, in which case the program linker will use the + // largest size. + // + // FIXME: We are currently treating the common symbol as a normal + // mergeable atom. Implement the above semantcis. 
+ if (sym.getSectionNumber() == llvm::COFF::IMAGE_SYM_UNDEFINED && + sym.getValue() > 0) { + StringRef name = _symbolName[sym]; + uint32_t size = sym.getValue(); + auto *atom = new (_alloc) + COFFBSSAtom(*this, name, getScope(sym), DefinedAtom::permRW_, + DefinedAtom::mergeAsWeakAndAddressUsed, size, getNextOrdinal()); + + // Common symbols should be aligned on natural boundaries with the maximum + // of 32 byte. It's not documented anywhere, but it's what MSVC link.exe + // seems to be doing. + uint64_t alignment = std::min((uint64_t)32, llvm::NextPowerOf2(size)); + atom->setAlignment( + DefinedAtom::Alignment(llvm::countTrailingZeros(alignment))); + result.push_back(atom); + continue; + } + + // Skip if it's not for defined atom. + if (sym.getSectionNumber() == llvm::COFF::IMAGE_SYM_DEBUG || + sym.getSectionNumber() == llvm::COFF::IMAGE_SYM_ABSOLUTE || + sym.getSectionNumber() == llvm::COFF::IMAGE_SYM_UNDEFINED) + continue; + + const coff_section *sec; + if (std::error_code ec = _obj->getSection(sym.getSectionNumber(), sec)) + return ec; + assert(sec && "SectionIndex > 0, Sec must be non-null!"); + + uint8_t sc = sym.getStorageClass(); + if (sc != llvm::COFF::IMAGE_SYM_CLASS_EXTERNAL && + sc != llvm::COFF::IMAGE_SYM_CLASS_STATIC && + sc != llvm::COFF::IMAGE_SYM_CLASS_FUNCTION && + sc != llvm::COFF::IMAGE_SYM_CLASS_LABEL) { + llvm::errs() << "Unable to create atom for: " << _symbolName[sym] << " (" + << static_cast<int>(sc) << ")\n"; + return llvm::object::object_error::parse_failed; + } + + definedSymbols[sec].push_back(sym); + } + + // Atomize the defined symbols. + if (std::error_code ec = AtomizeDefinedSymbols(definedSymbols, result)) + return ec; + + return std::error_code(); +} + +// Cache the COMDAT attributes, which indicate whether the symbols in the +// section can be merged or not. 
+std::error_code FileCOFF::cacheSectionAttributes() { + // The COMDAT section attribute is not an attribute of coff_section, but is + // stored in the auxiliary symbol for the first symbol referring a COMDAT + // section. It feels to me that it's unnecessarily complicated, but this is + // how COFF works. + for (auto i : _auxSymbol) { + // Read a section from the file + llvm::object::COFFSymbolRef sym = i.first; + if (sym.getSectionNumber() == llvm::COFF::IMAGE_SYM_ABSOLUTE || + sym.getSectionNumber() == llvm::COFF::IMAGE_SYM_UNDEFINED) + continue; + + const coff_section *sec; + if (std::error_code ec = _obj->getSection(sym.getSectionNumber(), sec)) + return ec; + const coff_aux_section_definition *aux = + reinterpret_cast<const coff_aux_section_definition *>( + i.second.getRawPtr()); + + if (sec->Characteristics & llvm::COFF::IMAGE_SCN_LNK_COMDAT) { + // Read aux symbol data. + _comdatSections.insert(sec); + _merge[sec] = getMerge(aux); + } + + // Handle associative sections. + if (aux->Selection == llvm::COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE) { + const coff_section *parent; + if (std::error_code ec = + _obj->getSection(aux->getNumber(sym.isBigObj()), parent)) + return ec; + _association.insert(std::make_pair(parent, sec)); + } + } + + // The sections that does not have auxiliary symbol are regular sections, in + // which symbols are not allowed to be merged. + for (const auto §ion : _obj->sections()) { + const coff_section *sec = _obj->getCOFFSection(section); + if (!_merge.count(sec)) + _merge[sec] = DefinedAtom::mergeNo; + } + return std::error_code(); +} + +/// Atomize \p symbols and append the results to \p atoms. The symbols are +/// assumed to have been defined in the \p section. +std::error_code FileCOFF::AtomizeDefinedSymbolsInSection( + const coff_section *section, SymbolVectorT &symbols, + std::vector<COFFDefinedFileAtom *> &atoms) { + // Sort symbols by position. 
+ std::stable_sort( + symbols.begin(), symbols.end(), + [](llvm::object::COFFSymbolRef a, llvm::object::COFFSymbolRef b) + -> bool { return a.getValue() < b.getValue(); }); + + StringRef sectionName; + if (std::error_code ec = _obj->getSectionName(section, sectionName)) + return ec; + + // BSS section does not have contents. If this is the BSS section, create + // COFFBSSAtom instead of COFFDefinedAtom. + if (section->Characteristics & llvm::COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA) { + for (auto si = symbols.begin(), se = symbols.end(); si != se; ++si) { + llvm::object::COFFSymbolRef sym = *si; + uint32_t size = (si + 1 == se) ? section->SizeOfRawData - sym.getValue() + : si[1].getValue() - sym.getValue(); + auto *atom = new (_alloc) COFFBSSAtom( + *this, _symbolName[sym], getScope(sym), getPermissions(section), + DefinedAtom::mergeAsWeakAndAddressUsed, size, getNextOrdinal()); + atoms.push_back(atom); + _symbolAtom[sym] = atom; + } + return std::error_code(); + } + + ArrayRef<uint8_t> secData; + if (std::error_code ec = _obj->getSectionContents(section, secData)) + return ec; + + // A section with IMAGE_SCN_LNK_{INFO,REMOVE} attribute will never become + // a part of the output image. That's what the COFF spec says. + if (section->Characteristics & llvm::COFF::IMAGE_SCN_LNK_INFO || + section->Characteristics & llvm::COFF::IMAGE_SCN_LNK_REMOVE) + return std::error_code(); + + // Supporting debug info needs more work than just linking and combining + // .debug sections. We don't support it yet. Let's discard .debug sections at + // the very beginning of the process so that we don't spend time on linking + // blobs that nobody would understand. 
+ if ((section->Characteristics & llvm::COFF::IMAGE_SCN_MEM_DISCARDABLE) && + (sectionName == ".debug" || sectionName.startswith(".debug$"))) { + return std::error_code(); + } + + DefinedAtom::ContentType type = getContentType(section); + DefinedAtom::ContentPermissions perms = getPermissions(section); + uint64_t sectionSize = section->SizeOfRawData; + bool isComdat = (_comdatSections.count(section) == 1); + + // Create an atom for the entire section. + if (symbols.empty()) { + ArrayRef<uint8_t> data(secData.data(), secData.size()); + auto *atom = new (_alloc) COFFDefinedAtom( + *this, "", sectionName, sectionSize, Atom::scopeTranslationUnit, + type, isComdat, perms, _merge[section], data, getNextOrdinal()); + atoms.push_back(atom); + _definedAtomLocations[section].insert(std::make_pair(0, atom)); + return std::error_code(); + } + + // Create an unnamed atom if the first atom isn't at the start of the + // section. + if (symbols[0].getValue() != 0) { + uint64_t size = symbols[0].getValue(); + ArrayRef<uint8_t> data(secData.data(), size); + auto *atom = new (_alloc) COFFDefinedAtom( + *this, "", sectionName, sectionSize, Atom::scopeTranslationUnit, + type, isComdat, perms, _merge[section], data, getNextOrdinal()); + atoms.push_back(atom); + _definedAtomLocations[section].insert(std::make_pair(0, atom)); + } + + for (auto si = symbols.begin(), se = symbols.end(); si != se; ++si) { + const uint8_t *start = secData.data() + si->getValue(); + // if this is the last symbol, take up the remaining data. + const uint8_t *end = (si + 1 == se) ? 
secData.data() + secData.size() + : secData.data() + (si + 1)->getValue(); + ArrayRef<uint8_t> data(start, end); + auto *atom = new (_alloc) COFFDefinedAtom( + *this, _symbolName[*si], sectionName, sectionSize, getScope(*si), + type, isComdat, perms, _merge[section], data, getNextOrdinal()); + atoms.push_back(atom); + _symbolAtom[*si] = atom; + _definedAtomLocations[section].insert(std::make_pair(si->getValue(), atom)); + } + return std::error_code(); +} + +std::error_code FileCOFF::AtomizeDefinedSymbols( + SectionToSymbolsT &definedSymbols, + std::vector<const DefinedAtom *> &definedAtoms) { + // For each section, make atoms for all the symbols defined in the + // section, and append the atoms to the result objects. + for (auto &i : definedSymbols) { + const coff_section *section = i.first; + SymbolVectorT &symbols = i.second; + std::vector<COFFDefinedFileAtom *> atoms; + if (std::error_code ec = + AtomizeDefinedSymbolsInSection(section, symbols, atoms)) + return ec; + + // Set alignment to the first atom so that the section contents + // will be aligned as specified by the object section header. + if (atoms.size() > 0) + atoms[0]->setAlignment(getAlignment(section)); + + // Connect atoms with layout-after edges. It prevents atoms + // from being GC'ed if there is a reference to one of the atoms + // in the same layout-after chain. In such case we want to emit + // all the atoms appeared in the same chain, because the "live" + // atom may reference other atoms in the same chain. + if (atoms.size() >= 2) + for (auto it = atoms.begin(), e = atoms.end(); it + 1 != e; ++it) + addLayoutEdge(*it, *(it + 1), lld::Reference::kindLayoutAfter); + + for (COFFDefinedFileAtom *atom : atoms) { + _sectionAtoms[section].push_back(atom); + definedAtoms.push_back(atom); + } + } + + // A COMDAT section with SELECT_ASSOCIATIVE attribute refer to other + // section. If the referred section is linked to a binary, the + // referring section needs to be linked too. 
A typical use case of + // this attribute is a static initializer; a parent is a comdat BSS + // section, and a child is a static initializer code for the data. + // + // We add referring section contents to the referred section's + // associate list, so that Resolver takes care of them. + for (auto i : _association) { + const coff_section *parent = i.first; + const coff_section *child = i.second; + if (_sectionAtoms.count(child)) { + COFFDefinedFileAtom *p = _sectionAtoms[parent][0]; + p->addAssociate(_sectionAtoms[child][0]); + } + } + + return std::error_code(); +} + +/// Find the atom that is at \p targetAddress in \p section. +std::error_code FileCOFF::findAtomAt(const coff_section *section, + uint32_t targetAddress, + COFFDefinedFileAtom *&result, + uint32_t &offsetInAtom) { + auto loc = _definedAtomLocations.find(section); + if (loc == _definedAtomLocations.end()) + return llvm::object::object_error::parse_failed; + std::multimap<uint32_t, COFFDefinedAtom *> &map = loc->second; + + auto it = map.upper_bound(targetAddress); + if (it == map.begin()) + return llvm::object::object_error::parse_failed; + --it; + uint32_t atomAddress = it->first; + result = it->second; + offsetInAtom = targetAddress - atomAddress; + return std::error_code(); +} + +/// Find the atom for the symbol that was at the \p index in the symbol +/// table. +std::error_code FileCOFF::getAtomBySymbolIndex(uint32_t index, Atom *&ret) { + ErrorOr<llvm::object::COFFSymbolRef> symbol = _obj->getSymbol(index); + if (std::error_code ec = symbol.getError()) + return ec; + ret = _symbolAtom[*symbol]; + assert(ret); + return std::error_code(); +} + +/// Add relocation information to an atom based on \p rel. \p rel is an +/// relocation entry for the \p section, and \p atoms are all the atoms +/// defined in the \p section. +std::error_code FileCOFF::addRelocationReference( + const coff_relocation *rel, const coff_section *section) { + // The address of the item which relocation is applied. 
Section's + // VirtualAddress needs to be added for historical reasons, but the value + // is usually just zero, so adding it is usually no-op. + uint32_t itemAddress = rel->VirtualAddress + section->VirtualAddress; + + Atom *targetAtom = nullptr; + if (std::error_code ec = + getAtomBySymbolIndex(rel->SymbolTableIndex, targetAtom)) + return ec; + + COFFDefinedFileAtom *atom; + uint32_t offsetInAtom; + if (std::error_code ec = findAtomAt(section, itemAddress, atom, offsetInAtom)) + return ec; + atom->addReference(llvm::make_unique<SimpleReference>( + Reference::KindNamespace::COFF, _referenceArch, rel->Type, offsetInAtom, + targetAtom, 0)); + return std::error_code(); +} + +// Read section contents. +std::error_code FileCOFF::getSectionContents(StringRef sectionName, + ArrayRef<uint8_t> &result) { + const coff_section *section = nullptr; + if (std::error_code ec = findSection(sectionName, section)) + return ec; + if (!section) + return std::error_code(); + if (std::error_code ec = _obj->getSectionContents(section, result)) + return ec; + return std::error_code(); +} + +AliasAtom * +FileCOFF::createAlias(StringRef name, const DefinedAtom *target, int cnt) { + AliasAtom *alias = new (_alloc) AliasAtom(*this, name); + alias->addReference(Reference::KindNamespace::all, Reference::KindArch::all, + Reference::kindLayoutAfter, 0, target, 0); + alias->setMerge(DefinedAtom::mergeAsWeak); + if (target->contentType() == DefinedAtom::typeCode) + alias->setDeadStrip(DefinedAtom::deadStripNever); + alias->setOrdinal(target->ordinal() - cnt); + return alias; +} + +void FileCOFF::createAlternateNameAtoms() { + std::vector<AliasAtom *> aliases; + for (const DefinedAtom *atom : defined()) { + int cnt = 1; + for (StringRef alias : _ctx.getAlternateNames(atom->name())) + aliases.push_back(createAlias(alias, atom, cnt++)); + } + for (AliasAtom *alias : aliases) + _definedAtoms._atoms.push_back(alias); +} + +// Interpret the contents of .drectve section. 
If exists, the section contains +// a string containing command line options. The linker is expected to +// interpret the options as if they were given via the command line. +// +// The section mainly contains /defaultlib (-l in Unix), but can contain any +// options as long as they are valid. +std::error_code +FileCOFF::parseDirectiveSection(StringRef directives) { + DEBUG(llvm::dbgs() << ".drectve: " << directives << "\n"); + + // Split the string into tokens, as the shell would do for argv. + SmallVector<const char *, 16> tokens; + tokens.push_back("link"); // argv[0] is the command name. Will be ignored. + llvm::cl::TokenizeWindowsCommandLine(directives, _stringSaver, tokens); + tokens.push_back(nullptr); + + // Calls the command line parser to interpret the token string as if they + // were given via the command line. + int argc = tokens.size() - 1; + const char **argv = &tokens[0]; + std::string errorMessage; + llvm::raw_string_ostream stream(errorMessage); + PECOFFLinkingContext::ParseDirectives parseDirectives = + _ctx.getParseDirectives(); + bool parseFailed = !parseDirectives(argc, argv, _ctx, stream); + stream.flush(); + // Print error message if error. + if (parseFailed) { + return make_dynamic_error_code( + Twine("Failed to parse '") + directives + "'\n" + + "Reason: " + errorMessage); + } + if (!errorMessage.empty()) { + llvm::errs() << "lld warning: " << errorMessage << "\n"; + } + return std::error_code(); +} + +/// Returns the target machine type of the current object file. 
+std::error_code FileCOFF::getReferenceArch(Reference::KindArch &result) { + switch (_obj->getMachine()) { + case llvm::COFF::IMAGE_FILE_MACHINE_I386: + result = Reference::KindArch::x86; + return std::error_code(); + case llvm::COFF::IMAGE_FILE_MACHINE_AMD64: + result = Reference::KindArch::x86_64; + return std::error_code(); + case llvm::COFF::IMAGE_FILE_MACHINE_ARMNT: + result = Reference::KindArch::ARM; + return std::error_code(); + case llvm::COFF::IMAGE_FILE_MACHINE_UNKNOWN: + result = Reference::KindArch::all; + return std::error_code(); + } + llvm::errs() << "Unsupported machine type: 0x" + << llvm::utohexstr(_obj->getMachine()) << '\n'; + return llvm::object::object_error::parse_failed; +} + +/// Add relocation information to atoms. +std::error_code FileCOFF::addRelocationReferenceToAtoms() { + // Relocation entries are defined for each section. + for (const auto &sec : _obj->sections()) { + const coff_section *section = _obj->getCOFFSection(sec); + + // Skip if there's no atom for the section. Currently we do not create any + // atoms for some sections, such as "debug$S", and such sections need to + // be skipped here too. + if (_sectionAtoms.find(section) == _sectionAtoms.end()) + continue; + + for (const auto &reloc : sec.relocations()) { + const coff_relocation *rel = _obj->getCOFFRelocation(reloc); + if (auto ec = addRelocationReference(rel, section)) + return ec; + } + } + return std::error_code(); +} + +// Read .sxdata section if exists. .sxdata is a x86-only section that contains a +// vector of symbol offsets. The symbols pointed by this section are SEH handler +// functions contained in the same object file. The linker needs to construct a +// SEH table and emit it to executable. +// +// On x86, exception handler addresses are in stack, so they are vulnerable to +// stack overflow attack. 
In order to protect against it, Windows runtime uses +// the SEH table to check if a SEH handler address in stack is a real address of +// a handler created by compiler. +// +// What we want to emit from the linker is a vector of SEH handler VAs, but here +// we have a vector of offsets to the symbol table. So we convert the latter to +// the former. +std::error_code FileCOFF::maybeCreateSXDataAtoms() { + ArrayRef<uint8_t> sxdata; + if (std::error_code ec = getSectionContents(".sxdata", sxdata)) + return ec; + if (sxdata.empty()) + return std::error_code(); + + auto *atom = new (_alloc) COFFDefinedAtom( + *this, "", ".sxdata", 0, Atom::scopeTranslationUnit, + DefinedAtom::typeData, false /*isComdat*/, DefinedAtom::permR__, + DefinedAtom::mergeNo, sxdata, getNextOrdinal()); + + const ulittle32_t *symbolIndex = + reinterpret_cast<const ulittle32_t *>(sxdata.data()); + int numSymbols = sxdata.size() / sizeof(uint32_t); + + for (int i = 0; i < numSymbols; ++i) { + Atom *handlerFunc; + if (std::error_code ec = getAtomBySymbolIndex(symbolIndex[i], handlerFunc)) + return ec; + int offsetInAtom = i * sizeof(uint32_t); + + uint16_t rtype; + switch (_obj->getMachine()) { + case llvm::COFF::IMAGE_FILE_MACHINE_AMD64: + rtype = llvm::COFF::IMAGE_REL_AMD64_ADDR32; + break; + case llvm::COFF::IMAGE_FILE_MACHINE_I386: + rtype = llvm::COFF::IMAGE_REL_I386_DIR32; + break; + default: + llvm_unreachable("unsupported machine type"); + } + + atom->addReference(llvm::make_unique<SimpleReference>( + Reference::KindNamespace::COFF, _referenceArch, rtype, offsetInAtom, + handlerFunc, 0)); + } + + _definedAtoms._atoms.push_back(atom); + return std::error_code(); +} + +/// Find a section by name. 
+std::error_code FileCOFF::findSection(StringRef name, + const coff_section *&result) { + for (const auto &sec : _obj->sections()) { + const coff_section *section = _obj->getCOFFSection(sec); + StringRef sectionName; + if (auto ec = _obj->getSectionName(section, sectionName)) + return ec; + if (sectionName == name) { + result = section; + return std::error_code(); + } + } + // Section was not found, but it's not an error. This method returns + // an error only when there's a read error. + return std::error_code(); +} + +// Convert ArrayRef<uint8_t> to std::string. The array contains a string which +// may not be terminated by NUL. +StringRef FileCOFF::ArrayRefToString(ArrayRef<uint8_t> array) { + // .drectve sections are encoded in either ASCII or UTF-8 with BOM. + // The PE/COFF spec allows ANSI (Windows-1252 encoding), but seems + // it's no longer in use. + // Skip a UTF-8 byte marker if exists. + if (array.size() >= 3 && array[0] == 0xEF && array[1] == 0xBB && + array[2] == 0xBF) { + array = array.slice(3); + } + if (array.empty()) + return ""; + StringRef s(reinterpret_cast<const char *>(array.data()), array.size()); + s = s.substr(0, s.find_first_of('\0')); + std::string *contents = new (_alloc) std::string(s.data(), s.size()); + return StringRef(*contents).trim(); +} + +// getNextOrdinal returns a monotonically increasaing uint64_t number +// starting from 1. There's a large gap between two numbers returned +// from this function, so that you can put other atoms between them. 
+uint64_t FileCOFF::getNextOrdinal() { + return _ordinal++ << 32; +} + +class COFFObjectReader : public Reader { +public: + COFFObjectReader(PECOFFLinkingContext &ctx) : _ctx(ctx) {} + + bool canParse(file_magic magic, StringRef ext, + const MemoryBuffer &) const override { + return magic == llvm::sys::fs::file_magic::coff_object; + } + + std::error_code + loadFile(std::unique_ptr<MemoryBuffer> mb, const Registry &, + std::vector<std::unique_ptr<File>> &result) const override { + // Parse the memory buffer as PECOFF file. + auto *file = new FileCOFF(std::move(mb), _ctx); + result.push_back(std::unique_ptr<File>(file)); + return std::error_code(); + } + +private: + PECOFFLinkingContext &_ctx; +}; + +using namespace llvm::COFF; + +const Registry::KindStrings kindStringsI386[] = { + LLD_KIND_STRING_ENTRY(IMAGE_REL_I386_ABSOLUTE), + LLD_KIND_STRING_ENTRY(IMAGE_REL_I386_DIR16), + LLD_KIND_STRING_ENTRY(IMAGE_REL_I386_REL16), + LLD_KIND_STRING_ENTRY(IMAGE_REL_I386_DIR32), + LLD_KIND_STRING_ENTRY(IMAGE_REL_I386_DIR32NB), + LLD_KIND_STRING_ENTRY(IMAGE_REL_I386_SEG12), + LLD_KIND_STRING_ENTRY(IMAGE_REL_I386_SECTION), + LLD_KIND_STRING_ENTRY(IMAGE_REL_I386_SECREL), + LLD_KIND_STRING_ENTRY(IMAGE_REL_I386_TOKEN), + LLD_KIND_STRING_ENTRY(IMAGE_REL_I386_SECREL7), + LLD_KIND_STRING_ENTRY(IMAGE_REL_I386_REL32), + LLD_KIND_STRING_END}; + +const Registry::KindStrings kindStringsAMD64[] = { + LLD_KIND_STRING_ENTRY(IMAGE_REL_AMD64_ABSOLUTE), + LLD_KIND_STRING_ENTRY(IMAGE_REL_AMD64_ADDR64), + LLD_KIND_STRING_ENTRY(IMAGE_REL_AMD64_ADDR32), + LLD_KIND_STRING_ENTRY(IMAGE_REL_AMD64_ADDR32NB), + LLD_KIND_STRING_ENTRY(IMAGE_REL_AMD64_REL32), + LLD_KIND_STRING_ENTRY(IMAGE_REL_AMD64_REL32_1), + LLD_KIND_STRING_ENTRY(IMAGE_REL_AMD64_REL32_2), + LLD_KIND_STRING_ENTRY(IMAGE_REL_AMD64_REL32_3), + LLD_KIND_STRING_ENTRY(IMAGE_REL_AMD64_REL32_4), + LLD_KIND_STRING_ENTRY(IMAGE_REL_AMD64_REL32_5), + LLD_KIND_STRING_ENTRY(IMAGE_REL_AMD64_SECTION), + LLD_KIND_STRING_ENTRY(IMAGE_REL_AMD64_SECREL), + 
LLD_KIND_STRING_ENTRY(IMAGE_REL_AMD64_SECREL7), + LLD_KIND_STRING_ENTRY(IMAGE_REL_AMD64_TOKEN), + LLD_KIND_STRING_ENTRY(IMAGE_REL_AMD64_SREL32), + LLD_KIND_STRING_ENTRY(IMAGE_REL_AMD64_PAIR), + LLD_KIND_STRING_ENTRY(IMAGE_REL_AMD64_SSPAN32), + LLD_KIND_STRING_END}; + +const Registry::KindStrings kindStringsARMNT[] = { + LLD_KIND_STRING_ENTRY(IMAGE_REL_ARM_ABSOLUTE), + LLD_KIND_STRING_ENTRY(IMAGE_REL_ARM_ADDR32), + LLD_KIND_STRING_ENTRY(IMAGE_REL_ARM_ADDR32NB), + LLD_KIND_STRING_ENTRY(IMAGE_REL_ARM_BRANCH24), + LLD_KIND_STRING_ENTRY(IMAGE_REL_ARM_BRANCH11), + LLD_KIND_STRING_ENTRY(IMAGE_REL_ARM_TOKEN), + LLD_KIND_STRING_ENTRY(IMAGE_REL_ARM_BLX24), + LLD_KIND_STRING_ENTRY(IMAGE_REL_ARM_BLX11), + LLD_KIND_STRING_ENTRY(IMAGE_REL_ARM_SECTION), + LLD_KIND_STRING_ENTRY(IMAGE_REL_ARM_SECREL), + LLD_KIND_STRING_ENTRY(IMAGE_REL_ARM_MOV32A), + LLD_KIND_STRING_ENTRY(IMAGE_REL_ARM_MOV32T), + LLD_KIND_STRING_ENTRY(IMAGE_REL_ARM_BRANCH20T), + LLD_KIND_STRING_ENTRY(IMAGE_REL_ARM_BRANCH24T), + LLD_KIND_STRING_ENTRY(IMAGE_REL_ARM_BLX23T), +}; + +} // end namespace anonymous + +namespace lld { + +void Registry::addSupportCOFFObjects(PECOFFLinkingContext &ctx) { + add(std::unique_ptr<Reader>(new COFFObjectReader(ctx))); + addKindTable(Reference::KindNamespace::COFF, Reference::KindArch::x86, + kindStringsI386); + addKindTable(Reference::KindNamespace::COFF, Reference::KindArch::x86_64, + kindStringsAMD64); + addKindTable(Reference::KindNamespace::COFF, Reference::KindArch::ARM, + kindStringsARMNT); +} + +} diff --git a/lib/ReaderWriter/PECOFF/ReaderImportHeader.cpp b/lib/ReaderWriter/PECOFF/ReaderImportHeader.cpp new file mode 100644 index 0000000000000..8c9641376a0d2 --- /dev/null +++ b/lib/ReaderWriter/PECOFF/ReaderImportHeader.cpp @@ -0,0 +1,389 @@ +//===- lib/ReaderWriter/PECOFF/ReaderImportHeader.cpp ---------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +/// +/// \file \brief This file provides a way to read an import library member in a +/// .lib file. +/// +/// Archive Files in Windows +/// ======================== +/// +/// In Windows, archive files with .lib file extension serve two different +/// purposes. +/// +/// - For static linking: An archive file in this use case contains multiple +/// regular .obj files and is used for static linking. This is the same +/// usage as .a file in Unix. +/// +/// - For dynamic linking: An archive file in this use case contains pseudo +/// .obj files to describe exported symbols of a DLL. Each pseudo .obj file +/// in an archive has a name of an exported symbol and a DLL filename from +/// which the symbol can be imported. When you link a DLL on Windows, you +/// pass the name of the .lib file for the DLL instead of the DLL filename +/// itself. That is the Windows way of linking against a shared library. +/// +/// This file contains a function to handle the pseudo object file. +/// +/// Windows Loader and Import Address Table +/// ======================================= +/// +/// Windows supports a GOT-like mechanism for DLLs. The executable using DLLs +/// contains a list of DLL names and list of symbols that need to be resolved by +/// the loader. Windows loader maps the executable and all the DLLs to memory, +/// resolves the symbols referencing items in DLLs, and updates the import +/// address table (IAT) in memory. The IAT is an array of pointers to all of the +/// data or functions in DLL referenced by the executable. You cannot access +/// items in DLLs directly. They have to be accessed through an extra level of +/// indirection. +/// +/// So, if you want to access an item in DLL, you have to go through a +/// pointer. How do you actually do that? You need a symbol for a pointer in the +/// IAT. 
For each symbol defined in a DLL, a symbol with "__imp_" prefix is +/// exported from the DLL for an IAT entry. For example, if you have a global +/// variable "foo" in a DLL, a pointer to the variable is available as +/// "_imp__foo". The IAT is an array of _imp__ symbols. +/// +/// Is this OK? That's not that complicated. Because items in a DLL are not +/// directly accessible, you need to access through a pointer, and the pointer +/// is available as a symbol with _imp__ prefix. +/// +/// Note 1: Although you can write code with _imp__ prefix, today's compiler and +/// linker let you write code as if there's no extra level of indirection. +/// That's why you haven't seen lots of _imp__ in your code. A variable or a +/// function declared with "dllimport" attribute is treated as an item in a DLL, +/// and the compiler automatically mangles its name and inserts the extra level +/// of indirection when accessing the item. Here are some examples: +/// +/// __declspec(dllimport) int var_in_dll; +/// var_in_dll = 3; // is equivalent to *_imp__var_in_dll = 3; +/// +/// __declspec(dllimport) int fn_in_dll(void); +/// fn_in_dll(); // is equivalent to (*_imp__fn_in_dll)(); +/// +/// It's just the compiler rewrites code for you so that you don't need to +/// handle the indirection yourself. +/// +/// Note 2: __declspec(dllimport) is mandatory for data but optional for +/// function. For a function, the linker creates a jump table with the original +/// symbol name, so that the function is accessible without _imp__ prefix. The +/// same function in a DLL can be called through two different symbols if it's +/// not dllimport'ed. +/// +/// (*_imp__fn)() +/// fn() +/// +/// The above functions do the same thing. fn's content is a JMP instruction to +/// branch to the address pointed by _imp__fn. The latter may be a little bit +/// slower than the former because it will execute the extra JMP instruction, +/// but that's usually negligible. 
+/// +/// If a function is dllimport'ed, which is usually done in a header file, +/// mangled name will be used at compile time so the jump table will not be +/// used. +/// +/// Because there's no way to hide the indirection for data access at link time, +/// data has to be accessed through dllimport'ed symbols or explicit _imp__ +/// prefix. +/// +/// Idata Sections in the Pseudo Object File +/// ======================================== +/// +/// The object file created by cl.exe has several sections whose name starts +/// with ".idata$" followed by a number. The contents of the sections seem to be +/// fragments of a complete ".idata" section. These sections have relocations for +/// the data referenced from the idata section. Generally, the linker discards +/// "$" and all characters that follow from the section name and merges their +/// contents to one section. So, it looks as if, were everything to work fine, +/// the idata section would naturally be constructed without having any special +/// code for doing that. +/// +/// However, the LLD linker cannot do that. An idata section constructed in that +/// way was never in a valid format. We don't know the reason yet. Our +/// assumption on the idata fragment could simply be wrong, or the LLD linker is +/// not powerful enough to do the job. Meanwhile, we construct the idata section +/// ourselves. All the "idata$" sections in the pseudo object file are currently +/// ignored. +/// +/// Creating Atoms for the Import Address Table +/// =========================================== +/// +/// The function in this file reads a pseudo object file and creates at most two +/// atoms. One is a shared library atom for _imp__ symbol. The other is a +/// defined atom for the JMP instruction if the symbol is for a function.
+/// +//===----------------------------------------------------------------------===// + +#include "Atoms.h" +#include "lld/Core/Error.h" +#include "lld/Core/File.h" +#include "lld/Core/SharedLibraryAtom.h" +#include "lld/ReaderWriter/PECOFFLinkingContext.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Object/COFF.h" +#include "llvm/Support/COFF.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Memory.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/raw_ostream.h" +#include <cstring> +#include <map> +#include <system_error> +#include <vector> + +using namespace lld; +using namespace lld::pecoff; +using namespace llvm; +using namespace llvm::support::endian; + +#define DEBUG_TYPE "ReaderImportHeader" + +namespace lld { + +namespace { + +// This code is valid both in x86 and x64. +const uint8_t FuncAtomContentX86[] = { + 0xff, 0x25, 0x00, 0x00, 0x00, 0x00, // JMP *0x0 + 0xcc, 0xcc // INT 3; INT 3 +}; + +const uint8_t FuncAtomContentARMNT[] = { + 0x40, 0xf2, 0x00, 0x0c, // mov.w ip, #0 + 0xc0, 0xf2, 0x00, 0x0c, // mov.t ip, #0 + 0xdc, 0xf8, 0x00, 0xf0, // ldr.w pc, [ip] +}; + +static void setJumpInstTarget(COFFLinkerInternalAtom *src, const Atom *dst, + int off, MachineTypes machine) { + SimpleReference *ref; + + switch (machine) { + default: llvm::report_fatal_error("unsupported machine type"); + case llvm::COFF::IMAGE_FILE_MACHINE_I386: + ref = new SimpleReference(Reference::KindNamespace::COFF, + Reference::KindArch::x86, + llvm::COFF::IMAGE_REL_I386_DIR32, + off, dst, 0); + break; + case llvm::COFF::IMAGE_FILE_MACHINE_AMD64: + ref = new SimpleReference(Reference::KindNamespace::COFF, + Reference::KindArch::x86_64, + llvm::COFF::IMAGE_REL_AMD64_REL32, + off, dst, 0); + break; + case llvm::COFF::IMAGE_FILE_MACHINE_ARMNT: + ref = new SimpleReference(Reference::KindNamespace::COFF, + 
Reference::KindArch::ARM, + llvm::COFF::IMAGE_REL_ARM_MOV32T, + off, dst, 0); + break; + } + src->addReference(std::unique_ptr<SimpleReference>(ref)); +} + +/// The defined atom for jump table. +class FuncAtom : public COFFLinkerInternalAtom { +public: + FuncAtom(const File &file, StringRef symbolName, + const COFFSharedLibraryAtom *impAtom, MachineTypes machine) + : COFFLinkerInternalAtom(file, /*oridnal*/ 0, createContent(machine), + symbolName) { + size_t Offset; + + switch (machine) { + default: llvm::report_fatal_error("unsupported machine type"); + case llvm::COFF::IMAGE_FILE_MACHINE_I386: + case llvm::COFF::IMAGE_FILE_MACHINE_AMD64: + Offset = 2; + break; + case llvm::COFF::IMAGE_FILE_MACHINE_ARMNT: + Offset = 0; + break; + } + + setJumpInstTarget(this, impAtom, Offset, machine); + } + + uint64_t ordinal() const override { return 0; } + Scope scope() const override { return scopeGlobal; } + ContentType contentType() const override { return typeCode; } + Alignment alignment() const override { return Alignment(1); } + ContentPermissions permissions() const override { return permR_X; } + +private: + std::vector<uint8_t> createContent(MachineTypes machine) const { + const uint8_t *Data; + size_t Size; + + switch (machine) { + default: llvm::report_fatal_error("unsupported machine type"); + case llvm::COFF::IMAGE_FILE_MACHINE_I386: + case llvm::COFF::IMAGE_FILE_MACHINE_AMD64: + Data = FuncAtomContentX86; + Size = sizeof(FuncAtomContentX86); + break; + case llvm::COFF::IMAGE_FILE_MACHINE_ARMNT: + Data = FuncAtomContentARMNT; + Size = sizeof(FuncAtomContentARMNT); + break; + } + + return std::vector<uint8_t>(Data, Data + Size); + } +}; + +class FileImportLibrary : public File { +public: + FileImportLibrary(std::unique_ptr<MemoryBuffer> mb, MachineTypes machine) + : File(mb->getBufferIdentifier(), kindSharedLibrary), + _mb(std::move(mb)), _machine(machine) {} + + std::error_code doParse() override { + const char *buf = _mb->getBufferStart(); + const char *end = 
_mb->getBufferEnd(); + + // The size of the string that follows the header. + uint32_t dataSize + = read32le(buf + offsetof(COFF::ImportHeader, SizeOfData)); + + // Check if the total size is valid. + if (std::size_t(end - buf) != sizeof(COFF::ImportHeader) + dataSize) + return make_error_code(NativeReaderError::unknown_file_format); + + uint16_t hint = read16le(buf + offsetof(COFF::ImportHeader, OrdinalHint)); + StringRef symbolName(buf + sizeof(COFF::ImportHeader)); + StringRef dllName(buf + sizeof(COFF::ImportHeader) + symbolName.size() + 1); + + // TypeInfo is a bitfield. The least significant 2 bits are import + // type, followed by 3 bit import name type. + uint16_t typeInfo = read16le(buf + offsetof(COFF::ImportHeader, TypeInfo)); + int type = typeInfo & 0x3; + int nameType = (typeInfo >> 2) & 0x7; + + // Symbol name used by the linker may be different from the symbol name used + // by the loader. The latter may lack symbol decorations, or may not even + // have name if it's imported by ordinal. 
+ StringRef importName = symbolNameToImportName(symbolName, nameType); + + const COFFSharedLibraryAtom *dataAtom = + addSharedLibraryAtom(hint, symbolName, importName, dllName); + if (type == llvm::COFF::IMPORT_CODE) + addFuncAtom(symbolName, dllName, dataAtom); + + return std::error_code(); + } + + const atom_collection<DefinedAtom> &defined() const override { + return _definedAtoms; + } + + const atom_collection<UndefinedAtom> &undefined() const override { + return _noUndefinedAtoms; + } + + const atom_collection<SharedLibraryAtom> &sharedLibrary() const override { + return _sharedLibraryAtoms; + } + + const atom_collection<AbsoluteAtom> &absolute() const override { + return _noAbsoluteAtoms; + } + +private: + const COFFSharedLibraryAtom *addSharedLibraryAtom(uint16_t hint, + StringRef symbolName, + StringRef importName, + StringRef dllName) { + auto *atom = new (_alloc) + COFFSharedLibraryAtom(*this, hint, symbolName, importName, dllName); + _sharedLibraryAtoms._atoms.push_back(atom); + return atom; + } + + void addFuncAtom(StringRef symbolName, StringRef dllName, + const COFFSharedLibraryAtom *impAtom) { + auto *atom = new (_alloc) FuncAtom(*this, symbolName, impAtom, _machine); + _definedAtoms._atoms.push_back(atom); + } + + atom_collection_vector<DefinedAtom> _definedAtoms; + atom_collection_vector<SharedLibraryAtom> _sharedLibraryAtoms; + mutable llvm::BumpPtrAllocator _alloc; + + // Does the same thing as StringRef::ltrim() but removes at most one + // character. + StringRef ltrim1(StringRef str, const char *chars) const { + if (!str.empty() && strchr(chars, str[0])) + return str.substr(1); + return str; + } + + // Convert the given symbol name to the import symbol name exported by the + // DLL. + StringRef symbolNameToImportName(StringRef symbolName, int nameType) const { + StringRef ret; + switch (nameType) { + case llvm::COFF::IMPORT_ORDINAL: + // The import is by ordinal. No symbol name will be used to identify the + // item in the DLL. 
Only its ordinal will be used. + return ""; + case llvm::COFF::IMPORT_NAME: + // The import name in this case is identical to the symbol name. + return symbolName; + case llvm::COFF::IMPORT_NAME_NOPREFIX: + // The import name is the symbol name without leading ?, @ or _. + ret = ltrim1(symbolName, "?@_"); + break; + case llvm::COFF::IMPORT_NAME_UNDECORATE: + // Similar to NOPREFIX, but we also need to truncate at the first @. + ret = ltrim1(symbolName, "?@_"); + ret = ret.substr(0, ret.find('@')); + break; + } + std::string *str = new (_alloc) std::string(ret); + return *str; + } + + std::unique_ptr<MemoryBuffer> _mb; + MachineTypes _machine; +}; + +class COFFImportLibraryReader : public Reader { +public: + COFFImportLibraryReader(PECOFFLinkingContext &ctx) : _ctx(ctx) {} + + bool canParse(file_magic magic, StringRef, + const MemoryBuffer &mb) const override { + if (mb.getBufferSize() < sizeof(COFF::ImportHeader)) + return false; + return (magic == llvm::sys::fs::file_magic::coff_import_library); + } + + std::error_code + loadFile(std::unique_ptr<MemoryBuffer> mb, const class Registry &, + std::vector<std::unique_ptr<File> > &result) const override { + auto *file = new FileImportLibrary(std::move(mb), _ctx.getMachineType()); + result.push_back(std::unique_ptr<File>(file)); + return std::error_code(); + } + +private: + PECOFFLinkingContext &_ctx; +}; + +} // end anonymous namespace + +void Registry::addSupportCOFFImportLibraries(PECOFFLinkingContext &ctx) { + add(llvm::make_unique<COFFImportLibraryReader>(ctx)); +} + +} // end namespace lld diff --git a/lib/ReaderWriter/PECOFF/WriterImportLibrary.cpp b/lib/ReaderWriter/PECOFF/WriterImportLibrary.cpp new file mode 100644 index 0000000000000..fd3360f018b6d --- /dev/null +++ b/lib/ReaderWriter/PECOFF/WriterImportLibrary.cpp @@ -0,0 +1,118 @@ +//===- lib/ReaderWriter/PECOFF/WriterImportLibrary.cpp --------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open 
Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// This file is responsible for creating the Import Library file. +/// +//===----------------------------------------------------------------------===// + +#include "lld/ReaderWriter/PECOFFLinkingContext.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/FileUtilities.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/Program.h" +#include "llvm/Support/raw_ostream.h" + +namespace lld { +namespace pecoff { + +/// Creates a .def file containing the list of exported symbols. +static std::string +createModuleDefinitionFile(const PECOFFLinkingContext &ctx) { + std::string ret; + llvm::raw_string_ostream os(ret); + os << "LIBRARY \"" << llvm::sys::path::filename(ctx.outputPath()) << "\"\n" + << "EXPORTS\n"; + + for (const PECOFFLinkingContext::ExportDesc &desc : ctx.getDllExports()) { + // Symbol names in a module-definition file will be mangled by lib.exe, + // so we need to demangle them before writing to a .def file. 
+ os << " "; + if (!desc.externalName.empty()) { + os << desc.externalName; + } else if (!desc.mangledName.empty()) { + os << ctx.undecorateSymbol(desc.mangledName); + } else { + os << ctx.undecorateSymbol(desc.name); + } + + if (!desc.isPrivate) + os << " @" << desc.ordinal; + if (desc.noname) + os << " NONAME"; + if (desc.isData) + os << " DATA"; + if (desc.isPrivate) + os << " PRIVATE"; + os << "\n"; + } + os.flush(); + return ret; +} + +static std::string writeToTempFile(StringRef contents) { + SmallString<128> path; + int fd; + if (llvm::sys::fs::createTemporaryFile("tmp", "def", fd, path)) { + llvm::errs() << "Failed to create temporary file\n"; + return ""; + } + llvm::raw_fd_ostream os(fd, /*shouldClose*/ true); + os << contents; + return path.str(); +} + +static void writeTo(StringRef path, StringRef contents) { + int fd; + if (llvm::sys::fs::openFileForWrite(path, fd, llvm::sys::fs::F_Text)) { + llvm::errs() << "Failed to open " << path << "\n"; + return; + } + llvm::raw_fd_ostream os(fd, /*shouldClose*/ true); + os << contents; +} + +/// Creates a .def file and runs lib.exe on it to create an import library. +void writeImportLibrary(const PECOFFLinkingContext &ctx) { + std::string fileContents = createModuleDefinitionFile(ctx); + + std::string program = "lib.exe"; + ErrorOr<std::string> programPathOrErr = llvm::sys::findProgramByName(program); + if (!programPathOrErr) { + llvm::errs() << "Unable to find " << program << " in PATH\n"; + } else { + const std::string &programPath = *programPathOrErr; + + std::string defPath = writeToTempFile(fileContents); + llvm::FileRemover tmpFile(defPath); + + std::string defArg = "/def:"; + defArg.append(defPath); + std::string outputArg = "/out:"; + outputArg.append(ctx.getOutputImportLibraryPath()); + + std::vector<const char *> args; + args.push_back(programPath.c_str()); + args.push_back("/nologo"); + args.push_back(ctx.is64Bit() ? 
"/machine:x64" : "/machine:x86"); + args.push_back(defArg.c_str()); + args.push_back(outputArg.c_str()); + args.push_back(nullptr); + + if (llvm::sys::ExecuteAndWait(programPath.c_str(), &args[0]) != 0) + llvm::errs() << program << " failed\n"; + } + + // If /lldmoduledeffile:<filename> is given, make a copy of the + // temporary module definition file. This feature is for unit tests. + if (!ctx.getModuleDefinitionFile().empty()) + writeTo(ctx.getModuleDefinitionFile(), fileContents); +} + +} // end namespace pecoff +} // end namespace lld diff --git a/lib/ReaderWriter/PECOFF/WriterImportLibrary.h b/lib/ReaderWriter/PECOFF/WriterImportLibrary.h new file mode 100644 index 0000000000000..a51b9a3648c5d --- /dev/null +++ b/lib/ReaderWriter/PECOFF/WriterImportLibrary.h @@ -0,0 +1,23 @@ +//===- lib/ReaderWriter/PECOFF/WriterImportLibrary.h ----------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_PE_COFF_WRITER_IMPORT_LIBRARY_H +#define LLD_READER_WRITER_PE_COFF_WRITER_IMPORT_LIBRARY_H + +namespace lld { +class PECOFFLinkingContext; + +namespace pecoff { + +void writeImportLibrary(const PECOFFLinkingContext &ctx); + +} // end namespace pecoff +} // end namespace lld + +#endif diff --git a/lib/ReaderWriter/PECOFF/WriterPECOFF.cpp b/lib/ReaderWriter/PECOFF/WriterPECOFF.cpp new file mode 100644 index 0000000000000..d34e2d3d63fde --- /dev/null +++ b/lib/ReaderWriter/PECOFF/WriterPECOFF.cpp @@ -0,0 +1,1417 @@ +//===- lib/ReaderWriter/PECOFF/WriterPECOFF.cpp ---------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +/// +/// \file +/// +/// PE/COFF file consists of DOS Header, PE Header, COFF Header and Section +/// Tables followed by raw section data. +/// +/// This writer is responsible for writing Core Linker results to an Windows +/// executable file. +/// +/// This writer currently supports 32 bit PE/COFF for x86 processor only. +/// +//===----------------------------------------------------------------------===// + +#include "Atoms.h" +#include "WriterImportLibrary.h" +#include "lld/Core/DefinedAtom.h" +#include "lld/Core/File.h" +#include "lld/Core/Writer.h" +#include "lld/ReaderWriter/AtomLayout.h" +#include "lld/ReaderWriter/PECOFFLinkingContext.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/Object/COFF.h" +#include "llvm/Support/COFF.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/FileOutputBuffer.h" +#include "llvm/Support/Format.h" +#include <algorithm> +#include <cstdlib> +#include <map> +#include <time.h> +#include <vector> + +#define DEBUG_TYPE "WriterPECOFF" + +using namespace llvm::support::endian; + +using llvm::COFF::DataDirectoryIndex; +using llvm::object::coff_runtime_function_x64; +using llvm::support::ulittle16_t; +using llvm::support::ulittle32_t; +using llvm::support::ulittle64_t; + +namespace lld { +namespace pecoff { + +// Disk sector size. Some data needs to be aligned at disk sector boundary in +// file. +static const int SECTOR_SIZE = 512; + +namespace { +class SectionChunk; + +/// A Chunk is an abstract contiguous range in an output file. 
+class Chunk { +public: + enum Kind { + kindHeader, + kindSection, + kindStringTable, + kindAtomChunk + }; + + explicit Chunk(Kind kind) : _kind(kind), _size(0) {} + virtual ~Chunk() {} + virtual void write(uint8_t *buffer) = 0; + virtual uint64_t size() const { return _size; } + virtual uint64_t onDiskSize() const { return size(); } + virtual uint64_t align() const { return 1; } + + uint64_t fileOffset() const { return _fileOffset; } + void setFileOffset(uint64_t fileOffset) { _fileOffset = fileOffset; } + Kind getKind() const { return _kind; } + +protected: + Kind _kind; + uint64_t _size; + uint64_t _fileOffset; +}; + +/// A HeaderChunk is an abstract class to represent a file header for +/// PE/COFF. The data in the header chunk is metadata about program and will +/// be consumed by the windows loader. HeaderChunks are not mapped to memory +/// when executed. +class HeaderChunk : public Chunk { +public: + HeaderChunk() : Chunk(kindHeader) {} + + static bool classof(const Chunk *c) { return c->getKind() == kindHeader; } +}; + +/// A DOSStubChunk represents the DOS compatible header at the beginning +/// of PE/COFF files. +class DOSStubChunk : public HeaderChunk { +public: + explicit DOSStubChunk(const PECOFFLinkingContext &ctx) + : HeaderChunk(), _context(ctx) { + // Minimum size of DOS stub is 64 bytes. The next block (PE header) needs to + // be aligned on 8 byte boundary. + size_t size = std::max(_context.getDosStub().size(), (size_t)64); + _size = llvm::RoundUpToAlignment(size, 8); + } + + void write(uint8_t *buffer) override { + ArrayRef<uint8_t> array = _context.getDosStub(); + std::memcpy(buffer, array.data(), array.size()); + auto *header = reinterpret_cast<llvm::object::dos_header *>(buffer); + header->AddressOfRelocationTable = sizeof(llvm::object::dos_header); + header->AddressOfNewExeHeader = _size; + } + +private: + const PECOFFLinkingContext &_context; +}; + +/// A PEHeaderChunk represents PE header including COFF header. 
+template <class PEHeader> +class PEHeaderChunk : public HeaderChunk { +public: + explicit PEHeaderChunk(const PECOFFLinkingContext &ctx); + + void write(uint8_t *buffer) override; + + void setSizeOfHeaders(uint64_t size) { + // Must be multiple of FileAlignment. + _peHeader.SizeOfHeaders = llvm::RoundUpToAlignment(size, SECTOR_SIZE); + } + + void setSizeOfCode(uint64_t size) { _peHeader.SizeOfCode = size; } + void setBaseOfCode(uint32_t rva) { _peHeader.BaseOfCode = rva; } + void setBaseOfData(uint32_t rva); + void setSizeOfImage(uint32_t size) { _peHeader.SizeOfImage = size; } + + void setSizeOfInitializedData(uint64_t size) { + _peHeader.SizeOfInitializedData = size; + } + + void setSizeOfUninitializedData(uint64_t size) { + _peHeader.SizeOfUninitializedData = size; + } + + void setNumberOfSections(uint32_t num) { _coffHeader.NumberOfSections = num; } + void setNumberOfSymbols(uint32_t num) { _coffHeader.NumberOfSymbols = num; } + + void setAddressOfEntryPoint(uint32_t address) { + _peHeader.AddressOfEntryPoint = address; + } + + void setPointerToSymbolTable(uint32_t rva) { + _coffHeader.PointerToSymbolTable = rva; + } + +private: + llvm::object::coff_file_header _coffHeader; + PEHeader _peHeader; +}; + +/// A SectionHeaderTableChunk represents Section Table Header of PE/COFF +/// format, which is a list of section headers. 
+class SectionHeaderTableChunk : public HeaderChunk { +public: + SectionHeaderTableChunk() : HeaderChunk() {} + void addSection(SectionChunk *chunk); + uint64_t size() const override; + void write(uint8_t *buffer) override; + +private: + static llvm::object::coff_section createSectionHeader(SectionChunk *chunk); + + std::vector<SectionChunk *> _sections; +}; + +class StringTableChunk : public Chunk { +public: + StringTableChunk() : Chunk(kindStringTable) {} + + static bool classof(const Chunk *c) { + return c->getKind() == kindStringTable; + } + + uint32_t addSectionName(StringRef sectionName) { + if (_stringTable.empty()) { + // The string table immediately follows the symbol table. + // We don't really need a symbol table, but some tools (e.g. dumpbin) + // don't like zero-length symbol table. + // Make room for the empty symbol slot, which occupies 18 byte. + // We also need to reserve 4 bytes for the string table header. + int size = sizeof(llvm::object::coff_symbol16) + 4; + _stringTable.insert(_stringTable.begin(), size, 0); + // Set the name of the dummy symbol to the first string table entry. + // It's better than letting dumpbin print out a garabage as a symbol name. 
+ char *off = _stringTable.data() + 4; + write32le(off, 4); + } + uint32_t offset = _stringTable.size(); + _stringTable.insert(_stringTable.end(), sectionName.begin(), + sectionName.end()); + _stringTable.push_back('\0'); + return offset - sizeof(llvm::object::coff_symbol16); + } + + uint64_t size() const override { return _stringTable.size(); } + + void write(uint8_t *buffer) override { + if (_stringTable.empty()) + return; + char *off = _stringTable.data() + sizeof(llvm::object::coff_symbol16); + write32le(off, _stringTable.size()); + std::memcpy(buffer, _stringTable.data(), _stringTable.size()); + } + +private: + std::vector<char> _stringTable; +}; + +class SectionChunk : public Chunk { +public: + uint64_t onDiskSize() const override { + if (_characteristics & llvm::COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA) + return 0; + return llvm::RoundUpToAlignment(size(), SECTOR_SIZE); + } + + uint64_t align() const override { return SECTOR_SIZE; } + uint32_t getCharacteristics() const { return _characteristics; } + StringRef getSectionName() const { return _sectionName; } + virtual uint64_t memAlign() const { return _memAlign; } + + static bool classof(const Chunk *c) { + Kind kind = c->getKind(); + return kind == kindSection || kind == kindAtomChunk; + } + + uint64_t getVirtualAddress() { return _virtualAddress; } + virtual void setVirtualAddress(uint32_t rva) { _virtualAddress = rva; } + + uint32_t getStringTableOffset() const { return _stringTableOffset; } + void setStringTableOffset(uint32_t offset) { _stringTableOffset = offset; } + +protected: + SectionChunk(Kind kind, StringRef sectionName, uint32_t characteristics, + const PECOFFLinkingContext &ctx) + : Chunk(kind), _sectionName(sectionName), + _characteristics(characteristics), _virtualAddress(0), + _stringTableOffset(0), _memAlign(ctx.getPageSize()) {} + +private: + StringRef _sectionName; + const uint32_t _characteristics; + uint64_t _virtualAddress; + uint32_t _stringTableOffset; + uint64_t _memAlign; +}; + 
+struct BaseReloc { + BaseReloc(uint64_t a, llvm::COFF::BaseRelocationType t) : addr(a), type(t) {} + uint64_t addr; + llvm::COFF::BaseRelocationType type; +}; + +/// An AtomChunk represents a section containing atoms. +class AtomChunk : public SectionChunk { +public: + AtomChunk(const PECOFFLinkingContext &ctx, StringRef name, + const std::vector<const DefinedAtom *> &atoms); + + void write(uint8_t *buffer) override; + + uint64_t memAlign() const override; + void appendAtom(const DefinedAtom *atom); + void buildAtomRvaMap(std::map<const Atom *, uint64_t> &atomRva) const; + + void applyRelocationsARM(uint8_t *buffer, + std::map<const Atom *, uint64_t> &atomRva, + std::vector<uint64_t> §ionRva, + uint64_t imageBaseAddress); + void applyRelocationsX86(uint8_t *buffer, + std::map<const Atom *, uint64_t> &atomRva, + std::vector<uint64_t> §ionRva, + uint64_t imageBaseAddress); + void applyRelocationsX64(uint8_t *buffer, + std::map<const Atom *, uint64_t> &atomRva, + std::vector<uint64_t> §ionRva, + uint64_t imageBaseAddress); + + void printAtomAddresses(uint64_t baseAddr) const; + void addBaseRelocations(std::vector<BaseReloc> &relocSites) const; + + void setVirtualAddress(uint32_t rva) override; + uint64_t getAtomVirtualAddress(StringRef name) const; + + static bool classof(const Chunk *c) { return c->getKind() == kindAtomChunk; } + +protected: + std::vector<AtomLayout *> _atomLayouts; + uint64_t _virtualAddress; + +private: + uint32_t + computeCharacteristics(const PECOFFLinkingContext &ctx, StringRef name, + const std::vector<const DefinedAtom *> &atoms) const { + return ctx.getSectionAttributes(name, + getDefaultCharacteristics(name, atoms)); + } + + uint32_t getDefaultCharacteristics( + StringRef name, const std::vector<const DefinedAtom *> &atoms) const; + + mutable llvm::BumpPtrAllocator _alloc; + llvm::COFF::MachineTypes _machineType; + const PECOFFLinkingContext &_ctx; +}; + +/// A DataDirectoryChunk represents data directory entries that follows the PE +/// 
header in the output file. An entry consists of an 8 byte field that +/// indicates a relative virtual address (the starting address of the entry data +/// in memory) and 8 byte entry data size. +class DataDirectoryChunk : public HeaderChunk { +public: + DataDirectoryChunk() + : HeaderChunk(), _data(std::vector<llvm::object::data_directory>(16)) {} + + uint64_t size() const override { + return sizeof(llvm::object::data_directory) * _data.size(); + } + + void setField(DataDirectoryIndex index, uint32_t addr, uint32_t size); + void write(uint8_t *buffer) override; + +private: + std::vector<llvm::object::data_directory> _data; +}; + +/// A BaseRelocChunk represents ".reloc" section. +/// +/// .reloc section contains a list of addresses. If the PE/COFF loader decides +/// to load the binary at a memory address different from its preferred base +/// address, which is specified by ImageBase field in the COFF header, the +/// loader needs to relocate the binary, so that all the addresses in the binary +/// point to new locations. The loader will do that by fixing up the addresses +/// specified by .reloc section. +/// +/// The executable is almost always loaded at the preferred base address because +/// it's loaded into an empty address space. The DLL is however an subject of +/// load-time relocation because it may conflict with other DLLs or the +/// executable. +class BaseRelocChunk : public SectionChunk { + typedef std::vector<std::unique_ptr<Chunk> > ChunkVectorT; + +public: + BaseRelocChunk(ChunkVectorT &chunks, const PECOFFLinkingContext &ctx) + : SectionChunk(kindSection, ".reloc", characteristics, ctx), + _ctx(ctx), _contents(createContents(chunks)) {} + + void write(uint8_t *buffer) override { + std::memcpy(buffer, &_contents[0], _contents.size()); + } + + uint64_t size() const override { return _contents.size(); } + +private: + // When loaded into memory, reloc section should be readable and writable. 
+ static const uint32_t characteristics = + llvm::COFF::IMAGE_SCN_MEM_READ | + llvm::COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + llvm::COFF::IMAGE_SCN_MEM_DISCARDABLE; + + std::vector<uint8_t> createContents(ChunkVectorT &chunks) const; + + // Returns a list of RVAs that needs to be relocated if the binary is loaded + // at an address different from its preferred one. + std::vector<BaseReloc> listRelocSites(ChunkVectorT &chunks) const; + + // Create the content of a relocation block. + std::vector<uint8_t> + createBaseRelocBlock(uint64_t pageAddr, const BaseReloc *begin, + const BaseReloc *end) const; + + const PECOFFLinkingContext &_ctx; + std::vector<uint8_t> _contents; +}; + +template <class PEHeader> +PEHeaderChunk<PEHeader>::PEHeaderChunk(const PECOFFLinkingContext &ctx) + : HeaderChunk() { + // Set the size of the chunk and initialize the header with null bytes. + _size = sizeof(llvm::COFF::PEMagic) + sizeof(_coffHeader) + sizeof(_peHeader); + std::memset(&_coffHeader, 0, sizeof(_coffHeader)); + std::memset(&_peHeader, 0, sizeof(_peHeader)); + + _coffHeader.Machine = ctx.getMachineType(); + _coffHeader.TimeDateStamp = time(nullptr); + + // Attributes of the executable. + uint16_t characteristics = llvm::COFF::IMAGE_FILE_EXECUTABLE_IMAGE; + if (!ctx.is64Bit()) + characteristics |= llvm::COFF::IMAGE_FILE_32BIT_MACHINE; + if (ctx.isDll()) + characteristics |= llvm::COFF::IMAGE_FILE_DLL; + if (ctx.getLargeAddressAware() || ctx.is64Bit()) + characteristics |= llvm::COFF::IMAGE_FILE_LARGE_ADDRESS_AWARE; + if (ctx.getSwapRunFromCD()) + characteristics |= llvm::COFF::IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP; + if (ctx.getSwapRunFromNet()) + characteristics |= llvm::COFF::IMAGE_FILE_NET_RUN_FROM_SWAP; + if (!ctx.getBaseRelocationEnabled()) + characteristics |= llvm::COFF::IMAGE_FILE_RELOCS_STRIPPED; + + _coffHeader.Characteristics = characteristics; + + _peHeader.Magic = ctx.is64Bit() ? 
llvm::COFF::PE32Header::PE32_PLUS + : llvm::COFF::PE32Header::PE32; + + // The address of the executable when loaded into memory. The default for + // DLLs is 0x10000000. The default for executables is 0x400000. + _peHeader.ImageBase = ctx.getBaseAddress(); + + // Sections should be page-aligned when loaded into memory, which is 4KB on + // x86. + _peHeader.SectionAlignment = ctx.getSectionDefaultAlignment(); + + // Sections in an executable file on disk should be sector-aligned (512 byte). + _peHeader.FileAlignment = SECTOR_SIZE; + + // The version number of the resultant executable/DLL. The number is purely + // informative, and neither the linker nor the loader won't use it. User can + // set the value using /version command line option. Default is 0.0. + PECOFFLinkingContext::Version imageVersion = ctx.getImageVersion(); + _peHeader.MajorImageVersion = imageVersion.majorVersion; + _peHeader.MinorImageVersion = imageVersion.minorVersion; + + // The required Windows version number. This is the internal version and + // shouldn't be confused with product name. Windows 7 is version 6.1 and + // Windows 8 is 6.2, for example. + PECOFFLinkingContext::Version minOSVersion = ctx.getMinOSVersion(); + _peHeader.MajorOperatingSystemVersion = minOSVersion.majorVersion; + _peHeader.MinorOperatingSystemVersion = minOSVersion.minorVersion; + _peHeader.MajorSubsystemVersion = minOSVersion.majorVersion; + _peHeader.MinorSubsystemVersion = minOSVersion.minorVersion; + + _peHeader.Subsystem = ctx.getSubsystem(); + + // Despite its name, DLL characteristics field has meaning both for + // executables and DLLs. We are not very sure if the following bits must + // be set, but regular binaries seem to have these bits, so we follow + // them. 
+ uint16_t dllCharacteristics = 0; + if (ctx.noSEH()) + dllCharacteristics |= llvm::COFF::IMAGE_DLL_CHARACTERISTICS_NO_SEH; + if (ctx.isTerminalServerAware()) + dllCharacteristics |= + llvm::COFF::IMAGE_DLL_CHARACTERISTICS_TERMINAL_SERVER_AWARE; + if (ctx.isNxCompat()) + dllCharacteristics |= llvm::COFF::IMAGE_DLL_CHARACTERISTICS_NX_COMPAT; + if (ctx.getDynamicBaseEnabled()) + dllCharacteristics |= llvm::COFF::IMAGE_DLL_CHARACTERISTICS_DYNAMIC_BASE; + if (!ctx.getAllowBind()) + dllCharacteristics |= llvm::COFF::IMAGE_DLL_CHARACTERISTICS_NO_BIND; + if (!ctx.getAllowIsolation()) + dllCharacteristics |= llvm::COFF::IMAGE_DLL_CHARACTERISTICS_NO_ISOLATION; + if (ctx.getHighEntropyVA() && ctx.is64Bit()) + dllCharacteristics |= llvm::COFF::IMAGE_DLL_CHARACTERISTICS_HIGH_ENTROPY_VA; + _peHeader.DLLCharacteristics = dllCharacteristics; + + _peHeader.SizeOfStackReserve = ctx.getStackReserve(); + _peHeader.SizeOfStackCommit = ctx.getStackCommit(); + _peHeader.SizeOfHeapReserve = ctx.getHeapReserve(); + _peHeader.SizeOfHeapCommit = ctx.getHeapCommit(); + + // The number of data directory entries. We always have 16 entries. + _peHeader.NumberOfRvaAndSize = 16; + + // The size of PE header including optional data directory. + _coffHeader.SizeOfOptionalHeader = sizeof(PEHeader) + + _peHeader.NumberOfRvaAndSize * sizeof(llvm::object::data_directory); +} + +template <> +void PEHeaderChunk<llvm::object::pe32_header>::setBaseOfData(uint32_t rva) { + _peHeader.BaseOfData = rva; +} + +template <> +void PEHeaderChunk<llvm::object::pe32plus_header>::setBaseOfData(uint32_t rva) { + // BaseOfData field does not exist in PE32+ header. 
+} + +template <class PEHeader> +void PEHeaderChunk<PEHeader>::write(uint8_t *buffer) { + std::memcpy(buffer, llvm::COFF::PEMagic, sizeof(llvm::COFF::PEMagic)); + buffer += sizeof(llvm::COFF::PEMagic); + std::memcpy(buffer, &_coffHeader, sizeof(_coffHeader)); + buffer += sizeof(_coffHeader); + std::memcpy(buffer, &_peHeader, sizeof(_peHeader)); +} + +AtomChunk::AtomChunk(const PECOFFLinkingContext &ctx, StringRef sectionName, + const std::vector<const DefinedAtom *> &atoms) + : SectionChunk(kindAtomChunk, sectionName, + computeCharacteristics(ctx, sectionName, atoms), ctx), + _virtualAddress(0), _machineType(ctx.getMachineType()), _ctx(ctx) { + for (auto *a : atoms) + appendAtom(a); +} + +void AtomChunk::write(uint8_t *buffer) { + if (_atomLayouts.empty()) + return; + if (getCharacteristics() & llvm::COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA) + return; + if (getCharacteristics() & llvm::COFF::IMAGE_SCN_CNT_CODE) { + // Fill the section with INT 3 (0xCC) rather than NUL, so that the + // disassembler will not interpret a garbage between atoms as the beginning + // of multi-byte machine code. This does not change the behavior of + // resulting binary but help debugging. + uint8_t *start = buffer + _atomLayouts.front()->_fileOffset; + uint8_t *end = buffer + _atomLayouts.back()->_fileOffset; + memset(start, 0xCC, end - start); + } + + for (const auto *layout : _atomLayouts) { + const DefinedAtom *atom = cast<DefinedAtom>(layout->_atom); + ArrayRef<uint8_t> rawContent = atom->rawContent(); + std::memcpy(buffer + layout->_fileOffset, rawContent.data(), + rawContent.size()); + } +} + +// Add all atoms to the given map. This data will be used to do relocation. 
+void +AtomChunk::buildAtomRvaMap(std::map<const Atom *, uint64_t> &atomRva) const { + for (const auto *layout : _atomLayouts) + atomRva[layout->_atom] = layout->_virtualAddr; +} + +static int getSectionIndex(uint64_t targetAddr, + const std::vector<uint64_t> §ionRva) { + int i = 1; + for (uint64_t rva : sectionRva) { + if (targetAddr < rva) + return i; + ++i; + } + return i; +} + +static uint32_t getSectionStartAddr(uint64_t targetAddr, + const std::vector<uint64_t> §ionRva) { + // Scan the list of section start addresses to find the section start address + // for the given RVA. + for (int i = 0, e = sectionRva.size(); i < e; ++i) + if (i == e - 1 || (sectionRva[i] <= targetAddr && targetAddr < sectionRva[i + 1])) + return sectionRva[i]; + llvm_unreachable("Section missing"); +} + +static void applyThumbMoveImmediate(ulittle16_t *mov, uint16_t imm) { + // MOVW(T3): |11110|i|10|0|1|0|0|imm4|0|imm3|Rd|imm8| + // imm32 = zext imm4:i:imm3:imm8 + // MOVT(T1): |11110|i|10|1|1|0|0|imm4|0|imm3|Rd|imm8| + // imm16 = imm4:i:imm3:imm8 + mov[0] = + mov[0] | (((imm & 0x0800) >> 11) << 10) | (((imm & 0xf000) >> 12) << 0); + mov[1] = + mov[1] | (((imm & 0x0700) >> 8) << 12) | (((imm & 0x00ff) >> 0) << 0); +} + +static void applyThumbBranchImmediate(ulittle16_t *bl, int32_t imm) { + // BL(T1): |11110|S|imm10|11|J1|1|J2|imm11| + // imm32 = sext S:I1:I2:imm10:imm11:'0' + // B.W(T4): |11110|S|imm10|10|J1|1|J2|imm11| + // imm32 = sext S:I1:I2:imm10:imm11:'0' + // + // I1 = ~(J1 ^ S), I2 = ~(J2 ^ S) + + assert((~abs(imm) & (-1 << 24)) && "bl/b.w out of range"); + + uint32_t S = (imm < 0 ? 
1 : 0); + uint32_t J1 = ((~imm & 0x00800000) >> 23) ^ S; + uint32_t J2 = ((~imm & 0x00400000) >> 22) ^ S; + + bl[0] = bl[0] | (((imm & 0x003ff000) >> 12) << 0) | (S << 10); + bl[1] = bl[1] | (((imm & 0x00000ffe) >> 1) << 0) | (J2 << 11) | (J1 << 13); +} + +void AtomChunk::applyRelocationsARM(uint8_t *Buffer, + std::map<const Atom *, uint64_t> &AtomRVA, + std::vector<uint64_t> &SectionRVA, + uint64_t ImageBase) { + Buffer = Buffer + _fileOffset; + parallel_for_each(_atomLayouts.begin(), _atomLayouts.end(), + [&](const AtomLayout *layout) { + const DefinedAtom *Atom = cast<DefinedAtom>(layout->_atom); + for (const Reference *R : *Atom) { + if (R->kindNamespace() != Reference::KindNamespace::COFF) + continue; + + bool AssumeTHUMBCode = false; + if (auto Target = dyn_cast<DefinedAtom>(R->target())) + AssumeTHUMBCode = Target->permissions() == DefinedAtom::permR_X || + Target->permissions() == DefinedAtom::permRWX; + + const auto AtomOffset = R->offsetInAtom(); + const auto FileOffset = layout->_fileOffset; + const auto TargetAddr = AtomRVA[R->target()] | (AssumeTHUMBCode ? 
1 : 0); + auto RelocSite16 = + reinterpret_cast<ulittle16_t *>(Buffer + FileOffset + AtomOffset); + auto RelocSite32 = + reinterpret_cast<ulittle32_t *>(Buffer + FileOffset + AtomOffset); + + switch (R->kindValue()) { + default: llvm_unreachable("unsupported relocation type"); + case llvm::COFF::IMAGE_REL_ARM_ADDR32: + *RelocSite32 = *RelocSite32 + TargetAddr + ImageBase; + break; + case llvm::COFF::IMAGE_REL_ARM_ADDR32NB: + *RelocSite32 = *RelocSite32 + TargetAddr; + break; + case llvm::COFF::IMAGE_REL_ARM_MOV32T: + applyThumbMoveImmediate(&RelocSite16[0], (TargetAddr + ImageBase) >> 0); + applyThumbMoveImmediate(&RelocSite16[2], (TargetAddr + ImageBase) >> 16); + break; + case llvm::COFF::IMAGE_REL_ARM_BRANCH24T: + // NOTE: the thumb bit will implicitly be truncated properly + applyThumbBranchImmediate(RelocSite16, + TargetAddr - AtomRVA[Atom] - AtomOffset - 4); + break; + case llvm::COFF::IMAGE_REL_ARM_BLX23T: + // NOTE: the thumb bit will implicitly be truncated properly + applyThumbBranchImmediate(RelocSite16, + TargetAddr - AtomRVA[Atom] - AtomOffset - 4); + break; + } + } + }); +} + +void AtomChunk::applyRelocationsX86(uint8_t *buffer, + std::map<const Atom *, uint64_t> &atomRva, + std::vector<uint64_t> §ionRva, + uint64_t imageBaseAddress) { + buffer += _fileOffset; + parallel_for_each(_atomLayouts.begin(), _atomLayouts.end(), + [&](const AtomLayout *layout) { + const DefinedAtom *atom = cast<DefinedAtom>(layout->_atom); + for (const Reference *ref : *atom) { + // Skip if this reference is not for COFF relocation. + if (ref->kindNamespace() != Reference::KindNamespace::COFF) + continue; + auto relocSite32 = reinterpret_cast<ulittle32_t *>( + buffer + layout->_fileOffset + ref->offsetInAtom()); + auto relocSite16 = reinterpret_cast<ulittle16_t *>(relocSite32); + const Atom *target = ref->target(); + uint64_t targetAddr = atomRva[target]; + // Also account for whatever offset is already stored at the relocation + // site. 
+ switch (ref->kindValue()) { + case llvm::COFF::IMAGE_REL_I386_ABSOLUTE: + // This relocation is no-op. + break; + case llvm::COFF::IMAGE_REL_I386_DIR32: + // Set target's 32-bit VA. + if (auto *abs = dyn_cast<AbsoluteAtom>(target)) + *relocSite32 += abs->value(); + else + *relocSite32 += targetAddr + imageBaseAddress; + break; + case llvm::COFF::IMAGE_REL_I386_DIR32NB: + // Set target's 32-bit RVA. + *relocSite32 += targetAddr; + break; + case llvm::COFF::IMAGE_REL_I386_REL32: { + // Set 32-bit relative address of the target. This relocation is + // usually used for relative branch or call instruction. + uint32_t disp = atomRva[atom] + ref->offsetInAtom() + 4; + *relocSite32 += targetAddr - disp; + break; + } + case llvm::COFF::IMAGE_REL_I386_SECTION: + // The 16-bit section index that contains the target symbol. + *relocSite16 += getSectionIndex(targetAddr, sectionRva); + break; + case llvm::COFF::IMAGE_REL_I386_SECREL: + // The 32-bit relative address from the beginning of the section that + // contains the target symbol. 
+ *relocSite32 += + targetAddr - getSectionStartAddr(targetAddr, sectionRva); + break; + default: + llvm::report_fatal_error("Unsupported relocation kind"); + } + } + }); +} + +void AtomChunk::applyRelocationsX64(uint8_t *buffer, + std::map<const Atom *, uint64_t> &atomRva, + std::vector<uint64_t> §ionRva, + uint64_t imageBase) { + buffer += _fileOffset; + parallel_for_each(_atomLayouts.begin(), _atomLayouts.end(), + [&](const AtomLayout *layout) { + const DefinedAtom *atom = cast<DefinedAtom>(layout->_atom); + for (const Reference *ref : *atom) { + if (ref->kindNamespace() != Reference::KindNamespace::COFF) + continue; + + uint8_t *loc = buffer + layout->_fileOffset + ref->offsetInAtom(); + auto relocSite16 = reinterpret_cast<ulittle16_t *>(loc); + auto relocSite32 = reinterpret_cast<ulittle32_t *>(loc); + auto relocSite64 = reinterpret_cast<ulittle64_t *>(loc); + uint64_t targetAddr = atomRva[ref->target()]; + + switch (ref->kindValue()) { + case llvm::COFF::IMAGE_REL_AMD64_ADDR64: + *relocSite64 += targetAddr + imageBase; + break; + case llvm::COFF::IMAGE_REL_AMD64_ADDR32: + *relocSite32 += targetAddr + imageBase; + break; + case llvm::COFF::IMAGE_REL_AMD64_ADDR32NB: + *relocSite32 += targetAddr; + break; + case llvm::COFF::IMAGE_REL_AMD64_REL32: + *relocSite32 += targetAddr - atomRva[atom] - ref->offsetInAtom() - 4; + break; + case llvm::COFF::IMAGE_REL_AMD64_REL32_1: + *relocSite32 += targetAddr - atomRva[atom] - ref->offsetInAtom() - 5; + break; + case llvm::COFF::IMAGE_REL_AMD64_REL32_2: + *relocSite32 += targetAddr - atomRva[atom] - ref->offsetInAtom() - 6; + break; + case llvm::COFF::IMAGE_REL_AMD64_REL32_3: + *relocSite32 += targetAddr - atomRva[atom] - ref->offsetInAtom() - 7; + break; + case llvm::COFF::IMAGE_REL_AMD64_REL32_4: + *relocSite32 += targetAddr - atomRva[atom] - ref->offsetInAtom() - 8; + break; + case llvm::COFF::IMAGE_REL_AMD64_REL32_5: + *relocSite32 += targetAddr - atomRva[atom] - ref->offsetInAtom() - 9; + break; + case 
llvm::COFF::IMAGE_REL_AMD64_SECTION: + *relocSite16 += getSectionIndex(targetAddr, sectionRva) - 1; + break; + case llvm::COFF::IMAGE_REL_AMD64_SECREL: + *relocSite32 += + targetAddr - getSectionStartAddr(targetAddr, sectionRva); + break; + default: + llvm::errs() << "Kind: " << (int)ref->kindValue() << "\n"; + llvm::report_fatal_error("Unsupported relocation kind"); + } + } + }); +} + +/// Print atom VAs. Used only for debugging. +void AtomChunk::printAtomAddresses(uint64_t baseAddr) const { + for (const auto *layout : _atomLayouts) { + const DefinedAtom *atom = cast<DefinedAtom>(layout->_atom); + uint64_t addr = layout->_virtualAddr; + llvm::dbgs() << llvm::format("0x%08llx: ", addr + baseAddr) + << (atom->name().empty() ? "(anonymous)" : atom->name()) + << "\n"; + } +} + +/// List all virtual addresses (and not relative virtual addresses) that need +/// to be fixed up if image base is relocated. The only relocation type that +/// needs to be fixed is DIR32 on i386. REL32 is not (and should not be) +/// fixed up because it's PC-relative. +void AtomChunk::addBaseRelocations(std::vector<BaseReloc> &relocSites) const { + for (const auto *layout : _atomLayouts) { + const DefinedAtom *atom = cast<DefinedAtom>(layout->_atom); + for (const Reference *ref : *atom) { + if (ref->kindNamespace() != Reference::KindNamespace::COFF) + continue; + + // An absolute symbol points to a fixed location in memory. Their + // address should not be fixed at load time. One exception is ImageBase + // because that's relative to run-time image base address. 
+ if (auto *abs = dyn_cast<AbsoluteAtom>(ref->target())) + if (!abs->name().equals("__ImageBase") && + !abs->name().equals("___ImageBase")) + continue; + + uint64_t address = layout->_virtualAddr + ref->offsetInAtom(); + switch (_machineType) { + default: llvm_unreachable("unsupported machine type"); + case llvm::COFF::IMAGE_FILE_MACHINE_I386: + if (ref->kindValue() == llvm::COFF::IMAGE_REL_I386_DIR32) + relocSites.push_back( + BaseReloc(address, llvm::COFF::IMAGE_REL_BASED_HIGHLOW)); + break; + case llvm::COFF::IMAGE_FILE_MACHINE_AMD64: + if (ref->kindValue() == llvm::COFF::IMAGE_REL_AMD64_ADDR64) + relocSites.push_back( + BaseReloc(address, llvm::COFF::IMAGE_REL_BASED_DIR64)); + break; + case llvm::COFF::IMAGE_FILE_MACHINE_ARMNT: + if (ref->kindValue() == llvm::COFF::IMAGE_REL_ARM_ADDR32) + relocSites.push_back( + BaseReloc(address, llvm::COFF::IMAGE_REL_BASED_HIGHLOW)); + else if (ref->kindValue() == llvm::COFF::IMAGE_REL_ARM_MOV32T) + relocSites.push_back( + BaseReloc(address, llvm::COFF::IMAGE_REL_BASED_ARM_MOV32T)); + break; + } + } + } +} + +void AtomChunk::setVirtualAddress(uint32_t rva) { + SectionChunk::setVirtualAddress(rva); + for (AtomLayout *layout : _atomLayouts) + layout->_virtualAddr += rva; +} + +uint64_t AtomChunk::getAtomVirtualAddress(StringRef name) const { + for (auto atomLayout : _atomLayouts) + if (atomLayout->_atom->name() == name) + return atomLayout->_virtualAddr; + return 0; +} + +void DataDirectoryChunk::setField(DataDirectoryIndex index, uint32_t addr, + uint32_t size) { + llvm::object::data_directory &dir = _data[index]; + dir.RelativeVirtualAddress = addr; + dir.Size = size; +} + +void DataDirectoryChunk::write(uint8_t *buffer) { + std::memcpy(buffer, &_data[0], size()); +} + +uint64_t AtomChunk::memAlign() const { + // ReaderCOFF propagated the section alignment to the first atom in + // the section. We restore that here. 
+ if (_atomLayouts.empty()) + return _ctx.getPageSize(); + int align = _ctx.getPageSize(); + for (auto atomLayout : _atomLayouts) { + auto *atom = cast<const DefinedAtom>(atomLayout->_atom); + align = std::max(align, 1 << atom->alignment().powerOf2); + } + return align; +} + +void AtomChunk::appendAtom(const DefinedAtom *atom) { + // Atom may have to be at a proper alignment boundary. If so, move the + // pointer to make a room after the last atom before adding new one. + _size = llvm::RoundUpToAlignment(_size, 1 << atom->alignment().powerOf2); + + // Create an AtomLayout and move the current pointer. + auto *layout = new (_alloc) AtomLayout(atom, _size, _size); + _atomLayouts.push_back(layout); + _size += atom->size(); +} + +uint32_t AtomChunk::getDefaultCharacteristics( + StringRef name, const std::vector<const DefinedAtom *> &atoms) const { + const uint32_t code = llvm::COFF::IMAGE_SCN_CNT_CODE; + const uint32_t execute = llvm::COFF::IMAGE_SCN_MEM_EXECUTE; + const uint32_t read = llvm::COFF::IMAGE_SCN_MEM_READ; + const uint32_t write = llvm::COFF::IMAGE_SCN_MEM_WRITE; + const uint32_t data = llvm::COFF::IMAGE_SCN_CNT_INITIALIZED_DATA; + const uint32_t bss = llvm::COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA; + if (name == ".text") + return code | execute | read; + if (name == ".data") + return data | read | write; + if (name == ".rdata") + return data | read; + if (name == ".bss") + return bss | read | write; + assert(atoms.size() > 0); + switch (atoms[0]->permissions()) { + case DefinedAtom::permR__: + return data | read; + case DefinedAtom::permRW_: + return data | read | write; + case DefinedAtom::permR_X: + return code | execute | read; + case DefinedAtom::permRWX: + return code | execute | read | write; + default: + llvm_unreachable("Unsupported permission"); + } +} + +void SectionHeaderTableChunk::addSection(SectionChunk *chunk) { + _sections.push_back(chunk); +} + +uint64_t SectionHeaderTableChunk::size() const { + return _sections.size() * 
sizeof(llvm::object::coff_section); +} + +void SectionHeaderTableChunk::write(uint8_t *buffer) { + uint64_t offset = 0; + for (SectionChunk *chunk : _sections) { + llvm::object::coff_section header = createSectionHeader(chunk); + std::memcpy(buffer + offset, &header, sizeof(header)); + offset += sizeof(header); + } +} + +llvm::object::coff_section +SectionHeaderTableChunk::createSectionHeader(SectionChunk *chunk) { + llvm::object::coff_section header; + + // We have extended the COFF specification by allowing section names to be + // greater than eight characters. We achieve this by adding the section names + // to the string table. Binutils' linker, ld, performs the same trick. + StringRef sectionName = chunk->getSectionName(); + std::memset(header.Name, 0, llvm::COFF::NameSize); + if (uint32_t stringTableOffset = chunk->getStringTableOffset()) + sprintf(header.Name, "/%u", stringTableOffset); + else + std::strncpy(header.Name, sectionName.data(), sectionName.size()); + + uint32_t characteristics = chunk->getCharacteristics(); + header.VirtualSize = chunk->size(); + header.VirtualAddress = chunk->getVirtualAddress(); + header.SizeOfRawData = chunk->onDiskSize(); + header.PointerToRelocations = 0; + header.PointerToLinenumbers = 0; + header.NumberOfRelocations = 0; + header.NumberOfLinenumbers = 0; + header.Characteristics = characteristics; + + if (characteristics & llvm::COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA) { + header.PointerToRawData = 0; + } else { + header.PointerToRawData = chunk->fileOffset(); + } + return header; +} + +/// Creates .reloc section content from the other sections. The content of +/// .reloc is basically a list of relocation sites. The relocation sites are +/// divided into blocks. Each block represents the base relocation for a 4K +/// page. +/// +/// By dividing 32 bit RVAs into blocks, COFF saves disk and memory space for +/// the base relocation. 
A block consists of a 32 bit page RVA and 16 bit +/// relocation entries which represent offsets in the page. That is a more +/// compact representation than a simple vector of 32 bit RVAs. +std::vector<uint8_t> +BaseRelocChunk::createContents(ChunkVectorT &chunks) const { + std::vector<uint8_t> contents; + std::vector<BaseReloc> relocSites = listRelocSites(chunks); + + uint64_t mask = _ctx.getPageSize() - 1; + parallel_sort(relocSites.begin(), relocSites.end(), + [=](const BaseReloc &a, const BaseReloc &b) { + return (a.addr & ~mask) < (b.addr & ~mask); + }); + + // Base relocations for the same memory page are grouped together + // and passed to createBaseRelocBlock. + for (auto it = relocSites.begin(), e = relocSites.end(); it != e;) { + auto beginIt = it; + uint64_t pageAddr = (beginIt->addr & ~mask); + for (++it; it != e; ++it) + if ((it->addr & ~mask) != pageAddr) + break; + const BaseReloc *begin = &*beginIt; + const BaseReloc *end = begin + (it - beginIt); + std::vector<uint8_t> block = createBaseRelocBlock(pageAddr, begin, end); + contents.insert(contents.end(), block.begin(), block.end()); + } + return contents; +} + +// Returns a list of RVAs that needs to be relocated if the binary is loaded +// at an address different from its preferred one. +std::vector<BaseReloc> +BaseRelocChunk::listRelocSites(ChunkVectorT &chunks) const { + std::vector<BaseReloc> ret; + for (auto &cp : chunks) + if (AtomChunk *chunk = dyn_cast<AtomChunk>(&*cp)) + chunk->addBaseRelocations(ret); + return ret; +} + +// Create the content of a relocation block. +std::vector<uint8_t> +BaseRelocChunk::createBaseRelocBlock(uint64_t pageAddr, + const BaseReloc *begin, + const BaseReloc *end) const { + // Relocation blocks should be padded with IMAGE_REL_I386_ABSOLUTE to be + // aligned to a DWORD size boundary. 
+ uint32_t size = llvm::RoundUpToAlignment( + sizeof(ulittle32_t) * 2 + sizeof(ulittle16_t) * (end - begin), + sizeof(ulittle32_t)); + std::vector<uint8_t> contents(size); + uint8_t *ptr = &contents[0]; + + // The first four bytes is the page RVA. + write32le(ptr, pageAddr); + ptr += sizeof(ulittle32_t); + + // The second four bytes is the size of the block, including the the page + // RVA and this size field. + write32le(ptr, size); + ptr += sizeof(ulittle32_t); + + uint64_t mask = _ctx.getPageSize() - 1; + for (const BaseReloc *i = begin; i < end; ++i) { + write16le(ptr, (i->type << 12) | (i->addr & mask)); + ptr += sizeof(ulittle16_t); + } + return contents; +} + +} // end anonymous namespace + +class PECOFFWriter : public Writer { +public: + explicit PECOFFWriter(const PECOFFLinkingContext &context) + : _ctx(context), _numSections(0), _imageSizeInMemory(_ctx.getPageSize()), + _imageSizeOnDisk(0) {} + + template <class PEHeader> void build(const File &linkedFile); + std::error_code writeFile(const File &linkedFile, StringRef path) override; + +private: + void applyAllRelocations(uint8_t *bufferStart); + void printAllAtomAddresses() const; + void reorderSEHTableEntries(uint8_t *bufferStart); + void reorderSEHTableEntriesX86(uint8_t *bufferStart); + void reorderSEHTableEntriesX64(uint8_t *bufferStart); + + void addChunk(Chunk *chunk); + void addSectionChunk(std::unique_ptr<SectionChunk> chunk, + SectionHeaderTableChunk *table, + StringTableChunk *stringTable); + void setImageSizeOnDisk(); + uint64_t + calcSectionSize(llvm::COFF::SectionCharacteristics sectionType) const; + + uint64_t calcSizeOfInitializedData() const { + return calcSectionSize(llvm::COFF::IMAGE_SCN_CNT_INITIALIZED_DATA); + } + + uint64_t calcSizeOfUninitializedData() const { + return calcSectionSize(llvm::COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA); + } + + uint64_t calcSizeOfCode() const { + return calcSectionSize(llvm::COFF::IMAGE_SCN_CNT_CODE); + } + + std::vector<std::unique_ptr<Chunk> > _chunks; 
+ const PECOFFLinkingContext &_ctx; + uint32_t _numSections; + + // The size of the image in memory. This is initialized with + // _ctx.getPageSize(), as the first page starting at ImageBase is usually left + // unmapped. IIUC there's no technical reason to do so, but we'll follow that + // convention so that we don't produce odd-looking binary. + uint32_t _imageSizeInMemory; + + // The size of the image on disk. This is basically the sum of all chunks in + // the output file with paddings between them. + uint32_t _imageSizeOnDisk; + + // The map from atom to its relative virtual address. + std::map<const Atom *, uint64_t> _atomRva; +}; + +StringRef customSectionName(const DefinedAtom *atom) { + assert(atom->sectionChoice() == DefinedAtom::sectionCustomRequired); + StringRef s = atom->customSectionName(); + size_t pos = s.find('$'); + return (pos == StringRef::npos) ? s : s.substr(0, pos); +} + +StringRef chooseSectionByContent(const DefinedAtom *atom) { + switch (atom->contentType()) { + case DefinedAtom::typeCode: + return ".text"; + case DefinedAtom::typeZeroFill: + return ".bss"; + case DefinedAtom::typeData: + if (atom->permissions() == DefinedAtom::permR__) + return ".rdata"; + if (atom->permissions() == DefinedAtom::permRW_) + return ".data"; + break; + default: + break; + } + llvm::errs() << "Atom: contentType=" << atom->contentType() + << " permission=" << atom->permissions() << "\n"; + llvm::report_fatal_error("Failed to choose section based on content"); +} + +typedef std::map<StringRef, std::vector<const DefinedAtom *> > AtomVectorMap; + +void groupAtoms(const PECOFFLinkingContext &ctx, const File &file, + AtomVectorMap &result) { + for (const DefinedAtom *atom : file.defined()) { + if (atom->sectionChoice() == DefinedAtom::sectionCustomRequired) { + StringRef section = customSectionName(atom); + result[ctx.getOutputSectionName(section)].push_back(atom); + continue; + } + if (atom->sectionChoice() == DefinedAtom::sectionBasedOnContent) { + StringRef 
section = chooseSectionByContent(atom); + result[ctx.getOutputSectionName(section)].push_back(atom); + continue; + } + llvm_unreachable("Unknown section choice"); + } +} + +static const DefinedAtom *findTLSUsedSymbol(const PECOFFLinkingContext &ctx, + const File &file) { + StringRef sym = ctx.decorateSymbol("_tls_used"); + for (const DefinedAtom *atom : file.defined()) + if (atom->name() == sym) + return atom; + return nullptr; +} + +// Create all chunks that consist of the output file. +template <class PEHeader> +void PECOFFWriter::build(const File &linkedFile) { + AtomVectorMap atoms; + groupAtoms(_ctx, linkedFile, atoms); + + // Create file chunks and add them to the list. + auto *dosStub = new DOSStubChunk(_ctx); + auto *peHeader = new PEHeaderChunk<PEHeader>(_ctx); + auto *dataDirectory = new DataDirectoryChunk(); + auto *sectionTable = new SectionHeaderTableChunk(); + auto *stringTable = new StringTableChunk(); + addChunk(dosStub); + addChunk(peHeader); + addChunk(dataDirectory); + addChunk(sectionTable); + addChunk(stringTable); + + // Create sections and add the atoms to them. + for (auto i : atoms) { + StringRef sectionName = i.first; + std::vector<const DefinedAtom *> &contents = i.second; + std::unique_ptr<SectionChunk> section( + new AtomChunk(_ctx, sectionName, contents)); + if (section->size() > 0) + addSectionChunk(std::move(section), sectionTable, stringTable); + } + + // Build atom to its RVA map. + for (std::unique_ptr<Chunk> &cp : _chunks) + if (AtomChunk *chunk = dyn_cast<AtomChunk>(&*cp)) + chunk->buildAtomRvaMap(_atomRva); + + // We know the addresses of all defined atoms that needs to be + // relocated. So we can create the ".reloc" section which contains + // all the relocation sites. 
+ if (_ctx.getBaseRelocationEnabled()) { + std::unique_ptr<SectionChunk> baseReloc(new BaseRelocChunk(_chunks, _ctx)); + if (baseReloc->size()) { + SectionChunk &ref = *baseReloc; + addSectionChunk(std::move(baseReloc), sectionTable, stringTable); + dataDirectory->setField(DataDirectoryIndex::BASE_RELOCATION_TABLE, + ref.getVirtualAddress(), ref.size()); + } + } + + setImageSizeOnDisk(); + + if (stringTable->size()) { + peHeader->setPointerToSymbolTable(stringTable->fileOffset()); + peHeader->setNumberOfSymbols(1); + } + + for (std::unique_ptr<Chunk> &chunk : _chunks) { + SectionChunk *section = dyn_cast<SectionChunk>(chunk.get()); + if (!section) + continue; + if (section->getSectionName() == ".text") { + peHeader->setBaseOfCode(section->getVirtualAddress()); + + // Find the virtual address of the entry point symbol if any. PECOFF spec + // says that entry point for dll images is optional, in which case it must + // be set to 0. + if (_ctx.hasEntry()) { + AtomChunk *atom = cast<AtomChunk>(section); + uint64_t entryPointAddress = + atom->getAtomVirtualAddress(_ctx.getEntrySymbolName()); + + if (entryPointAddress) { + // NOTE: ARM NT assumes a pure THUMB execution, so adjust the entry + // point accordingly + if (_ctx.getMachineType() == llvm::COFF::IMAGE_FILE_MACHINE_ARMNT) + entryPointAddress |= 1; + peHeader->setAddressOfEntryPoint(entryPointAddress); + } + } else { + peHeader->setAddressOfEntryPoint(0); + } + } + StringRef name = section->getSectionName(); + if (name == ".data") { + peHeader->setBaseOfData(section->getVirtualAddress()); + continue; + } + DataDirectoryIndex ignore = DataDirectoryIndex(-1); + DataDirectoryIndex idx = llvm::StringSwitch<DataDirectoryIndex>(name) + .Case(".pdata", DataDirectoryIndex::EXCEPTION_TABLE) + .Case(".rsrc", DataDirectoryIndex::RESOURCE_TABLE) + .Case(".idata.a", DataDirectoryIndex::IAT) + .Case(".idata.d", DataDirectoryIndex::IMPORT_TABLE) + .Case(".edata", DataDirectoryIndex::EXPORT_TABLE) + .Case(".loadcfg", 
DataDirectoryIndex::LOAD_CONFIG_TABLE) + .Case(".didat.d", DataDirectoryIndex::DELAY_IMPORT_DESCRIPTOR) + .Default(ignore); + if (idx == ignore) + continue; + dataDirectory->setField(idx, section->getVirtualAddress(), section->size()); + } + + if (const DefinedAtom *atom = findTLSUsedSymbol(_ctx, linkedFile)) { + dataDirectory->setField(DataDirectoryIndex::TLS_TABLE, _atomRva[atom], + 0x18); + } + + // Now that we know the size and file offset of sections. Set the file + // header accordingly. + peHeader->setSizeOfCode(calcSizeOfCode()); + peHeader->setSizeOfInitializedData(calcSizeOfInitializedData()); + peHeader->setSizeOfUninitializedData(calcSizeOfUninitializedData()); + peHeader->setNumberOfSections(_numSections); + peHeader->setSizeOfImage(_imageSizeInMemory); + peHeader->setSizeOfHeaders(sectionTable->fileOffset() + sectionTable->size()); +} + +std::error_code PECOFFWriter::writeFile(const File &linkedFile, + StringRef path) { + if (_ctx.is64Bit()) { + this->build<llvm::object::pe32plus_header>(linkedFile); + } else { + this->build<llvm::object::pe32_header>(linkedFile); + } + + uint64_t totalSize = + _chunks.back()->fileOffset() + _chunks.back()->onDiskSize(); + std::unique_ptr<llvm::FileOutputBuffer> buffer; + std::error_code ec = llvm::FileOutputBuffer::create( + path, totalSize, buffer, llvm::FileOutputBuffer::F_executable); + if (ec) + return ec; + + for (std::unique_ptr<Chunk> &chunk : _chunks) + chunk->write(buffer->getBufferStart() + chunk->fileOffset()); + applyAllRelocations(buffer->getBufferStart()); + reorderSEHTableEntries(buffer->getBufferStart()); + DEBUG(printAllAtomAddresses()); + + if (_ctx.isDll()) + writeImportLibrary(_ctx); + + return buffer->commit(); +} + +/// Apply relocations to the output file buffer. This two pass. In the first +/// pass, we visit all atoms to create a map from atom to its virtual +/// address. In the second pass, we visit all relocation references to fix +/// up addresses in the buffer. 
+void PECOFFWriter::applyAllRelocations(uint8_t *bufferStart) { + // Create the list of section start addresses. It's needed for + // relocations of SECREL type. + std::vector<uint64_t> sectionRva; + for (auto &cp : _chunks) + if (SectionChunk *section = dyn_cast<SectionChunk>(&*cp)) + sectionRva.push_back(section->getVirtualAddress()); + + uint64_t base = _ctx.getBaseAddress(); + for (auto &cp : _chunks) { + if (AtomChunk *chunk = dyn_cast<AtomChunk>(&*cp)) { + switch (_ctx.getMachineType()) { + default: llvm_unreachable("unsupported machine type"); + case llvm::COFF::IMAGE_FILE_MACHINE_ARMNT: + chunk->applyRelocationsARM(bufferStart, _atomRva, sectionRva, base); + break; + case llvm::COFF::IMAGE_FILE_MACHINE_I386: + chunk->applyRelocationsX86(bufferStart, _atomRva, sectionRva, base); + break; + case llvm::COFF::IMAGE_FILE_MACHINE_AMD64: + chunk->applyRelocationsX64(bufferStart, _atomRva, sectionRva, base); + break; + } + } + } +} + +/// Print atom VAs. Used only for debugging. +void PECOFFWriter::printAllAtomAddresses() const { + for (auto &cp : _chunks) + if (AtomChunk *chunk = dyn_cast<AtomChunk>(&*cp)) + chunk->printAtomAddresses(_ctx.getBaseAddress()); +} + +void PECOFFWriter::reorderSEHTableEntries(uint8_t *bufferStart) { + auto machineType = _ctx.getMachineType(); + if (machineType == llvm::COFF::IMAGE_FILE_MACHINE_I386) + reorderSEHTableEntriesX86(bufferStart); + if (machineType == llvm::COFF::IMAGE_FILE_MACHINE_AMD64) + reorderSEHTableEntriesX64(bufferStart); +} + +/// It seems that the entries in .sxdata must be sorted. This function is called +/// after a COFF file image is created in memory and before it is written to +/// disk. It is safe to reorder entries at this stage because the contents of +/// the entries are RVAs and there's no reference to a .sxdata entry other than +/// to the beginning of the section. 
+void PECOFFWriter::reorderSEHTableEntriesX86(uint8_t *bufferStart) { + for (std::unique_ptr<Chunk> &chunk : _chunks) { + if (SectionChunk *section = dyn_cast<SectionChunk>(chunk.get())) { + if (section->getSectionName() == ".sxdata") { + int numEntries = section->size() / sizeof(ulittle32_t); + ulittle32_t *begin = reinterpret_cast<ulittle32_t *>(bufferStart + section->fileOffset()); + ulittle32_t *end = begin + numEntries; + std::sort(begin, end); + } + } + } +} + +/// The entries in .pdata must be sorted according to its BeginAddress field +/// value. It's safe to do it because of the same reason as .sxdata. +void PECOFFWriter::reorderSEHTableEntriesX64(uint8_t *bufferStart) { + for (std::unique_ptr<Chunk> &chunk : _chunks) { + if (SectionChunk *section = dyn_cast<SectionChunk>(chunk.get())) { + if (section->getSectionName() != ".pdata") + continue; + int numEntries = section->size() / sizeof(coff_runtime_function_x64); + coff_runtime_function_x64 *begin = + (coff_runtime_function_x64 *)(bufferStart + section->fileOffset()); + coff_runtime_function_x64 *end = begin + numEntries; + std::sort(begin, end, [](const coff_runtime_function_x64 &lhs, + const coff_runtime_function_x64 &rhs) { + return lhs.BeginAddress < rhs.BeginAddress; + }); + } + } +} + +void PECOFFWriter::addChunk(Chunk *chunk) { + _chunks.push_back(std::unique_ptr<Chunk>(chunk)); +} + +void PECOFFWriter::addSectionChunk(std::unique_ptr<SectionChunk> chunk, + SectionHeaderTableChunk *table, + StringTableChunk *stringTable) { + table->addSection(chunk.get()); + _numSections++; + + StringRef sectionName = chunk->getSectionName(); + if (sectionName.size() > llvm::COFF::NameSize) { + uint32_t stringTableOffset = stringTable->addSectionName(sectionName); + chunk->setStringTableOffset(stringTableOffset); + } + + // Compute and set the starting address of sections when loaded in + // memory. 
They are different from positions on disk because sections need + // to be sector-aligned on disk but page-aligned in memory. + _imageSizeInMemory = llvm::RoundUpToAlignment( + _imageSizeInMemory, chunk->memAlign()); + chunk->setVirtualAddress(_imageSizeInMemory); + _imageSizeInMemory = llvm::RoundUpToAlignment( + _imageSizeInMemory + chunk->size(), _ctx.getPageSize()); + _chunks.push_back(std::move(chunk)); +} + +void PECOFFWriter::setImageSizeOnDisk() { + for (auto &chunk : _chunks) { + // Compute and set the offset of the chunk in the output file. + _imageSizeOnDisk = + llvm::RoundUpToAlignment(_imageSizeOnDisk, chunk->align()); + chunk->setFileOffset(_imageSizeOnDisk); + _imageSizeOnDisk += chunk->onDiskSize(); + } +} + +uint64_t PECOFFWriter::calcSectionSize( + llvm::COFF::SectionCharacteristics sectionType) const { + uint64_t ret = 0; + for (auto &cp : _chunks) + if (SectionChunk *chunk = dyn_cast<SectionChunk>(&*cp)) + if (chunk->getCharacteristics() & sectionType) + ret += chunk->onDiskSize(); + return ret; +} + +} // end namespace pecoff + +std::unique_ptr<Writer> createWriterPECOFF(const PECOFFLinkingContext &info) { + return std::unique_ptr<Writer>(new pecoff::PECOFFWriter(info)); +} + +} // end namespace lld |