summaryrefslogtreecommitdiff
path: root/lib/ReaderWriter/PECOFF/ReaderImportHeader.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/ReaderWriter/PECOFF/ReaderImportHeader.cpp')
-rw-r--r--lib/ReaderWriter/PECOFF/ReaderImportHeader.cpp389
1 files changed, 389 insertions, 0 deletions
diff --git a/lib/ReaderWriter/PECOFF/ReaderImportHeader.cpp b/lib/ReaderWriter/PECOFF/ReaderImportHeader.cpp
new file mode 100644
index 0000000000000..8c9641376a0d2
--- /dev/null
+++ b/lib/ReaderWriter/PECOFF/ReaderImportHeader.cpp
@@ -0,0 +1,389 @@
+//===- lib/ReaderWriter/PECOFF/ReaderImportHeader.cpp ---------------------===//
+//
+// The LLVM Linker
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file \brief This file provides a way to read an import library member in a
+/// .lib file.
+///
+/// Archive Files in Windows
+/// ========================
+///
+/// In Windows, archive files with .lib file extension serve two different
+/// purposes.
+///
+/// - For static linking: An archive file in this use case contains multiple
+/// regular .obj files and is used for static linking. This is the same
+/// usage as .a file in Unix.
+///
+/// - For dynamic linking: An archive file in this use case contains pseudo
+/// .obj files to describe exported symbols of a DLL. Each pseudo .obj file
+/// in an archive has a name of an exported symbol and a DLL filename from
+/// which the symbol can be imported. When you link a DLL on Windows, you
+/// pass the name of the .lib file for the DLL instead of the DLL filename
+/// itself. That is the Windows way of linking against a shared library.
+///
+/// This file contains a function to handle the pseudo object file.
+///
+/// Windows Loader and Import Address Table
+/// =======================================
+///
+/// Windows supports a GOT-like mechanism for DLLs. The executable using DLLs
+/// contains a list of DLL names and list of symbols that need to be resolved by
+/// the loader. Windows loader maps the executable and all the DLLs to memory,
+/// resolves the symbols referencing items in DLLs, and updates the import
+/// address table (IAT) in memory. The IAT is an array of pointers to all of the
+/// data or functions in DLL referenced by the executable. You cannot access
+/// items in DLLs directly. They have to be accessed through an extra level of
+/// indirection.
+///
+/// So, if you want to access an item in DLL, you have to go through a
+/// pointer. How do you actually do that? You need a symbol for a pointer in the
+/// IAT. For each symbol defined in a DLL, a symbol with "__imp_" prefix is
+/// exported from the DLL for an IAT entry. For example, if you have a global
+/// variable "foo" in a DLL, a pointer to the variable is available as
+/// "_imp__foo". The IAT is an array of _imp__ symbols.
+///
+/// Is this OK? That's not that complicated. Because items in a DLL are not
+/// directly accessible, you need to access through a pointer, and the pointer
+/// is available as a symbol with _imp__ prefix.
+///
+/// Note 1: Although you can write code with _imp__ prefix, today's compiler and
+/// linker let you write code as if there's no extra level of indirection.
+/// That's why you haven't seen lots of _imp__ in your code. A variable or a
+/// function declared with "dllimport" attribute is treated as an item in a DLL,
+/// and the compiler automatically mangles its name and inserts the extra level
+/// of indirection when accessing the item. Here are some examples:
+///
+/// __declspec(dllimport) int var_in_dll;
+/// var_in_dll = 3; // is equivalent to *_imp__var_in_dll = 3;
+///
+/// __declspec(dllimport) int fn_in_dll(void);
+/// fn_in_dll(); // is equivalent to (*_imp__fn_in_dll)();
+///
+/// It's just the compiler rewrites code for you so that you don't need to
+/// handle the indirection yourself.
+///
+/// Note 2: __declspec(dllimport) is mandatory for data but optional for
+/// function. For a function, the linker creates a jump table with the original
+/// symbol name, so that the function is accessible without _imp__ prefix. The
+/// same function in a DLL can be called through two different symbols if it's
+/// not dllimport'ed.
+///
+/// (*_imp__fn)()
+/// fn()
+///
+/// The above functions do the same thing. fn's content is a JMP instruction to
+/// branch to the address pointed by _imp__fn. The latter may be a little bit
+/// slower than the former because it will execute the extra JMP instruction,
+/// but that's usually negligible.
+///
+/// If a function is dllimport'ed, which is usually done in a header file,
+/// mangled name will be used at compile time so the jump table will not be
+/// used.
+///
+/// Because there's no way to hide the indirection for data access at link time,
+/// data has to be accessed through dllimport'ed symbols or explicit _imp__
+/// prefix.
+///
+/// Idata Sections in the Pseudo Object File
+/// ========================================
+///
+/// The object file created by cl.exe has several sections whose name starts
+/// with ".idata$" followed by a number. The contents of the sections seem the
+/// fragments of a complete ".idata" section. These sections has relocations for
+/// the data referenced from the idata secton. Generally, the linker discards
+/// "$" and all characters that follow from the section name and merges their
+/// contents to one section. So, it looks like if everything would work fine,
+/// the idata section would naturally be constructed without having any special
+/// code for doing that.
+///
+/// However, the LLD linker cannot do that. An idata section constructed in that
+/// way was never be in valid format. We don't know the reason yet. Our
+/// assumption on the idata fragment could simply be wrong, or the LLD linker is
+/// not powerful enough to do the job. Meanwhile, we construct the idata section
+/// ourselves. All the "idata$" sections in the pseudo object file are currently
+/// ignored.
+///
+/// Creating Atoms for the Import Address Table
+/// ===========================================
+///
+/// The function in this file reads a pseudo object file and creates at most two
+/// atoms. One is a shared library atom for _imp__ symbol. The another is a
+/// defined atom for the JMP instruction if the symbol is for a function.
+///
+//===----------------------------------------------------------------------===//
+
+#include "Atoms.h"
+#include "lld/Core/Error.h"
+#include "lld/Core/File.h"
+#include "lld/Core/SharedLibraryAtom.h"
+#include "lld/ReaderWriter/PECOFFLinkingContext.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Object/COFF.h"
+#include "llvm/Support/COFF.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Memory.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstring>
+#include <map>
+#include <system_error>
+#include <vector>
+
+using namespace lld;
+using namespace lld::pecoff;
+using namespace llvm;
+using namespace llvm::support::endian;
+
+#define DEBUG_TYPE "ReaderImportHeader"
+
+namespace lld {
+
+namespace {
+
+// This code is valid both in x86 and x64.
+const uint8_t FuncAtomContentX86[] = {
+ 0xff, 0x25, 0x00, 0x00, 0x00, 0x00, // JMP *0x0
+ 0xcc, 0xcc // INT 3; INT 3
+};
+
+const uint8_t FuncAtomContentARMNT[] = {
+ 0x40, 0xf2, 0x00, 0x0c, // mov.w ip, #0
+ 0xc0, 0xf2, 0x00, 0x0c, // mov.t ip, #0
+ 0xdc, 0xf8, 0x00, 0xf0, // ldr.w pc, [ip]
+};
+
+static void setJumpInstTarget(COFFLinkerInternalAtom *src, const Atom *dst,
+ int off, MachineTypes machine) {
+ SimpleReference *ref;
+
+ switch (machine) {
+ default: llvm::report_fatal_error("unsupported machine type");
+ case llvm::COFF::IMAGE_FILE_MACHINE_I386:
+ ref = new SimpleReference(Reference::KindNamespace::COFF,
+ Reference::KindArch::x86,
+ llvm::COFF::IMAGE_REL_I386_DIR32,
+ off, dst, 0);
+ break;
+ case llvm::COFF::IMAGE_FILE_MACHINE_AMD64:
+ ref = new SimpleReference(Reference::KindNamespace::COFF,
+ Reference::KindArch::x86_64,
+ llvm::COFF::IMAGE_REL_AMD64_REL32,
+ off, dst, 0);
+ break;
+ case llvm::COFF::IMAGE_FILE_MACHINE_ARMNT:
+ ref = new SimpleReference(Reference::KindNamespace::COFF,
+ Reference::KindArch::ARM,
+ llvm::COFF::IMAGE_REL_ARM_MOV32T,
+ off, dst, 0);
+ break;
+ }
+ src->addReference(std::unique_ptr<SimpleReference>(ref));
+}
+
+/// The defined atom for jump table.
+class FuncAtom : public COFFLinkerInternalAtom {
+public:
+ FuncAtom(const File &file, StringRef symbolName,
+ const COFFSharedLibraryAtom *impAtom, MachineTypes machine)
+ : COFFLinkerInternalAtom(file, /*oridnal*/ 0, createContent(machine),
+ symbolName) {
+ size_t Offset;
+
+ switch (machine) {
+ default: llvm::report_fatal_error("unsupported machine type");
+ case llvm::COFF::IMAGE_FILE_MACHINE_I386:
+ case llvm::COFF::IMAGE_FILE_MACHINE_AMD64:
+ Offset = 2;
+ break;
+ case llvm::COFF::IMAGE_FILE_MACHINE_ARMNT:
+ Offset = 0;
+ break;
+ }
+
+ setJumpInstTarget(this, impAtom, Offset, machine);
+ }
+
+ uint64_t ordinal() const override { return 0; }
+ Scope scope() const override { return scopeGlobal; }
+ ContentType contentType() const override { return typeCode; }
+ Alignment alignment() const override { return Alignment(1); }
+ ContentPermissions permissions() const override { return permR_X; }
+
+private:
+ std::vector<uint8_t> createContent(MachineTypes machine) const {
+ const uint8_t *Data;
+ size_t Size;
+
+ switch (machine) {
+ default: llvm::report_fatal_error("unsupported machine type");
+ case llvm::COFF::IMAGE_FILE_MACHINE_I386:
+ case llvm::COFF::IMAGE_FILE_MACHINE_AMD64:
+ Data = FuncAtomContentX86;
+ Size = sizeof(FuncAtomContentX86);
+ break;
+ case llvm::COFF::IMAGE_FILE_MACHINE_ARMNT:
+ Data = FuncAtomContentARMNT;
+ Size = sizeof(FuncAtomContentARMNT);
+ break;
+ }
+
+ return std::vector<uint8_t>(Data, Data + Size);
+ }
+};
+
+class FileImportLibrary : public File {
+public:
+ FileImportLibrary(std::unique_ptr<MemoryBuffer> mb, MachineTypes machine)
+ : File(mb->getBufferIdentifier(), kindSharedLibrary),
+ _mb(std::move(mb)), _machine(machine) {}
+
+ std::error_code doParse() override {
+ const char *buf = _mb->getBufferStart();
+ const char *end = _mb->getBufferEnd();
+
+ // The size of the string that follows the header.
+ uint32_t dataSize
+ = read32le(buf + offsetof(COFF::ImportHeader, SizeOfData));
+
+ // Check if the total size is valid.
+ if (std::size_t(end - buf) != sizeof(COFF::ImportHeader) + dataSize)
+ return make_error_code(NativeReaderError::unknown_file_format);
+
+ uint16_t hint = read16le(buf + offsetof(COFF::ImportHeader, OrdinalHint));
+ StringRef symbolName(buf + sizeof(COFF::ImportHeader));
+ StringRef dllName(buf + sizeof(COFF::ImportHeader) + symbolName.size() + 1);
+
+ // TypeInfo is a bitfield. The least significant 2 bits are import
+ // type, followed by 3 bit import name type.
+ uint16_t typeInfo = read16le(buf + offsetof(COFF::ImportHeader, TypeInfo));
+ int type = typeInfo & 0x3;
+ int nameType = (typeInfo >> 2) & 0x7;
+
+ // Symbol name used by the linker may be different from the symbol name used
+ // by the loader. The latter may lack symbol decorations, or may not even
+ // have name if it's imported by ordinal.
+ StringRef importName = symbolNameToImportName(symbolName, nameType);
+
+ const COFFSharedLibraryAtom *dataAtom =
+ addSharedLibraryAtom(hint, symbolName, importName, dllName);
+ if (type == llvm::COFF::IMPORT_CODE)
+ addFuncAtom(symbolName, dllName, dataAtom);
+
+ return std::error_code();
+ }
+
+ const atom_collection<DefinedAtom> &defined() const override {
+ return _definedAtoms;
+ }
+
+ const atom_collection<UndefinedAtom> &undefined() const override {
+ return _noUndefinedAtoms;
+ }
+
+ const atom_collection<SharedLibraryAtom> &sharedLibrary() const override {
+ return _sharedLibraryAtoms;
+ }
+
+ const atom_collection<AbsoluteAtom> &absolute() const override {
+ return _noAbsoluteAtoms;
+ }
+
+private:
+ const COFFSharedLibraryAtom *addSharedLibraryAtom(uint16_t hint,
+ StringRef symbolName,
+ StringRef importName,
+ StringRef dllName) {
+ auto *atom = new (_alloc)
+ COFFSharedLibraryAtom(*this, hint, symbolName, importName, dllName);
+ _sharedLibraryAtoms._atoms.push_back(atom);
+ return atom;
+ }
+
+ void addFuncAtom(StringRef symbolName, StringRef dllName,
+ const COFFSharedLibraryAtom *impAtom) {
+ auto *atom = new (_alloc) FuncAtom(*this, symbolName, impAtom, _machine);
+ _definedAtoms._atoms.push_back(atom);
+ }
+
+ atom_collection_vector<DefinedAtom> _definedAtoms;
+ atom_collection_vector<SharedLibraryAtom> _sharedLibraryAtoms;
+ mutable llvm::BumpPtrAllocator _alloc;
+
+ // Does the same thing as StringRef::ltrim() but removes at most one
+ // character.
+ StringRef ltrim1(StringRef str, const char *chars) const {
+ if (!str.empty() && strchr(chars, str[0]))
+ return str.substr(1);
+ return str;
+ }
+
+ // Convert the given symbol name to the import symbol name exported by the
+ // DLL.
+ StringRef symbolNameToImportName(StringRef symbolName, int nameType) const {
+ StringRef ret;
+ switch (nameType) {
+ case llvm::COFF::IMPORT_ORDINAL:
+ // The import is by ordinal. No symbol name will be used to identify the
+ // item in the DLL. Only its ordinal will be used.
+ return "";
+ case llvm::COFF::IMPORT_NAME:
+ // The import name in this case is identical to the symbol name.
+ return symbolName;
+ case llvm::COFF::IMPORT_NAME_NOPREFIX:
+ // The import name is the symbol name without leading ?, @ or _.
+ ret = ltrim1(symbolName, "?@_");
+ break;
+ case llvm::COFF::IMPORT_NAME_UNDECORATE:
+ // Similar to NOPREFIX, but we also need to truncate at the first @.
+ ret = ltrim1(symbolName, "?@_");
+ ret = ret.substr(0, ret.find('@'));
+ break;
+ }
+ std::string *str = new (_alloc) std::string(ret);
+ return *str;
+ }
+
+ std::unique_ptr<MemoryBuffer> _mb;
+ MachineTypes _machine;
+};
+
+class COFFImportLibraryReader : public Reader {
+public:
+ COFFImportLibraryReader(PECOFFLinkingContext &ctx) : _ctx(ctx) {}
+
+ bool canParse(file_magic magic, StringRef,
+ const MemoryBuffer &mb) const override {
+ if (mb.getBufferSize() < sizeof(COFF::ImportHeader))
+ return false;
+ return (magic == llvm::sys::fs::file_magic::coff_import_library);
+ }
+
+ std::error_code
+ loadFile(std::unique_ptr<MemoryBuffer> mb, const class Registry &,
+ std::vector<std::unique_ptr<File> > &result) const override {
+ auto *file = new FileImportLibrary(std::move(mb), _ctx.getMachineType());
+ result.push_back(std::unique_ptr<File>(file));
+ return std::error_code();
+ }
+
+private:
+ PECOFFLinkingContext &_ctx;
+};
+
+} // end anonymous namespace
+
+void Registry::addSupportCOFFImportLibraries(PECOFFLinkingContext &ctx) {
+ add(llvm::make_unique<COFFImportLibraryReader>(ctx));
+}
+
+} // end namespace lld